| distributed init (rank 0): env://, gpu 0
| distributed init (rank 3): env://, gpu 3
| distributed init (rank 2): env://, gpu 2
| distributed init (rank 1): env://, gpu 1
Namespace(batch_size=128, epochs=300, update_freq=4, model='base', drop_path=0, input_size=256, layer_scale_init_value=1e-06, model_ema=False, model_ema_decay=0.9999, model_ema_force_cpu=False, model_ema_eval=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=5.0, momentum=0.9, weight_decay=0.05, weight_decay_end=None, lr=0.004, layer_decay=1.0, min_lr=1e-06, warmup_epochs=20, warmup_steps=-1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', crop_pct=None, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', head_init_scale=1.0, model_key='model|module', model_prefix='', data_path='/dev/shm/imagenet', eval_data_path=None, nb_classes=1000, imagenet_default_mean_and_std=True, data_set='IMNET', output_dir='./checkpoint_base_256_11.4G', log_dir=None, device='cuda', seed=0, resume='', auto_resume=True, save_ckpt=True, save_ckpt_freq=1, save_ckpt_num=3, start_epoch=0, eval=False, dist_eval=True, disable_eval=False, num_workers=10, pin_mem=True, world_size=4, local_rank=-1, dist_on_itp=False, dist_url='env://', use_amp=True, enable_wandb=False, project='convnext', wandb_ckpt=False, rank=0, gpu=0, distributed=True, dist_backend='nccl')
Transform = 
RandomResizedCropAndInterpolation(size=(256, 256), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=bicubic)
RandomHorizontalFlip(p=0.5)
RandAugment(n=2, ops=
	AugmentOp(name=AutoContrast, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=Equalize, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=Invert, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=Rotate, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=PosterizeIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=SolarizeIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=SolarizeAdd, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=ColorIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=ContrastIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=BrightnessIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=SharpnessIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=ShearX, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=ShearY, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=TranslateXRel, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=TranslateYRel, p=0.5, m=9, mstd=0.5))
ToTensor()
Normalize(mean=tensor([0.4850, 0.4560, 0.4060]), std=tensor([0.2290, 0.2240, 0.2250]))
RandomErasing(p=0.25, mode=pixel, count=(1, 1))
---------------------------
reading from datapath /dev/shm/imagenet
Number of the class = 1000
Transform = 
Resize(size=292, interpolation=bicubic, max_size=None, antialias=True)
CenterCrop(size=(256, 256))
ToTensor()
Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
---------------------------
reading from datapath /dev/shm/imagenet
Number of the class = 1000
Sampler_train = <torch.utils.data.distributed.DistributedSampler object at 0x7f4401db7c90>
Mixup is activated!
Model = SFCNN(
  (first_conv): ConvX(
    (conv): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (norm): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): Act(
      (act): SiLU(inplace=True)
    )
  )
  (layer1): Sequential(
    (0): DropBottleNeck(
      (ln): LayerNorm((40,), eps=1e-05, elementwise_affine=True)
      (le): ConvX(
        (conv): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
        (norm): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(240, 240, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=240, bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Sequential(
        (0): ConvX(
          (conv): Conv2d(40, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=40, bias=False)
          (norm): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
        (1): ConvX(
          (conv): Conv2d(40, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
      )
      (drop_path): Identity()
    )
    (1): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=80, bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(80, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(240, 240, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=240, bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.005)
    )
    (2): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=80, bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(80, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(240, 240, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=240, bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.011)
    )
    (3): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=80, bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(80, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(240, 240, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=240, bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.016)
    )
    (4): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=80, bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(80, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(240, 240, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=240, bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.022)
    )
    (5): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=80, bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(80, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(240, 240, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=240, bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.027)
    )
    (6): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=80, bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(80, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(240, 240, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=240, bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.032)
    )
    (7): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=80, bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(80, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(240, 240, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=240, bias=False)
        (norm): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.038)
    )
  )
  (layer2): Sequential(
    (0): DropBottleNeck(
      (ln): LayerNorm((80,), eps=1e-05, elementwise_affine=True)
      (le): ConvX(
        (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=80, bias=False)
        (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Sequential(
        (0): ConvX(
          (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=80, bias=False)
          (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
        (1): ConvX(
          (conv): Conv2d(80, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
      )
      (drop_path): DropPath(drop_prob=0.043)
    )
    (1): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.048)
    )
    (2): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.054)
    )
    (3): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.059)
    )
    (4): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.065)
    )
    (5): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.070)
    )
    (6): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.075)
    )
    (7): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.081)
    )
    (8): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.086)
    )
    (9): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.092)
    )
    (10): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.097)
    )
    (11): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.102)
    )
    (12): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.108)
    )
    (13): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.113)
    )
    (14): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (norm): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(480, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.118)
    )
  )
  (layer3): Sequential(
    (0): DropBottleNeck(
      (ln): LayerNorm((160,), eps=1e-05, elementwise_affine=True)
      (le): ConvX(
        (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=160, bias=False)
        (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Sequential(
        (0): ConvX(
          (conv): Conv2d(160, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=160, bias=False)
          (norm): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
        (1): ConvX(
          (conv): Conv2d(160, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
      )
      (drop_path): DropPath(drop_prob=0.124)
    )
    (1): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.129)
    )
    (2): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.135)
    )
    (3): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.140)
    )
    (4): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.145)
    )
    (5): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.151)
    )
    (6): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.156)
    )
    (7): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.162)
    )
    (8): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.167)
    )
    (9): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.172)
    )
    (10): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.178)
    )
    (11): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.183)
    )
    (12): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.188)
    )
    (13): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.194)
    )
    (14): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.199)
    )
    (15): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.205)
    )
    (16): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.210)
    )
    (17): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.215)
    )
    (18): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.221)
    )
    (19): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.226)
    )
    (20): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.232)
    )
    (21): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.237)
    )
    (22): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.242)
    )
    (23): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.248)
    )
    (24): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.253)
    )
    (25): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.258)
    )
    (26): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.264)
    )
    (27): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.269)
    )
    (28): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.275)
    )
    (29): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.280)
    )
    (30): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.285)
    )
    (31): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.291)
    )
    (32): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.296)
    )
    (33): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.302)
    )
    (34): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
        (norm): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.307)
    )
  )
  (layer4): Sequential(
    (0): DropBottleNeck(
      (ln): LayerNorm((320,), eps=1e-05, elementwise_affine=True)
      (le): ConvX(
        (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=320, bias=False)
        (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(320, 1920, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1920, 1920, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=1920, bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1920, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Sequential(
        (0): ConvX(
          (conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=320, bias=False)
          (norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
        (1): ConvX(
          (conv): Conv2d(320, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
      )
      (drop_path): DropPath(drop_prob=0.312)
    )
    (1): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=640, bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(640, 1920, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1920, 1920, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1920, bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1920, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.318)
    )
    (2): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=640, bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(640, 1920, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1920, 1920, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1920, bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1920, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.323)
    )
    (3): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=640, bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(640, 1920, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1920, 1920, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1920, bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1920, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.328)
    )
    (4): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=640, bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(640, 1920, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1920, 1920, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1920, bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1920, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.334)
    )
    (5): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=640, bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(640, 1920, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1920, 1920, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1920, bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1920, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.339)
    )
    (6): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=640, bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(640, 1920, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1920, 1920, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1920, bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1920, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.345)
    )
    (7): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=640, bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(640, 1920, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1920, 1920, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1920, bias=False)
        (norm): BatchNorm2d(1920, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1920, 640, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.350)
    )
  )
  (head): ConvX(
    (conv): Conv2d(640, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): Act(
      (act): SiLU(inplace=True)
    )
  )
  (gap): AdaptiveAvgPool2d(output_size=1)
  (classifier): MlpHead(
    (fc1): Linear(in_features=1024, out_features=2048, bias=False)
    (norm): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): Act(
      (act): SiLU(inplace=True)
    )
    (drop): Dropout(p=0.2, inplace=False)
    (fc2): Linear(in_features=2048, out_features=1000, bias=False)
  )
)
number of params: 49050136
LR = 0.00400000
Batch size = 2048
Update frequent = 4
Number of training examples = 1281167
Number of training training per epoch = 625
Param groups = {
  "decay": {
    "weight_decay": 0.05,
    "params": [
      "first_conv.conv.weight",
      "layer1.0.le.conv.weight",
      "layer1.0.conv_in.conv.weight",
      "layer1.0.conv.conv.weight",
      "layer1.0.conv_out.conv.weight",
      "layer1.0.skip.0.conv.weight",
      "layer1.0.skip.1.conv.weight",
      "layer1.1.le.conv.weight",
      "layer1.1.conv_in.conv.weight",
      "layer1.1.conv.conv.weight",
      "layer1.1.conv_out.conv.weight",
      "layer1.2.le.conv.weight",
      "layer1.2.conv_in.conv.weight",
      "layer1.2.conv.conv.weight",
      "layer1.2.conv_out.conv.weight",
      "layer1.3.le.conv.weight",
      "layer1.3.conv_in.conv.weight",
      "layer1.3.conv.conv.weight",
      "layer1.3.conv_out.conv.weight",
      "layer1.4.le.conv.weight",
      "layer1.4.conv_in.conv.weight",
      "layer1.4.conv.conv.weight",
      "layer1.4.conv_out.conv.weight",
      "layer1.5.le.conv.weight",
      "layer1.5.conv_in.conv.weight",
      "layer1.5.conv.conv.weight",
      "layer1.5.conv_out.conv.weight",
      "layer1.6.le.conv.weight",
      "layer1.6.conv_in.conv.weight",
      "layer1.6.conv.conv.weight",
      "layer1.6.conv_out.conv.weight",
      "layer1.7.le.conv.weight",
      "layer1.7.conv_in.conv.weight",
      "layer1.7.conv.conv.weight",
      "layer1.7.conv_out.conv.weight",
      "layer2.0.le.conv.weight",
      "layer2.0.conv_in.conv.weight",
      "layer2.0.conv.conv.weight",
      "layer2.0.conv_out.conv.weight",
      "layer2.0.skip.0.conv.weight",
      "layer2.0.skip.1.conv.weight",
      "layer2.1.le.conv.weight",
      "layer2.1.conv_in.conv.weight",
      "layer2.1.conv.conv.weight",
      "layer2.1.conv_out.conv.weight",
      "layer2.2.le.conv.weight",
      "layer2.2.conv_in.conv.weight",
      "layer2.2.conv.conv.weight",
      "layer2.2.conv_out.conv.weight",
      "layer2.3.le.conv.weight",
      "layer2.3.conv_in.conv.weight",
      "layer2.3.conv.conv.weight",
      "layer2.3.conv_out.conv.weight",
      "layer2.4.le.conv.weight",
      "layer2.4.conv_in.conv.weight",
      "layer2.4.conv.conv.weight",
      "layer2.4.conv_out.conv.weight",
      "layer2.5.le.conv.weight",
      "layer2.5.conv_in.conv.weight",
      "layer2.5.conv.conv.weight",
      "layer2.5.conv_out.conv.weight",
      "layer2.6.le.conv.weight",
      "layer2.6.conv_in.conv.weight",
      "layer2.6.conv.conv.weight",
      "layer2.6.conv_out.conv.weight",
      "layer2.7.le.conv.weight",
      "layer2.7.conv_in.conv.weight",
      "layer2.7.conv.conv.weight",
      "layer2.7.conv_out.conv.weight",
      "layer2.8.le.conv.weight",
      "layer2.8.conv_in.conv.weight",
      "layer2.8.conv.conv.weight",
      "layer2.8.conv_out.conv.weight",
      "layer2.9.le.conv.weight",
      "layer2.9.conv_in.conv.weight",
      "layer2.9.conv.conv.weight",
      "layer2.9.conv_out.conv.weight",
      "layer2.10.le.conv.weight",
      "layer2.10.conv_in.conv.weight",
      "layer2.10.conv.conv.weight",
      "layer2.10.conv_out.conv.weight",
      "layer2.11.le.conv.weight",
      "layer2.11.conv_in.conv.weight",
      "layer2.11.conv.conv.weight",
      "layer2.11.conv_out.conv.weight",
      "layer2.12.le.conv.weight",
      "layer2.12.conv_in.conv.weight",
      "layer2.12.conv.conv.weight",
      "layer2.12.conv_out.conv.weight",
      "layer2.13.le.conv.weight",
      "layer2.13.conv_in.conv.weight",
      "layer2.13.conv.conv.weight",
      "layer2.13.conv_out.conv.weight",
      "layer2.14.le.conv.weight",
      "layer2.14.conv_in.conv.weight",
      "layer2.14.conv.conv.weight",
      "layer2.14.conv_out.conv.weight",
      "layer3.0.le.conv.weight",
      "layer3.0.conv_in.conv.weight",
      "layer3.0.conv.conv.weight",
      "layer3.0.conv_out.conv.weight",
      "layer3.0.skip.0.conv.weight",
      "layer3.0.skip.1.conv.weight",
      "layer3.1.le.conv.weight",
      "layer3.1.conv_in.conv.weight",
      "layer3.1.conv.conv.weight",
      "layer3.1.conv_out.conv.weight",
      "layer3.2.le.conv.weight",
      "layer3.2.conv_in.conv.weight",
      "layer3.2.conv.conv.weight",
      "layer3.2.conv_out.conv.weight",
      "layer3.3.le.conv.weight",
      "layer3.3.conv_in.conv.weight",
      "layer3.3.conv.conv.weight",
      "layer3.3.conv_out.conv.weight",
      "layer3.4.le.conv.weight",
      "layer3.4.conv_in.conv.weight",
      "layer3.4.conv.conv.weight",
      "layer3.4.conv_out.conv.weight",
      "layer3.5.le.conv.weight",
      "layer3.5.conv_in.conv.weight",
      "layer3.5.conv.conv.weight",
      "layer3.5.conv_out.conv.weight",
      "layer3.6.le.conv.weight",
      "layer3.6.conv_in.conv.weight",
      "layer3.6.conv.conv.weight",
      "layer3.6.conv_out.conv.weight",
      "layer3.7.le.conv.weight",
      "layer3.7.conv_in.conv.weight",
      "layer3.7.conv.conv.weight",
      "layer3.7.conv_out.conv.weight",
      "layer3.8.le.conv.weight",
      "layer3.8.conv_in.conv.weight",
      "layer3.8.conv.conv.weight",
      "layer3.8.conv_out.conv.weight",
      "layer3.9.le.conv.weight",
      "layer3.9.conv_in.conv.weight",
      "layer3.9.conv.conv.weight",
      "layer3.9.conv_out.conv.weight",
      "layer3.10.le.conv.weight",
      "layer3.10.conv_in.conv.weight",
      "layer3.10.conv.conv.weight",
      "layer3.10.conv_out.conv.weight",
      "layer3.11.le.conv.weight",
      "layer3.11.conv_in.conv.weight",
      "layer3.11.conv.conv.weight",
      "layer3.11.conv_out.conv.weight",
      "layer3.12.le.conv.weight",
      "layer3.12.conv_in.conv.weight",
      "layer3.12.conv.conv.weight",
      "layer3.12.conv_out.conv.weight",
      "layer3.13.le.conv.weight",
      "layer3.13.conv_in.conv.weight",
      "layer3.13.conv.conv.weight",
      "layer3.13.conv_out.conv.weight",
      "layer3.14.le.conv.weight",
      "layer3.14.conv_in.conv.weight",
      "layer3.14.conv.conv.weight",
      "layer3.14.conv_out.conv.weight",
      "layer3.15.le.conv.weight",
      "layer3.15.conv_in.conv.weight",
      "layer3.15.conv.conv.weight",
      "layer3.15.conv_out.conv.weight",
      "layer3.16.le.conv.weight",
      "layer3.16.conv_in.conv.weight",
      "layer3.16.conv.conv.weight",
      "layer3.16.conv_out.conv.weight",
      "layer3.17.le.conv.weight",
      "layer3.17.conv_in.conv.weight",
      "layer3.17.conv.conv.weight",
      "layer3.17.conv_out.conv.weight",
      "layer3.18.le.conv.weight",
      "layer3.18.conv_in.conv.weight",
      "layer3.18.conv.conv.weight",
      "layer3.18.conv_out.conv.weight",
      "layer3.19.le.conv.weight",
      "layer3.19.conv_in.conv.weight",
      "layer3.19.conv.conv.weight",
      "layer3.19.conv_out.conv.weight",
      "layer3.20.le.conv.weight",
      "layer3.20.conv_in.conv.weight",
      "layer3.20.conv.conv.weight",
      "layer3.20.conv_out.conv.weight",
      "layer3.21.le.conv.weight",
      "layer3.21.conv_in.conv.weight",
      "layer3.21.conv.conv.weight",
      "layer3.21.conv_out.conv.weight",
      "layer3.22.le.conv.weight",
      "layer3.22.conv_in.conv.weight",
      "layer3.22.conv.conv.weight",
      "layer3.22.conv_out.conv.weight",
      "layer3.23.le.conv.weight",
      "layer3.23.conv_in.conv.weight",
      "layer3.23.conv.conv.weight",
      "layer3.23.conv_out.conv.weight",
      "layer3.24.le.conv.weight",
      "layer3.24.conv_in.conv.weight",
      "layer3.24.conv.conv.weight",
      "layer3.24.conv_out.conv.weight",
      "layer3.25.le.conv.weight",
      "layer3.25.conv_in.conv.weight",
      "layer3.25.conv.conv.weight",
      "layer3.25.conv_out.conv.weight",
      "layer3.26.le.conv.weight",
      "layer3.26.conv_in.conv.weight",
      "layer3.26.conv.conv.weight",
      "layer3.26.conv_out.conv.weight",
      "layer3.27.le.conv.weight",
      "layer3.27.conv_in.conv.weight",
      "layer3.27.conv.conv.weight",
      "layer3.27.conv_out.conv.weight",
      "layer3.28.le.conv.weight",
      "layer3.28.conv_in.conv.weight",
      "layer3.28.conv.conv.weight",
      "layer3.28.conv_out.conv.weight",
      "layer3.29.le.conv.weight",
      "layer3.29.conv_in.conv.weight",
      "layer3.29.conv.conv.weight",
      "layer3.29.conv_out.conv.weight",
      "layer3.30.le.conv.weight",
      "layer3.30.conv_in.conv.weight",
      "layer3.30.conv.conv.weight",
      "layer3.30.conv_out.conv.weight",
      "layer3.31.le.conv.weight",
      "layer3.31.conv_in.conv.weight",
      "layer3.31.conv.conv.weight",
      "layer3.31.conv_out.conv.weight",
      "layer3.32.le.conv.weight",
      "layer3.32.conv_in.conv.weight",
      "layer3.32.conv.conv.weight",
      "layer3.32.conv_out.conv.weight",
      "layer3.33.le.conv.weight",
      "layer3.33.conv_in.conv.weight",
      "layer3.33.conv.conv.weight",
      "layer3.33.conv_out.conv.weight",
      "layer3.34.le.conv.weight",
      "layer3.34.conv_in.conv.weight",
      "layer3.34.conv.conv.weight",
      "layer3.34.conv_out.conv.weight",
      "layer4.0.le.conv.weight",
      "layer4.0.conv_in.conv.weight",
      "layer4.0.conv.conv.weight",
      "layer4.0.conv_out.conv.weight",
      "layer4.0.skip.0.conv.weight",
      "layer4.0.skip.1.conv.weight",
      "layer4.1.le.conv.weight",
      "layer4.1.conv_in.conv.weight",
      "layer4.1.conv.conv.weight",
      "layer4.1.conv_out.conv.weight",
      "layer4.2.le.conv.weight",
      "layer4.2.conv_in.conv.weight",
      "layer4.2.conv.conv.weight",
      "layer4.2.conv_out.conv.weight",
      "layer4.3.le.conv.weight",
      "layer4.3.conv_in.conv.weight",
      "layer4.3.conv.conv.weight",
      "layer4.3.conv_out.conv.weight",
      "layer4.4.le.conv.weight",
      "layer4.4.conv_in.conv.weight",
      "layer4.4.conv.conv.weight",
      "layer4.4.conv_out.conv.weight",
      "layer4.5.le.conv.weight",
      "layer4.5.conv_in.conv.weight",
      "layer4.5.conv.conv.weight",
      "layer4.5.conv_out.conv.weight",
      "layer4.6.le.conv.weight",
      "layer4.6.conv_in.conv.weight",
      "layer4.6.conv.conv.weight",
      "layer4.6.conv_out.conv.weight",
      "layer4.7.le.conv.weight",
      "layer4.7.conv_in.conv.weight",
      "layer4.7.conv.conv.weight",
      "layer4.7.conv_out.conv.weight",
      "head.conv.weight",
      "classifier.fc1.weight",
      "classifier.fc2.weight"
    ],
    "lr_scale": 1.0
  },
  "no_decay": {
    "weight_decay": 0.0,
    "params": [
      "first_conv.norm.weight",
      "first_conv.norm.bias",
      "layer1.0.ln.weight",
      "layer1.0.ln.bias",
      "layer1.0.le.norm.weight",
      "layer1.0.le.norm.bias",
      "layer1.0.conv_in.norm.weight",
      "layer1.0.conv_in.norm.bias",
      "layer1.0.conv.norm.weight",
      "layer1.0.conv.norm.bias",
      "layer1.0.conv_out.norm.weight",
      "layer1.0.conv_out.norm.bias",
      "layer1.0.skip.0.norm.weight",
      "layer1.0.skip.0.norm.bias",
      "layer1.0.skip.1.norm.weight",
      "layer1.0.skip.1.norm.bias",
      "layer1.1.le.norm.weight",
      "layer1.1.le.norm.bias",
      "layer1.1.conv_in.norm.weight",
      "layer1.1.conv_in.norm.bias",
      "layer1.1.conv.norm.weight",
      "layer1.1.conv.norm.bias",
      "layer1.1.conv_out.norm.weight",
      "layer1.1.conv_out.norm.bias",
      "layer1.2.le.norm.weight",
      "layer1.2.le.norm.bias",
      "layer1.2.conv_in.norm.weight",
      "layer1.2.conv_in.norm.bias",
      "layer1.2.conv.norm.weight",
      "layer1.2.conv.norm.bias",
      "layer1.2.conv_out.norm.weight",
      "layer1.2.conv_out.norm.bias",
      "layer1.3.le.norm.weight",
      "layer1.3.le.norm.bias",
      "layer1.3.conv_in.norm.weight",
      "layer1.3.conv_in.norm.bias",
      "layer1.3.conv.norm.weight",
      "layer1.3.conv.norm.bias",
      "layer1.3.conv_out.norm.weight",
      "layer1.3.conv_out.norm.bias",
      "layer1.4.le.norm.weight",
      "layer1.4.le.norm.bias",
      "layer1.4.conv_in.norm.weight",
      "layer1.4.conv_in.norm.bias",
      "layer1.4.conv.norm.weight",
      "layer1.4.conv.norm.bias",
      "layer1.4.conv_out.norm.weight",
      "layer1.4.conv_out.norm.bias",
      "layer1.5.le.norm.weight",
      "layer1.5.le.norm.bias",
      "layer1.5.conv_in.norm.weight",
      "layer1.5.conv_in.norm.bias",
      "layer1.5.conv.norm.weight",
      "layer1.5.conv.norm.bias",
      "layer1.5.conv_out.norm.weight",
      "layer1.5.conv_out.norm.bias",
      "layer1.6.le.norm.weight",
      "layer1.6.le.norm.bias",
      "layer1.6.conv_in.norm.weight",
      "layer1.6.conv_in.norm.bias",
      "layer1.6.conv.norm.weight",
      "layer1.6.conv.norm.bias",
      "layer1.6.conv_out.norm.weight",
      "layer1.6.conv_out.norm.bias",
      "layer1.7.le.norm.weight",
      "layer1.7.le.norm.bias",
      "layer1.7.conv_in.norm.weight",
      "layer1.7.conv_in.norm.bias",
      "layer1.7.conv.norm.weight",
      "layer1.7.conv.norm.bias",
      "layer1.7.conv_out.norm.weight",
      "layer1.7.conv_out.norm.bias",
      "layer2.0.ln.weight",
      "layer2.0.ln.bias",
      "layer2.0.le.norm.weight",
      "layer2.0.le.norm.bias",
      "layer2.0.conv_in.norm.weight",
      "layer2.0.conv_in.norm.bias",
      "layer2.0.conv.norm.weight",
      "layer2.0.conv.norm.bias",
      "layer2.0.conv_out.norm.weight",
      "layer2.0.conv_out.norm.bias",
      "layer2.0.skip.0.norm.weight",
      "layer2.0.skip.0.norm.bias",
      "layer2.0.skip.1.norm.weight",
      "layer2.0.skip.1.norm.bias",
      "layer2.1.le.norm.weight",
      "layer2.1.le.norm.bias",
      "layer2.1.conv_in.norm.weight",
      "layer2.1.conv_in.norm.bias",
      "layer2.1.conv.norm.weight",
      "layer2.1.conv.norm.bias",
      "layer2.1.conv_out.norm.weight",
      "layer2.1.conv_out.norm.bias",
      "layer2.2.le.norm.weight",
      "layer2.2.le.norm.bias",
      "layer2.2.conv_in.norm.weight",
      "layer2.2.conv_in.norm.bias",
      "layer2.2.conv.norm.weight",
      "layer2.2.conv.norm.bias",
      "layer2.2.conv_out.norm.weight",
      "layer2.2.conv_out.norm.bias",
      "layer2.3.le.norm.weight",
      "layer2.3.le.norm.bias",
      "layer2.3.conv_in.norm.weight",
      "layer2.3.conv_in.norm.bias",
      "layer2.3.conv.norm.weight",
      "layer2.3.conv.norm.bias",
      "layer2.3.conv_out.norm.weight",
      "layer2.3.conv_out.norm.bias",
      "layer2.4.le.norm.weight",
      "layer2.4.le.norm.bias",
      "layer2.4.conv_in.norm.weight",
      "layer2.4.conv_in.norm.bias",
      "layer2.4.conv.norm.weight",
      "layer2.4.conv.norm.bias",
      "layer2.4.conv_out.norm.weight",
      "layer2.4.conv_out.norm.bias",
      "layer2.5.le.norm.weight",
      "layer2.5.le.norm.bias",
      "layer2.5.conv_in.norm.weight",
      "layer2.5.conv_in.norm.bias",
      "layer2.5.conv.norm.weight",
      "layer2.5.conv.norm.bias",
      "layer2.5.conv_out.norm.weight",
      "layer2.5.conv_out.norm.bias",
      "layer2.6.le.norm.weight",
      "layer2.6.le.norm.bias",
      "layer2.6.conv_in.norm.weight",
      "layer2.6.conv_in.norm.bias",
      "layer2.6.conv.norm.weight",
      "layer2.6.conv.norm.bias",
      "layer2.6.conv_out.norm.weight",
      "layer2.6.conv_out.norm.bias",
      "layer2.7.le.norm.weight",
      "layer2.7.le.norm.bias",
      "layer2.7.conv_in.norm.weight",
      "layer2.7.conv_in.norm.bias",
      "layer2.7.conv.norm.weight",
      "layer2.7.conv.norm.bias",
      "layer2.7.conv_out.norm.weight",
      "layer2.7.conv_out.norm.bias",
      "layer2.8.le.norm.weight",
      "layer2.8.le.norm.bias",
      "layer2.8.conv_in.norm.weight",
      "layer2.8.conv_in.norm.bias",
      "layer2.8.conv.norm.weight",
      "layer2.8.conv.norm.bias",
      "layer2.8.conv_out.norm.weight",
      "layer2.8.conv_out.norm.bias",
      "layer2.9.le.norm.weight",
      "layer2.9.le.norm.bias",
      "layer2.9.conv_in.norm.weight",
      "layer2.9.conv_in.norm.bias",
      "layer2.9.conv.norm.weight",
      "layer2.9.conv.norm.bias",
      "layer2.9.conv_out.norm.weight",
      "layer2.9.conv_out.norm.bias",
      "layer2.10.le.norm.weight",
      "layer2.10.le.norm.bias",
      "layer2.10.conv_in.norm.weight",
      "layer2.10.conv_in.norm.bias",
      "layer2.10.conv.norm.weight",
      "layer2.10.conv.norm.bias",
      "layer2.10.conv_out.norm.weight",
      "layer2.10.conv_out.norm.bias",
      "layer2.11.le.norm.weight",
      "layer2.11.le.norm.bias",
      "layer2.11.conv_in.norm.weight",
      "layer2.11.conv_in.norm.bias",
      "layer2.11.conv.norm.weight",
      "layer2.11.conv.norm.bias",
      "layer2.11.conv_out.norm.weight",
      "layer2.11.conv_out.norm.bias",
      "layer2.12.le.norm.weight",
      "layer2.12.le.norm.bias",
      "layer2.12.conv_in.norm.weight",
      "layer2.12.conv_in.norm.bias",
      "layer2.12.conv.norm.weight",
      "layer2.12.conv.norm.bias",
      "layer2.12.conv_out.norm.weight",
      "layer2.12.conv_out.norm.bias",
      "layer2.13.le.norm.weight",
      "layer2.13.le.norm.bias",
      "layer2.13.conv_in.norm.weight",
      "layer2.13.conv_in.norm.bias",
      "layer2.13.conv.norm.weight",
      "layer2.13.conv.norm.bias",
      "layer2.13.conv_out.norm.weight",
      "layer2.13.conv_out.norm.bias",
      "layer2.14.le.norm.weight",
      "layer2.14.le.norm.bias",
      "layer2.14.conv_in.norm.weight",
      "layer2.14.conv_in.norm.bias",
      "layer2.14.conv.norm.weight",
      "layer2.14.conv.norm.bias",
      "layer2.14.conv_out.norm.weight",
      "layer2.14.conv_out.norm.bias",
      "layer3.0.ln.weight",
      "layer3.0.ln.bias",
      "layer3.0.le.norm.weight",
      "layer3.0.le.norm.bias",
      "layer3.0.conv_in.norm.weight",
      "layer3.0.conv_in.norm.bias",
      "layer3.0.conv.norm.weight",
      "layer3.0.conv.norm.bias",
      "layer3.0.conv_out.norm.weight",
      "layer3.0.conv_out.norm.bias",
      "layer3.0.skip.0.norm.weight",
      "layer3.0.skip.0.norm.bias",
      "layer3.0.skip.1.norm.weight",
      "layer3.0.skip.1.norm.bias",
      "layer3.1.le.norm.weight",
      "layer3.1.le.norm.bias",
      "layer3.1.conv_in.norm.weight",
      "layer3.1.conv_in.norm.bias",
      "layer3.1.conv.norm.weight",
      "layer3.1.conv.norm.bias",
      "layer3.1.conv_out.norm.weight",
      "layer3.1.conv_out.norm.bias",
      "layer3.2.le.norm.weight",
      "layer3.2.le.norm.bias",
      "layer3.2.conv_in.norm.weight",
      "layer3.2.conv_in.norm.bias",
      "layer3.2.conv.norm.weight",
      "layer3.2.conv.norm.bias",
      "layer3.2.conv_out.norm.weight",
      "layer3.2.conv_out.norm.bias",
      "layer3.3.le.norm.weight",
      "layer3.3.le.norm.bias",
      "layer3.3.conv_in.norm.weight",
      "layer3.3.conv_in.norm.bias",
      "layer3.3.conv.norm.weight",
      "layer3.3.conv.norm.bias",
      "layer3.3.conv_out.norm.weight",
      "layer3.3.conv_out.norm.bias",
      "layer3.4.le.norm.weight",
      "layer3.4.le.norm.bias",
      "layer3.4.conv_in.norm.weight",
      "layer3.4.conv_in.norm.bias",
      "layer3.4.conv.norm.weight",
      "layer3.4.conv.norm.bias",
      "layer3.4.conv_out.norm.weight",
      "layer3.4.conv_out.norm.bias",
      "layer3.5.le.norm.weight",
      "layer3.5.le.norm.bias",
      "layer3.5.conv_in.norm.weight",
      "layer3.5.conv_in.norm.bias",
      "layer3.5.conv.norm.weight",
      "layer3.5.conv.norm.bias",
      "layer3.5.conv_out.norm.weight",
      "layer3.5.conv_out.norm.bias",
      "layer3.6.le.norm.weight",
      "layer3.6.le.norm.bias",
      "layer3.6.conv_in.norm.weight",
      "layer3.6.conv_in.norm.bias",
      "layer3.6.conv.norm.weight",
      "layer3.6.conv.norm.bias",
      "layer3.6.conv_out.norm.weight",
      "layer3.6.conv_out.norm.bias",
      "layer3.7.le.norm.weight",
      "layer3.7.le.norm.bias",
      "layer3.7.conv_in.norm.weight",
      "layer3.7.conv_in.norm.bias",
      "layer3.7.conv.norm.weight",
      "layer3.7.conv.norm.bias",
      "layer3.7.conv_out.norm.weight",
      "layer3.7.conv_out.norm.bias",
      "layer3.8.le.norm.weight",
      "layer3.8.le.norm.bias",
      "layer3.8.conv_in.norm.weight",
      "layer3.8.conv_in.norm.bias",
      "layer3.8.conv.norm.weight",
      "layer3.8.conv.norm.bias",
      "layer3.8.conv_out.norm.weight",
      "layer3.8.conv_out.norm.bias",
      "layer3.9.le.norm.weight",
      "layer3.9.le.norm.bias",
      "layer3.9.conv_in.norm.weight",
      "layer3.9.conv_in.norm.bias",
      "layer3.9.conv.norm.weight",
      "layer3.9.conv.norm.bias",
      "layer3.9.conv_out.norm.weight",
      "layer3.9.conv_out.norm.bias",
      "layer3.10.le.norm.weight",
      "layer3.10.le.norm.bias",
      "layer3.10.conv_in.norm.weight",
      "layer3.10.conv_in.norm.bias",
      "layer3.10.conv.norm.weight",
      "layer3.10.conv.norm.bias",
      "layer3.10.conv_out.norm.weight",
      "layer3.10.conv_out.norm.bias",
      "layer3.11.le.norm.weight",
      "layer3.11.le.norm.bias",
      "layer3.11.conv_in.norm.weight",
      "layer3.11.conv_in.norm.bias",
      "layer3.11.conv.norm.weight",
      "layer3.11.conv.norm.bias",
      "layer3.11.conv_out.norm.weight",
      "layer3.11.conv_out.norm.bias",
      "layer3.12.le.norm.weight",
      "layer3.12.le.norm.bias",
      "layer3.12.conv_in.norm.weight",
      "layer3.12.conv_in.norm.bias",
      "layer3.12.conv.norm.weight",
      "layer3.12.conv.norm.bias",
      "layer3.12.conv_out.norm.weight",
      "layer3.12.conv_out.norm.bias",
      "layer3.13.le.norm.weight",
      "layer3.13.le.norm.bias",
      "layer3.13.conv_in.norm.weight",
      "layer3.13.conv_in.norm.bias",
      "layer3.13.conv.norm.weight",
      "layer3.13.conv.norm.bias",
      "layer3.13.conv_out.norm.weight",
      "layer3.13.conv_out.norm.bias",
      "layer3.14.le.norm.weight",
      "layer3.14.le.norm.bias",
      "layer3.14.conv_in.norm.weight",
      "layer3.14.conv_in.norm.bias",
      "layer3.14.conv.norm.weight",
      "layer3.14.conv.norm.bias",
      "layer3.14.conv_out.norm.weight",
      "layer3.14.conv_out.norm.bias",
      "layer3.15.le.norm.weight",
      "layer3.15.le.norm.bias",
      "layer3.15.conv_in.norm.weight",
      "layer3.15.conv_in.norm.bias",
      "layer3.15.conv.norm.weight",
      "layer3.15.conv.norm.bias",
      "layer3.15.conv_out.norm.weight",
      "layer3.15.conv_out.norm.bias",
      "layer3.16.le.norm.weight",
      "layer3.16.le.norm.bias",
      "layer3.16.conv_in.norm.weight",
      "layer3.16.conv_in.norm.bias",
      "layer3.16.conv.norm.weight",
      "layer3.16.conv.norm.bias",
      "layer3.16.conv_out.norm.weight",
      "layer3.16.conv_out.norm.bias",
      "layer3.17.le.norm.weight",
      "layer3.17.le.norm.bias",
      "layer3.17.conv_in.norm.weight",
      "layer3.17.conv_in.norm.bias",
      "layer3.17.conv.norm.weight",
      "layer3.17.conv.norm.bias",
      "layer3.17.conv_out.norm.weight",
      "layer3.17.conv_out.norm.bias",
      "layer3.18.le.norm.weight",
      "layer3.18.le.norm.bias",
      "layer3.18.conv_in.norm.weight",
      "layer3.18.conv_in.norm.bias",
      "layer3.18.conv.norm.weight",
      "layer3.18.conv.norm.bias",
      "layer3.18.conv_out.norm.weight",
      "layer3.18.conv_out.norm.bias",
      "layer3.19.le.norm.weight",
      "layer3.19.le.norm.bias",
      "layer3.19.conv_in.norm.weight",
      "layer3.19.conv_in.norm.bias",
      "layer3.19.conv.norm.weight",
      "layer3.19.conv.norm.bias",
      "layer3.19.conv_out.norm.weight",
      "layer3.19.conv_out.norm.bias",
      "layer3.20.le.norm.weight",
      "layer3.20.le.norm.bias",
      "layer3.20.conv_in.norm.weight",
      "layer3.20.conv_in.norm.bias",
      "layer3.20.conv.norm.weight",
      "layer3.20.conv.norm.bias",
      "layer3.20.conv_out.norm.weight",
      "layer3.20.conv_out.norm.bias",
      "layer3.21.le.norm.weight",
      "layer3.21.le.norm.bias",
      "layer3.21.conv_in.norm.weight",
      "layer3.21.conv_in.norm.bias",
      "layer3.21.conv.norm.weight",
      "layer3.21.conv.norm.bias",
      "layer3.21.conv_out.norm.weight",
      "layer3.21.conv_out.norm.bias",
      "layer3.22.le.norm.weight",
      "layer3.22.le.norm.bias",
      "layer3.22.conv_in.norm.weight",
      "layer3.22.conv_in.norm.bias",
      "layer3.22.conv.norm.weight",
      "layer3.22.conv.norm.bias",
      "layer3.22.conv_out.norm.weight",
      "layer3.22.conv_out.norm.bias",
      "layer3.23.le.norm.weight",
      "layer3.23.le.norm.bias",
      "layer3.23.conv_in.norm.weight",
      "layer3.23.conv_in.norm.bias",
      "layer3.23.conv.norm.weight",
      "layer3.23.conv.norm.bias",
      "layer3.23.conv_out.norm.weight",
      "layer3.23.conv_out.norm.bias",
      "layer3.24.le.norm.weight",
      "layer3.24.le.norm.bias",
      "layer3.24.conv_in.norm.weight",
      "layer3.24.conv_in.norm.bias",
      "layer3.24.conv.norm.weight",
      "layer3.24.conv.norm.bias",
      "layer3.24.conv_out.norm.weight",
      "layer3.24.conv_out.norm.bias",
      "layer3.25.le.norm.weight",
      "layer3.25.le.norm.bias",
      "layer3.25.conv_in.norm.weight",
      "layer3.25.conv_in.norm.bias",
      "layer3.25.conv.norm.weight",
      "layer3.25.conv.norm.bias",
      "layer3.25.conv_out.norm.weight",
      "layer3.25.conv_out.norm.bias",
      "layer3.26.le.norm.weight",
      "layer3.26.le.norm.bias",
      "layer3.26.conv_in.norm.weight",
      "layer3.26.conv_in.norm.bias",
      "layer3.26.conv.norm.weight",
      "layer3.26.conv.norm.bias",
      "layer3.26.conv_out.norm.weight",
      "layer3.26.conv_out.norm.bias",
      "layer3.27.le.norm.weight",
      "layer3.27.le.norm.bias",
      "layer3.27.conv_in.norm.weight",
      "layer3.27.conv_in.norm.bias",
      "layer3.27.conv.norm.weight",
      "layer3.27.conv.norm.bias",
      "layer3.27.conv_out.norm.weight",
      "layer3.27.conv_out.norm.bias",
      "layer3.28.le.norm.weight",
      "layer3.28.le.norm.bias",
      "layer3.28.conv_in.norm.weight",
      "layer3.28.conv_in.norm.bias",
      "layer3.28.conv.norm.weight",
      "layer3.28.conv.norm.bias",
      "layer3.28.conv_out.norm.weight",
      "layer3.28.conv_out.norm.bias",
      "layer3.29.le.norm.weight",
      "layer3.29.le.norm.bias",
      "layer3.29.conv_in.norm.weight",
      "layer3.29.conv_in.norm.bias",
      "layer3.29.conv.norm.weight",
      "layer3.29.conv.norm.bias",
      "layer3.29.conv_out.norm.weight",
      "layer3.29.conv_out.norm.bias",
      "layer3.30.le.norm.weight",
      "layer3.30.le.norm.bias",
      "layer3.30.conv_in.norm.weight",
      "layer3.30.conv_in.norm.bias",
      "layer3.30.conv.norm.weight",
      "layer3.30.conv.norm.bias",
      "layer3.30.conv_out.norm.weight",
      "layer3.30.conv_out.norm.bias",
      "layer3.31.le.norm.weight",
      "layer3.31.le.norm.bias",
      "layer3.31.conv_in.norm.weight",
      "layer3.31.conv_in.norm.bias",
      "layer3.31.conv.norm.weight",
      "layer3.31.conv.norm.bias",
      "layer3.31.conv_out.norm.weight",
      "layer3.31.conv_out.norm.bias",
      "layer3.32.le.norm.weight",
      "layer3.32.le.norm.bias",
      "layer3.32.conv_in.norm.weight",
      "layer3.32.conv_in.norm.bias",
      "layer3.32.conv.norm.weight",
      "layer3.32.conv.norm.bias",
      "layer3.32.conv_out.norm.weight",
      "layer3.32.conv_out.norm.bias",
      "layer3.33.le.norm.weight",
      "layer3.33.le.norm.bias",
      "layer3.33.conv_in.norm.weight",
      "layer3.33.conv_in.norm.bias",
      "layer3.33.conv.norm.weight",
      "layer3.33.conv.norm.bias",
      "layer3.33.conv_out.norm.weight",
      "layer3.33.conv_out.norm.bias",
      "layer3.34.le.norm.weight",
      "layer3.34.le.norm.bias",
      "layer3.34.conv_in.norm.weight",
      "layer3.34.conv_in.norm.bias",
      "layer3.34.conv.norm.weight",
      "layer3.34.conv.norm.bias",
      "layer3.34.conv_out.norm.weight",
      "layer3.34.conv_out.norm.bias",
      "layer4.0.ln.weight",
      "layer4.0.ln.bias",
      "layer4.0.le.norm.weight",
      "layer4.0.le.norm.bias",
      "layer4.0.conv_in.norm.weight",
      "layer4.0.conv_in.norm.bias",
      "layer4.0.conv.norm.weight",
      "layer4.0.conv.norm.bias",
      "layer4.0.conv_out.norm.weight",
      "layer4.0.conv_out.norm.bias",
      "layer4.0.skip.0.norm.weight",
      "layer4.0.skip.0.norm.bias",
      "layer4.0.skip.1.norm.weight",
      "layer4.0.skip.1.norm.bias",
      "layer4.1.le.norm.weight",
      "layer4.1.le.norm.bias",
      "layer4.1.conv_in.norm.weight",
      "layer4.1.conv_in.norm.bias",
      "layer4.1.conv.norm.weight",
      "layer4.1.conv.norm.bias",
      "layer4.1.conv_out.norm.weight",
      "layer4.1.conv_out.norm.bias",
      "layer4.2.le.norm.weight",
      "layer4.2.le.norm.bias",
      "layer4.2.conv_in.norm.weight",
      "layer4.2.conv_in.norm.bias",
      "layer4.2.conv.norm.weight",
      "layer4.2.conv.norm.bias",
      "layer4.2.conv_out.norm.weight",
      "layer4.2.conv_out.norm.bias",
      "layer4.3.le.norm.weight",
      "layer4.3.le.norm.bias",
      "layer4.3.conv_in.norm.weight",
      "layer4.3.conv_in.norm.bias",
      "layer4.3.conv.norm.weight",
      "layer4.3.conv.norm.bias",
      "layer4.3.conv_out.norm.weight",
      "layer4.3.conv_out.norm.bias",
      "layer4.4.le.norm.weight",
      "layer4.4.le.norm.bias",
      "layer4.4.conv_in.norm.weight",
      "layer4.4.conv_in.norm.bias",
      "layer4.4.conv.norm.weight",
      "layer4.4.conv.norm.bias",
      "layer4.4.conv_out.norm.weight",
      "layer4.4.conv_out.norm.bias",
      "layer4.5.le.norm.weight",
      "layer4.5.le.norm.bias",
      "layer4.5.conv_in.norm.weight",
      "layer4.5.conv_in.norm.bias",
      "layer4.5.conv.norm.weight",
      "layer4.5.conv.norm.bias",
      "layer4.5.conv_out.norm.weight",
      "layer4.5.conv_out.norm.bias",
      "layer4.6.le.norm.weight",
      "layer4.6.le.norm.bias",
      "layer4.6.conv_in.norm.weight",
      "layer4.6.conv_in.norm.bias",
      "layer4.6.conv.norm.weight",
      "layer4.6.conv.norm.bias",
      "layer4.6.conv_out.norm.weight",
      "layer4.6.conv_out.norm.bias",
      "layer4.7.le.norm.weight",
      "layer4.7.le.norm.bias",
      "layer4.7.conv_in.norm.weight",
      "layer4.7.conv_in.norm.bias",
      "layer4.7.conv.norm.weight",
      "layer4.7.conv.norm.bias",
      "layer4.7.conv_out.norm.weight",
      "layer4.7.conv_out.norm.bias",
      "head.norm.weight",
      "head.norm.bias",
      "classifier.norm.weight",
      "classifier.norm.bias"
    ],
    "lr_scale": 1.0
  }
}
Use Cosine LR scheduler
Set warmup steps = 12500
Set warmup steps = 0
Max WD = 0.0500000, Min WD = 0.0500000
criterion = SoftTargetCrossEntropy()
Auto resume checkpoint: 
Start training for 300 epochs
Epoch: [0]  [   0/2502]  eta: 8:45:33  lr: 0.000000  min_lr: 0.000000  loss: 6.9646 (6.9646)  weight_decay: 0.0500 (0.0500)  time: 12.6033  data: 2.6468  max mem: 53905
Epoch: [0]  [ 200/2502]  eta: 0:25:10  lr: 0.000016  min_lr: 0.000016  loss: 6.9229 (6.9421)  weight_decay: 0.0500 (0.0500)  grad_norm: 8.2644 (12.5819)  time: 0.5954  data: 0.0004  max mem: 53905
Epoch: [0]  [ 400/2502]  eta: 0:21:56  lr: 0.000032  min_lr: 0.000032  loss: 6.8768 (6.9207)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.6594 (8.0158)  time: 0.5952  data: 0.0005  max mem: 53905
Epoch: [0]  [ 600/2502]  eta: 0:19:31  lr: 0.000048  min_lr: 0.000048  loss: 6.8008 (6.8930)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.9880 (6.3454)  time: 0.5952  data: 0.0004  max mem: 53905
Epoch: [0]  [ 800/2502]  eta: 0:17:19  lr: 0.000064  min_lr: 0.000064  loss: 6.7695 (6.8633)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.1878 (5.6847)  time: 0.5953  data: 0.0006  max mem: 53905
Epoch: [0]  [1000/2502]  eta: 0:15:12  lr: 0.000080  min_lr: 0.000080  loss: 6.6855 (6.8334)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.8340 (inf)  time: 0.5952  data: 0.0004  max mem: 53905
Epoch: [0]  [1200/2502]  eta: 0:13:08  lr: 0.000096  min_lr: 0.000096  loss: 6.6710 (6.8073)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.3202 (inf)  time: 0.5951  data: 0.0004  max mem: 53905
Epoch: [0]  [1400/2502]  eta: 0:11:05  lr: 0.000112  min_lr: 0.000112  loss: 6.6488 (6.7825)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.1653 (inf)  time: 0.5951  data: 0.0005  max mem: 53905
Epoch: [0]  [1600/2502]  eta: 0:09:04  lr: 0.000128  min_lr: 0.000128  loss: 6.6121 (6.7609)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.5940 (inf)  time: 0.5951  data: 0.0005  max mem: 53905
Epoch: [0]  [1800/2502]  eta: 0:07:02  lr: 0.000144  min_lr: 0.000144  loss: 6.5686 (6.7389)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.8294 (inf)  time: 0.5952  data: 0.0005  max mem: 53905
Epoch: [0]  [2000/2502]  eta: 0:05:01  lr: 0.000160  min_lr: 0.000160  loss: 6.5022 (6.7211)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.5253 (inf)  time: 0.5950  data: 0.0004  max mem: 53905
Epoch: [0]  [2200/2502]  eta: 0:03:01  lr: 0.000176  min_lr: 0.000176  loss: 6.5125 (6.7010)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.6432 (inf)  time: 0.5953  data: 0.0006  max mem: 53905
Epoch: [0]  [2400/2502]  eta: 0:01:01  lr: 0.000192  min_lr: 0.000192  loss: 6.4507 (6.6821)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.7618 (inf)  time: 0.5951  data: 0.0005  max mem: 53905
Epoch: [0]  [2501/2502]  eta: 0:00:00  lr: 0.000200  min_lr: 0.000200  loss: 6.5676 (6.6739)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.3506 (inf)  time: 0.5349  data: 0.0007  max mem: 53905
Epoch: [0] Total time: 0:25:01 (0.6000 s / it)
Averaged stats: lr: 0.000200  min_lr: 0.000200  loss: 6.5676 (6.6752)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.3506 (inf)
Test:  [ 0/50]  eta: 0:07:10  loss: 5.4865 (5.4865)  acc1: 2.4000 (2.4000)  acc5: 19.2000 (19.2000)  time: 8.6200  data: 3.4118  max mem: 53905
Test:  [10/50]  eta: 0:00:41  loss: 5.6439 (5.6189)  acc1: 2.4000 (3.7455)  acc5: 13.2000 (13.7091)  time: 1.0407  data: 0.3105  max mem: 53905
Test:  [20/50]  eta: 0:00:20  loss: 5.6439 (5.6173)  acc1: 2.8000 (3.7333)  acc5: 12.4000 (13.2190)  time: 0.2832  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:11  loss: 5.6680 (5.6552)  acc1: 4.0000 (3.6903)  acc5: 11.6000 (12.9161)  time: 0.2835  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:04  loss: 5.6680 (5.6691)  acc1: 3.6000 (3.7366)  acc5: 11.6000 (12.9366)  time: 0.2833  data: 0.0002  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 5.6285 (5.6294)  acc1: 4.8000 (4.2320)  acc5: 13.6000 (14.0560)  time: 0.2830  data: 0.0001  max mem: 53905
Test: Total time: 0:00:22 (0.4516 s / it)
* Acc@1 4.140 Acc@5 13.730 loss 5.644
Accuracy of the model on the 50000 test images: 4.1%
Max accuracy: 4.14%
Epoch: [1]  [   0/2502]  eta: 1:42:14  lr: 0.000200  min_lr: 0.000200  loss: 6.4117 (6.4117)  weight_decay: 0.0500 (0.0500)  time: 2.4519  data: 1.8260  max mem: 53905
Epoch: [1]  [ 200/2502]  eta: 0:23:03  lr: 0.000216  min_lr: 0.000216  loss: 6.5449 (6.4432)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.8402 (5.8280)  time: 0.5900  data: 0.0004  max mem: 53905
Epoch: [1]  [ 400/2502]  eta: 0:20:51  lr: 0.000232  min_lr: 0.000232  loss: 6.3949 (6.4344)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.6776 (6.0530)  time: 0.5904  data: 0.0005  max mem: 53905
Epoch: [1]  [ 600/2502]  eta: 0:18:50  lr: 0.000248  min_lr: 0.000248  loss: 6.4036 (6.4057)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.9458 (5.9243)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [1]  [ 800/2502]  eta: 0:16:50  lr: 0.000264  min_lr: 0.000264  loss: 6.3196 (6.3898)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.8184 (5.9357)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [1]  [1000/2502]  eta: 0:14:51  lr: 0.000280  min_lr: 0.000280  loss: 6.4584 (6.3801)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.6607 (5.8137)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [1]  [1200/2502]  eta: 0:12:52  lr: 0.000296  min_lr: 0.000296  loss: 6.4159 (6.3751)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.8399 (5.7084)  time: 0.6030  data: 0.0004  max mem: 53905
Epoch: [1]  [1400/2502]  eta: 0:10:53  lr: 0.000312  min_lr: 0.000312  loss: 6.2578 (6.3642)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.4308 (5.6247)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [1]  [1600/2502]  eta: 0:08:54  lr: 0.000328  min_lr: 0.000328  loss: 6.3152 (6.3517)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.0610 (5.5135)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [1]  [1800/2502]  eta: 0:06:56  lr: 0.000344  min_lr: 0.000344  loss: 6.3019 (6.3369)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.7951 (5.4437)  time: 0.5913  data: 0.0003  max mem: 53905
Epoch: [1]  [2000/2502]  eta: 0:04:57  lr: 0.000360  min_lr: 0.000360  loss: 6.2221 (6.3238)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.5891 (5.3605)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [1]  [2200/2502]  eta: 0:02:58  lr: 0.000376  min_lr: 0.000376  loss: 6.2429 (6.3119)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.4367 (5.3065)  time: 0.5913  data: 0.0003  max mem: 53905
Epoch: [1]  [2400/2502]  eta: 0:01:00  lr: 0.000392  min_lr: 0.000392  loss: 6.2514 (6.3017)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.7833 (5.2819)  time: 0.5913  data: 0.0003  max mem: 53905
Epoch: [1]  [2501/2502]  eta: 0:00:00  lr: 0.000400  min_lr: 0.000400  loss: 6.2225 (6.2943)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.9315 (5.2681)  time: 0.5313  data: 0.0005  max mem: 53905
Epoch: [1] Total time: 0:24:41 (0.5923 s / it)
Averaged stats: lr: 0.000400  min_lr: 0.000400  loss: 6.2225 (6.3007)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.9315 (5.2681)
Test:  [ 0/50]  eta: 0:02:40  loss: 4.5956 (4.5956)  acc1: 13.6000 (13.6000)  acc5: 38.0000 (38.0000)  time: 3.2025  data: 2.8873  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 4.7940 (4.7412)  acc1: 11.2000 (10.1818)  acc5: 30.0000 (29.5273)  time: 0.5487  data: 0.2628  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 4.6865 (4.6721)  acc1: 12.0000 (12.1714)  acc5: 29.2000 (30.5524)  time: 0.2835  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 4.7533 (4.7573)  acc1: 12.0000 (12.0387)  acc5: 29.2000 (29.7419)  time: 0.2838  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 4.9453 (4.8059)  acc1: 12.0000 (11.9610)  acc5: 26.8000 (29.0049)  time: 0.2835  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 4.9237 (4.7719)  acc1: 12.0000 (12.7040)  acc5: 27.6000 (30.1680)  time: 0.2833  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3443 s / it)
* Acc@1 12.152 Acc@5 29.716 loss 4.779
Accuracy of the model on the 50000 test images: 12.2%
Max accuracy: 12.15%
Epoch: [2]  [   0/2502]  eta: 1:41:01  lr: 0.000400  min_lr: 0.000400  loss: 5.8828 (5.8828)  weight_decay: 0.0500 (0.0500)  time: 2.4227  data: 1.8144  max mem: 53905
Epoch: [2]  [ 200/2502]  eta: 0:23:01  lr: 0.000416  min_lr: 0.000416  loss: 6.1893 (6.1565)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.1965 (4.3867)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [2]  [ 400/2502]  eta: 0:20:53  lr: 0.000432  min_lr: 0.000432  loss: 5.9801 (6.1453)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.4113 (4.4671)  time: 0.5900  data: 0.0004  max mem: 53905
Epoch: [2]  [ 600/2502]  eta: 0:18:50  lr: 0.000448  min_lr: 0.000448  loss: 5.9989 (6.1227)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.0023 (4.4061)  time: 0.5901  data: 0.0004  max mem: 53905
Epoch: [2]  [ 800/2502]  eta: 0:16:50  lr: 0.000464  min_lr: 0.000464  loss: 5.8092 (6.1025)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.1578 (4.4117)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [2]  [1000/2502]  eta: 0:14:50  lr: 0.000480  min_lr: 0.000480  loss: 5.9599 (6.0869)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.7165 (4.3838)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [2]  [1200/2502]  eta: 0:12:51  lr: 0.000496  min_lr: 0.000496  loss: 5.9531 (6.0738)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.1488 (4.3487)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [2]  [1400/2502]  eta: 0:10:52  lr: 0.000512  min_lr: 0.000512  loss: 6.0930 (6.0662)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.7420 (4.3003)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [2]  [1600/2502]  eta: 0:08:54  lr: 0.000528  min_lr: 0.000528  loss: 6.0125 (6.0514)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.3504 (4.2781)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [2]  [1800/2502]  eta: 0:06:55  lr: 0.000544  min_lr: 0.000544  loss: 5.8809 (6.0396)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.8026 (4.2375)  time: 0.5907  data: 0.0003  max mem: 53905
Epoch: [2]  [2000/2502]  eta: 0:04:57  lr: 0.000560  min_lr: 0.000560  loss: 5.8412 (6.0244)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.9500 (4.2216)  time: 0.5991  data: 0.0004  max mem: 53905
Epoch: [2]  [2200/2502]  eta: 0:02:58  lr: 0.000576  min_lr: 0.000576  loss: 5.8612 (6.0114)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.8482 (4.1923)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [2]  [2400/2502]  eta: 0:01:00  lr: 0.000592  min_lr: 0.000592  loss: 5.6273 (6.0040)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.7435 (4.1907)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [2]  [2501/2502]  eta: 0:00:00  lr: 0.000600  min_lr: 0.000600  loss: 5.9575 (5.9963)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.2879 (4.1647)  time: 0.5308  data: 0.0005  max mem: 53905
Epoch: [2] Total time: 0:24:40 (0.5917 s / it)
Averaged stats: lr: 0.000600  min_lr: 0.000600  loss: 5.9575 (5.9925)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.2879 (4.1647)
Test:  [ 0/50]  eta: 0:02:37  loss: 3.4996 (3.4996)  acc1: 28.4000 (28.4000)  acc5: 56.0000 (56.0000)  time: 3.1467  data: 2.8444  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 3.8862 (3.8197)  acc1: 26.4000 (22.9091)  acc5: 46.0000 (48.0000)  time: 0.5438  data: 0.2590  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 3.8720 (3.8192)  acc1: 19.6000 (23.1048)  acc5: 46.0000 (48.2286)  time: 0.2838  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 4.1300 (3.9910)  acc1: 19.2000 (22.2065)  acc5: 42.0000 (45.3419)  time: 0.2840  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 4.3758 (4.0752)  acc1: 19.2000 (21.4341)  acc5: 39.2000 (43.9610)  time: 0.2836  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 4.2038 (4.0412)  acc1: 19.2000 (21.6960)  acc5: 41.2000 (44.5600)  time: 0.2833  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3439 s / it)
* Acc@1 21.552 Acc@5 44.614 loss 4.041
Accuracy of the model on the 50000 test images: 21.6%
Max accuracy: 21.55%
Epoch: [3]  [   0/2502]  eta: 1:41:28  lr: 0.000600  min_lr: 0.000600  loss: 5.7883 (5.7883)  weight_decay: 0.0500 (0.0500)  time: 2.4335  data: 1.8425  max mem: 53905
Epoch: [3]  [ 200/2502]  eta: 0:23:05  lr: 0.000616  min_lr: 0.000616  loss: 6.0597 (5.8340)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.6089 (3.6395)  time: 0.5905  data: 0.0003  max mem: 53905
Epoch: [3]  [ 400/2502]  eta: 0:20:53  lr: 0.000632  min_lr: 0.000632  loss: 5.8253 (5.8013)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.4056 (3.7250)  time: 0.5905  data: 0.0003  max mem: 53905
Epoch: [3]  [ 600/2502]  eta: 0:18:51  lr: 0.000648  min_lr: 0.000648  loss: 5.7653 (5.7966)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.9111 (3.6946)  time: 0.5993  data: 0.0004  max mem: 53905
Epoch: [3]  [ 800/2502]  eta: 0:16:50  lr: 0.000664  min_lr: 0.000664  loss: 5.7797 (5.7939)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.2727 (3.6850)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [3]  [1000/2502]  eta: 0:14:50  lr: 0.000680  min_lr: 0.000680  loss: 5.9724 (5.7903)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.6823 (3.6697)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [3]  [1200/2502]  eta: 0:12:51  lr: 0.000696  min_lr: 0.000696  loss: 5.7257 (5.7840)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.4300 (3.6383)  time: 0.5908  data: 0.0003  max mem: 53905
Epoch: [3]  [1400/2502]  eta: 0:10:53  lr: 0.000712  min_lr: 0.000712  loss: 5.7587 (5.7783)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.1580 (3.6160)  time: 0.5904  data: 0.0003  max mem: 53905
Epoch: [3]  [1600/2502]  eta: 0:08:54  lr: 0.000728  min_lr: 0.000728  loss: 5.5743 (5.7676)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.3303 (3.6189)  time: 0.5895  data: 0.0003  max mem: 53905
Epoch: [3]  [1800/2502]  eta: 0:06:55  lr: 0.000744  min_lr: 0.000744  loss: 5.7898 (5.7535)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.8507 (3.6328)  time: 0.5898  data: 0.0003  max mem: 53905
Epoch: [3]  [2000/2502]  eta: 0:04:57  lr: 0.000760  min_lr: 0.000760  loss: 5.6511 (5.7372)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.3172 (3.6104)  time: 0.5901  data: 0.0004  max mem: 53905
Epoch: [3]  [2200/2502]  eta: 0:02:58  lr: 0.000776  min_lr: 0.000776  loss: 5.8309 (5.7253)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.3930 (3.5790)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [3]  [2400/2502]  eta: 0:01:00  lr: 0.000792  min_lr: 0.000792  loss: 5.7900 (5.7129)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.3149 (3.5498)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [3]  [2501/2502]  eta: 0:00:00  lr: 0.000800  min_lr: 0.000800  loss: 5.8021 (5.7084)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.0661 (3.5376)  time: 0.5316  data: 0.0007  max mem: 53905
Epoch: [3] Total time: 0:24:40 (0.5916 s / it)
Averaged stats: lr: 0.000800  min_lr: 0.000800  loss: 5.8021 (5.7097)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.0661 (3.5376)
Test:  [ 0/50]  eta: 0:02:55  loss: 2.7446 (2.7446)  acc1: 42.0000 (42.0000)  acc5: 74.0000 (74.0000)  time: 3.5087  data: 3.1852  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 3.0439 (3.0518)  acc1: 36.0000 (35.0182)  acc5: 63.2000 (63.7091)  time: 0.5766  data: 0.2899  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 3.0677 (3.0598)  acc1: 33.2000 (35.4095)  acc5: 63.2000 (64.0381)  time: 0.2835  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 3.3582 (3.2778)  acc1: 32.0000 (33.2000)  acc5: 54.8000 (60.1032)  time: 0.2838  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 3.8330 (3.4062)  acc1: 26.0000 (31.6000)  acc5: 49.6000 (57.4244)  time: 0.2836  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 3.7810 (3.3973)  acc1: 26.4000 (31.8080)  acc5: 49.6000 (57.2400)  time: 0.2833  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3503 s / it)
* Acc@1 31.664 Acc@5 57.060 loss 3.400
Accuracy of the model on the 50000 test images: 31.7%
Max accuracy: 31.66%
Epoch: [4]  [   0/2502]  eta: 1:40:04  lr: 0.000800  min_lr: 0.000800  loss: 6.1019 (6.1019)  weight_decay: 0.0500 (0.0500)  time: 2.3999  data: 1.8093  max mem: 53905
Epoch: [4]  [ 200/2502]  eta: 0:22:59  lr: 0.000816  min_lr: 0.000816  loss: 5.8099 (5.5740)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.2631 (3.3248)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [4]  [ 400/2502]  eta: 0:20:53  lr: 0.000832  min_lr: 0.000832  loss: 5.7335 (5.5587)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.9060 (3.2653)  time: 0.5903  data: 0.0003  max mem: 53905
Epoch: [4]  [ 600/2502]  eta: 0:18:49  lr: 0.000848  min_lr: 0.000848  loss: 5.4561 (5.5601)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.3374 (3.2517)  time: 0.5896  data: 0.0004  max mem: 53905
Epoch: [4]  [ 800/2502]  eta: 0:16:49  lr: 0.000864  min_lr: 0.000864  loss: 5.2846 (5.5475)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.6706 (3.2952)  time: 0.5899  data: 0.0003  max mem: 53905
Epoch: [4]  [1000/2502]  eta: 0:14:50  lr: 0.000880  min_lr: 0.000880  loss: 5.6816 (5.5379)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.0432 (3.2656)  time: 0.5902  data: 0.0004  max mem: 53905
Epoch: [4]  [1200/2502]  eta: 0:12:51  lr: 0.000896  min_lr: 0.000896  loss: 5.6599 (5.5344)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.0390 (3.2521)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [4]  [1400/2502]  eta: 0:10:52  lr: 0.000912  min_lr: 0.000912  loss: 5.6557 (5.5247)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.8664 (3.2116)  time: 0.6035  data: 0.0003  max mem: 53905
Epoch: [4]  [1600/2502]  eta: 0:08:54  lr: 0.000928  min_lr: 0.000928  loss: 5.4707 (5.5202)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.6363 (3.1817)  time: 0.5909  data: 0.0003  max mem: 53905
Epoch: [4]  [1800/2502]  eta: 0:06:55  lr: 0.000944  min_lr: 0.000944  loss: 5.3330 (5.5126)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.0803 (3.1716)  time: 0.5905  data: 0.0003  max mem: 53905
Epoch: [4]  [2000/2502]  eta: 0:04:57  lr: 0.000960  min_lr: 0.000960  loss: 5.0993 (5.5004)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.3885 (3.1355)  time: 0.5907  data: 0.0003  max mem: 53905
Epoch: [4]  [2200/2502]  eta: 0:02:58  lr: 0.000976  min_lr: 0.000976  loss: 5.1630 (5.4851)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.6198 (3.0954)  time: 0.5908  data: 0.0003  max mem: 53905
Epoch: [4]  [2400/2502]  eta: 0:01:00  lr: 0.000992  min_lr: 0.000992  loss: 5.5491 (5.4755)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.6177 (3.0869)  time: 0.5966  data: 0.0004  max mem: 53905
Epoch: [4]  [2501/2502]  eta: 0:00:00  lr: 0.001000  min_lr: 0.001000  loss: 5.4763 (5.4740)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.5589 (3.0700)  time: 0.5327  data: 0.0007  max mem: 53905
Epoch: [4] Total time: 0:24:40 (0.5917 s / it)
Averaged stats: lr: 0.001000  min_lr: 0.001000  loss: 5.4763 (5.4684)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.5589 (3.0700)
Test:  [ 0/50]  eta: 0:03:15  loss: 2.3174 (2.3174)  acc1: 49.2000 (49.2000)  acc5: 80.8000 (80.8000)  time: 3.9050  data: 3.6054  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 2.8074 (2.7126)  acc1: 43.6000 (41.9273)  acc5: 69.6000 (70.1818)  time: 0.6126  data: 0.3282  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 2.6713 (2.6724)  acc1: 40.4000 (42.5524)  acc5: 71.2000 (71.3714)  time: 0.2836  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 2.9506 (2.8976)  acc1: 38.0000 (40.0774)  acc5: 64.4000 (67.0581)  time: 0.2838  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 3.4372 (3.0273)  acc1: 32.0000 (38.0976)  acc5: 56.4000 (64.5073)  time: 0.2835  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 3.4000 (3.0258)  acc1: 32.4000 (38.1680)  acc5: 56.4000 (64.3280)  time: 0.2832  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3592 s / it)
* Acc@1 38.412 Acc@5 64.388 loss 3.021
Accuracy of the model on the 50000 test images: 38.4%
Max accuracy: 38.41%
Epoch: [5]  [   0/2502]  eta: 1:30:54  lr: 0.001000  min_lr: 0.001000  loss: 5.5978 (5.5978)  weight_decay: 0.0500 (0.0500)  time: 2.1801  data: 1.5784  max mem: 53905
Epoch: [5]  [ 200/2502]  eta: 0:23:01  lr: 0.001016  min_lr: 0.001016  loss: 5.6864 (5.3504)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.7118 (2.7625)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [5]  [ 400/2502]  eta: 0:20:53  lr: 0.001032  min_lr: 0.001032  loss: 5.5644 (5.3642)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.2974 (2.6397)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [5]  [ 600/2502]  eta: 0:18:51  lr: 0.001048  min_lr: 0.001048  loss: 5.5426 (5.3529)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.6562 (2.6384)  time: 0.5924  data: 0.0008  max mem: 53905
Epoch: [5]  [ 800/2502]  eta: 0:16:51  lr: 0.001064  min_lr: 0.001064  loss: 5.2037 (5.3495)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.3662 (2.6199)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [5]  [1000/2502]  eta: 0:14:52  lr: 0.001080  min_lr: 0.001080  loss: 5.4446 (5.3406)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.4128 (2.6118)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [5]  [1200/2502]  eta: 0:12:53  lr: 0.001096  min_lr: 0.001096  loss: 5.2203 (5.3343)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.3505 (2.6005)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [5]  [1400/2502]  eta: 0:10:54  lr: 0.001112  min_lr: 0.001112  loss: 5.4354 (5.3188)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.4313 (2.5868)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [5]  [1600/2502]  eta: 0:08:55  lr: 0.001128  min_lr: 0.001128  loss: 5.4570 (5.3158)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.2016 (2.5887)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [5]  [1800/2502]  eta: 0:06:56  lr: 0.001144  min_lr: 0.001144  loss: 5.3432 (5.3108)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.5448 (2.5857)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [5]  [2000/2502]  eta: 0:04:57  lr: 0.001160  min_lr: 0.001160  loss: 4.8899 (5.3026)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.1367 (2.5598)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [5]  [2200/2502]  eta: 0:02:59  lr: 0.001176  min_lr: 0.001176  loss: 5.3959 (5.2964)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.6585 (2.5564)  time: 0.5959  data: 0.0005  max mem: 53905
Epoch: [5]  [2400/2502]  eta: 0:01:00  lr: 0.001192  min_lr: 0.001192  loss: 5.1458 (5.2858)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.3024 (2.5375)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [5]  [2501/2502]  eta: 0:00:00  lr: 0.001200  min_lr: 0.001200  loss: 5.2407 (5.2829)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.0700 (2.5258)  time: 0.5327  data: 0.0008  max mem: 53905
Epoch: [5] Total time: 0:24:43 (0.5927 s / it)
Averaged stats: lr: 0.001200  min_lr: 0.001200  loss: 5.2407 (5.2694)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.0700 (2.5258)
Test:  [ 0/50]  eta: 0:02:38  loss: 1.9675 (1.9675)  acc1: 56.0000 (56.0000)  acc5: 84.4000 (84.4000)  time: 3.1798  data: 2.8521  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 2.3777 (2.4049)  acc1: 49.6000 (48.5455)  acc5: 76.8000 (77.0909)  time: 0.5579  data: 0.2711  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 2.3520 (2.3326)  acc1: 49.6000 (49.9048)  acc5: 78.0000 (78.2286)  time: 0.2895  data: 0.0068  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 2.4620 (2.5757)  acc1: 43.2000 (46.3742)  acc5: 67.2000 (73.2129)  time: 0.2834  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 3.1708 (2.7271)  acc1: 36.8000 (43.6976)  acc5: 60.4000 (70.0293)  time: 0.2832  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 3.1108 (2.7419)  acc1: 37.2000 (43.6480)  acc5: 60.8000 (69.4560)  time: 0.2829  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3462 s / it)
* Acc@1 43.624 Acc@5 69.484 loss 2.743
Accuracy of the model on the 50000 test images: 43.6%
Max accuracy: 43.62%
Epoch: [6]  [   0/2502]  eta: 1:39:29  lr: 0.001200  min_lr: 0.001200  loss: 4.5005 (4.5005)  weight_decay: 0.0500 (0.0500)  time: 2.3859  data: 1.7942  max mem: 53905
Epoch: [6]  [ 200/2502]  eta: 0:23:01  lr: 0.001216  min_lr: 0.001216  loss: 5.4390 (5.2063)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.1599 (2.3302)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [6]  [ 400/2502]  eta: 0:20:53  lr: 0.001232  min_lr: 0.001232  loss: 5.3274 (5.1691)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.2012 (2.3670)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [6]  [ 600/2502]  eta: 0:18:51  lr: 0.001248  min_lr: 0.001248  loss: 5.5248 (5.1654)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.0245 (2.2920)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [6]  [ 800/2502]  eta: 0:16:50  lr: 0.001264  min_lr: 0.001264  loss: 5.3631 (5.1666)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.0595 (2.2738)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [6]  [1000/2502]  eta: 0:14:51  lr: 0.001280  min_lr: 0.001280  loss: 5.3484 (5.1365)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8649 (2.2604)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [6]  [1200/2502]  eta: 0:12:52  lr: 0.001296  min_lr: 0.001296  loss: 5.2121 (5.1351)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.0522 (2.2541)  time: 0.5912  data: 0.0006  max mem: 53905
Epoch: [6]  [1400/2502]  eta: 0:10:53  lr: 0.001312  min_lr: 0.001312  loss: 4.7812 (5.1218)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.0956 (2.2341)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [6]  [1600/2502]  eta: 0:08:54  lr: 0.001328  min_lr: 0.001328  loss: 5.3943 (5.1161)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.1494 (2.2369)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [6]  [1800/2502]  eta: 0:06:56  lr: 0.001344  min_lr: 0.001344  loss: 5.2478 (5.1149)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8539 (2.2173)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [6]  [2000/2502]  eta: 0:04:57  lr: 0.001360  min_lr: 0.001360  loss: 5.3032 (5.1206)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9569 (2.1990)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [6]  [2200/2502]  eta: 0:02:58  lr: 0.001376  min_lr: 0.001376  loss: 5.0313 (5.1102)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8765 (2.1812)  time: 0.5904  data: 0.0005  max mem: 53905
Epoch: [6]  [2400/2502]  eta: 0:01:00  lr: 0.001392  min_lr: 0.001392  loss: 5.3318 (5.1094)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.0996 (2.1652)  time: 0.5903  data: 0.0005  max mem: 53905
Epoch: [6]  [2501/2502]  eta: 0:00:00  lr: 0.001400  min_lr: 0.001400  loss: 4.9227 (5.1064)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8977 (2.1563)  time: 0.5320  data: 0.0007  max mem: 53905
Epoch: [6] Total time: 0:24:41 (0.5921 s / it)
Averaged stats: lr: 0.001400  min_lr: 0.001400  loss: 4.9227 (5.0990)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8977 (2.1563)
Test:  [ 0/50]  eta: 0:02:30  loss: 1.6725 (1.6725)  acc1: 65.2000 (65.2000)  acc5: 86.0000 (86.0000)  time: 3.0139  data: 2.6894  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 2.2115 (2.1256)  acc1: 57.6000 (54.1455)  acc5: 79.6000 (80.1091)  time: 0.5598  data: 0.2743  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 2.0483 (2.0551)  acc1: 53.6000 (54.8000)  acc5: 82.0000 (81.4857)  time: 0.2985  data: 0.0167  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 2.2532 (2.2883)  acc1: 49.6000 (51.8710)  acc5: 73.2000 (77.1871)  time: 0.2826  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 2.9207 (2.4424)  acc1: 43.2000 (49.2390)  acc5: 66.4000 (74.4781)  time: 0.2823  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 2.9159 (2.4644)  acc1: 42.0000 (48.7680)  acc5: 66.4000 (74.1120)  time: 0.2820  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3460 s / it)
* Acc@1 48.530 Acc@5 74.184 loss 2.463
Accuracy of the model on the 50000 test images: 48.5%
Max accuracy: 48.53%
Epoch: [7]  [   0/2502]  eta: 1:50:13  lr: 0.001400  min_lr: 0.001400  loss: 5.7226 (5.7226)  weight_decay: 0.0500 (0.0500)  time: 2.6432  data: 2.0365  max mem: 53905
Epoch: [7]  [ 200/2502]  eta: 0:23:06  lr: 0.001416  min_lr: 0.001416  loss: 5.2530 (4.9537)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9981 (2.0007)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [7]  [ 400/2502]  eta: 0:20:54  lr: 0.001432  min_lr: 0.001432  loss: 4.8411 (4.9609)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9599 (2.0261)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [7]  [ 600/2502]  eta: 0:18:52  lr: 0.001448  min_lr: 0.001448  loss: 4.7066 (4.9679)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8251 (2.0005)  time: 0.5915  data: 0.0006  max mem: 53905
Epoch: [7]  [ 800/2502]  eta: 0:16:52  lr: 0.001464  min_lr: 0.001464  loss: 5.0833 (4.9891)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.1556 (1.9956)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [7]  [1000/2502]  eta: 0:14:52  lr: 0.001480  min_lr: 0.001480  loss: 4.8799 (4.9794)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7207 (1.9669)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [7]  [1200/2502]  eta: 0:12:53  lr: 0.001496  min_lr: 0.001496  loss: 4.6664 (4.9702)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8525 (1.9706)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [7]  [1400/2502]  eta: 0:10:54  lr: 0.001512  min_lr: 0.001512  loss: 5.1052 (4.9772)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7163 (1.9376)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [7]  [1600/2502]  eta: 0:08:55  lr: 0.001528  min_lr: 0.001528  loss: 5.0918 (4.9668)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7055 (1.9352)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [7]  [1800/2502]  eta: 0:06:56  lr: 0.001544  min_lr: 0.001544  loss: 4.8111 (4.9627)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6639 (1.9310)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [7]  [2000/2502]  eta: 0:04:57  lr: 0.001560  min_lr: 0.001560  loss: 5.0107 (4.9541)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9776 (1.9190)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [7]  [2200/2502]  eta: 0:02:59  lr: 0.001576  min_lr: 0.001576  loss: 5.1009 (4.9634)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6308 (1.9090)  time: 0.5970  data: 0.0005  max mem: 53905
Epoch: [7]  [2400/2502]  eta: 0:01:00  lr: 0.001592  min_lr: 0.001592  loss: 4.8877 (4.9593)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7526 (1.8996)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [7]  [2501/2502]  eta: 0:00:00  lr: 0.001600  min_lr: 0.001600  loss: 5.2173 (4.9552)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8178 (1.8978)  time: 0.5321  data: 0.0006  max mem: 53905
Epoch: [7] Total time: 0:24:42 (0.5926 s / it)
Averaged stats: lr: 0.001600  min_lr: 0.001600  loss: 5.2173 (4.9582)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8178 (1.8978)
Test:  [ 0/50]  eta: 0:02:31  loss: 1.6151 (1.6151)  acc1: 70.4000 (70.4000)  acc5: 88.8000 (88.8000)  time: 3.0254  data: 2.7095  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.9289 (1.9494)  acc1: 61.6000 (59.6000)  acc5: 84.8000 (83.3818)  time: 0.5547  data: 0.2674  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.8797 (1.8897)  acc1: 56.8000 (59.9429)  acc5: 84.8000 (84.0381)  time: 0.3020  data: 0.0118  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.9563 (2.1224)  acc1: 53.6000 (56.1032)  acc5: 77.6000 (80.0387)  time: 0.2918  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 2.6631 (2.2692)  acc1: 44.8000 (53.0927)  acc5: 70.0000 (77.3756)  time: 0.2850  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 2.6628 (2.2917)  acc1: 46.0000 (52.5600)  acc5: 70.8000 (76.9440)  time: 0.2827  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3482 s / it)
* Acc@1 52.364 Acc@5 77.302 loss 2.291
Accuracy of the model on the 50000 test images: 52.4%
Max accuracy: 52.36%
Epoch: [8]  [   0/2502]  eta: 1:42:48  lr: 0.001600  min_lr: 0.001600  loss: 5.6053 (5.6053)  weight_decay: 0.0500 (0.0500)  time: 2.4652  data: 1.8561  max mem: 53905
Epoch: [8]  [ 200/2502]  eta: 0:23:02  lr: 0.001616  min_lr: 0.001616  loss: 4.7826 (4.9271)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8838 (1.7863)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [8]  [ 400/2502]  eta: 0:20:55  lr: 0.001632  min_lr: 0.001632  loss: 4.8267 (4.8802)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6672 (1.7338)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [8]  [ 600/2502]  eta: 0:18:51  lr: 0.001648  min_lr: 0.001648  loss: 4.9328 (4.8708)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7092 (1.7179)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [8]  [ 800/2502]  eta: 0:16:52  lr: 0.001664  min_lr: 0.001664  loss: 5.0651 (4.8958)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5184 (1.6821)  time: 0.6052  data: 0.0004  max mem: 53905
Epoch: [8]  [1000/2502]  eta: 0:14:52  lr: 0.001680  min_lr: 0.001680  loss: 4.9537 (4.8839)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6921 (1.6821)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [8]  [1200/2502]  eta: 0:12:53  lr: 0.001696  min_lr: 0.001696  loss: 4.8013 (4.8811)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5879 (1.6857)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [8]  [1400/2502]  eta: 0:10:54  lr: 0.001712  min_lr: 0.001712  loss: 5.0997 (4.8770)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5286 (1.6782)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [8]  [1600/2502]  eta: 0:08:55  lr: 0.001728  min_lr: 0.001728  loss: 5.0076 (4.8727)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4442 (1.6568)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [8]  [1800/2502]  eta: 0:06:56  lr: 0.001744  min_lr: 0.001744  loss: 5.0554 (4.8643)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4236 (1.6482)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [8]  [2000/2502]  eta: 0:04:57  lr: 0.001760  min_lr: 0.001760  loss: 4.8379 (4.8556)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3754 (1.6367)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [8]  [2200/2502]  eta: 0:02:59  lr: 0.001776  min_lr: 0.001776  loss: 4.6344 (4.8582)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4784 (1.6181)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [8]  [2400/2502]  eta: 0:01:00  lr: 0.001792  min_lr: 0.001792  loss: 4.6525 (4.8525)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6048 (1.6156)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [8]  [2501/2502]  eta: 0:00:00  lr: 0.001800  min_lr: 0.001800  loss: 4.8518 (4.8515)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4337 (1.6106)  time: 0.5407  data: 0.0007  max mem: 53905
Epoch: [8] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.001800  min_lr: 0.001800  loss: 4.8518 (4.8516)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4337 (1.6106)
Test:  [ 0/50]  eta: 0:02:38  loss: 1.4085 (1.4085)  acc1: 75.6000 (75.6000)  acc5: 91.6000 (91.6000)  time: 3.1734  data: 2.8543  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 1.9164 (1.8288)  acc1: 62.8000 (61.7455)  acc5: 85.2000 (86.5455)  time: 0.5456  data: 0.2600  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.8670 (1.8183)  acc1: 59.2000 (62.0000)  acc5: 86.8000 (87.0857)  time: 0.2829  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.9568 (2.0371)  acc1: 52.8000 (58.0516)  acc5: 82.0000 (82.7742)  time: 0.2831  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 2.5952 (2.1843)  acc1: 47.6000 (55.2195)  acc5: 71.2000 (79.9317)  time: 0.2829  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 2.5826 (2.2026)  acc1: 47.6000 (54.7120)  acc5: 71.6000 (79.3280)  time: 0.2827  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3437 s / it)
* Acc@1 54.640 Acc@5 79.176 loss 2.202
Accuracy of the model on the 50000 test images: 54.6%
Max accuracy: 54.64%
Epoch: [9]  [   0/2502]  eta: 1:45:34  lr: 0.001800  min_lr: 0.001800  loss: 5.0951 (5.0951)  weight_decay: 0.0500 (0.0500)  time: 2.5319  data: 1.9357  max mem: 53905
Epoch: [9]  [ 200/2502]  eta: 0:23:02  lr: 0.001816  min_lr: 0.001816  loss: 4.7755 (4.7950)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3643 (1.4420)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [9]  [ 400/2502]  eta: 0:20:53  lr: 0.001832  min_lr: 0.001832  loss: 4.7205 (4.7871)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4604 (1.4509)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [9]  [ 600/2502]  eta: 0:18:52  lr: 0.001848  min_lr: 0.001848  loss: 5.0076 (4.7838)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3070 (1.4513)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [9]  [ 800/2502]  eta: 0:16:51  lr: 0.001864  min_lr: 0.001864  loss: 4.7205 (4.7816)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3822 (1.4781)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [9]  [1000/2502]  eta: 0:14:51  lr: 0.001880  min_lr: 0.001880  loss: 4.9822 (4.7864)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1542 (1.4467)  time: 0.5915  data: 0.0006  max mem: 53905
Epoch: [9]  [1200/2502]  eta: 0:12:53  lr: 0.001896  min_lr: 0.001896  loss: 4.5908 (4.7764)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4779 (1.4387)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [9]  [1400/2502]  eta: 0:10:53  lr: 0.001912  min_lr: 0.001912  loss: 4.6738 (4.7764)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2728 (1.4339)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [9]  [1600/2502]  eta: 0:08:55  lr: 0.001928  min_lr: 0.001928  loss: 4.7081 (4.7708)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3384 (1.4263)  time: 0.5951  data: 0.0005  max mem: 53905
Epoch: [9]  [1800/2502]  eta: 0:06:56  lr: 0.001944  min_lr: 0.001944  loss: 4.7304 (4.7729)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4006 (1.4185)  time: 0.5905  data: 0.0005  max mem: 53905
Epoch: [9]  [2000/2502]  eta: 0:04:57  lr: 0.001960  min_lr: 0.001960  loss: 4.7373 (4.7664)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2567 (1.4087)  time: 0.5902  data: 0.0004  max mem: 53905
Epoch: [9]  [2200/2502]  eta: 0:02:59  lr: 0.001976  min_lr: 0.001976  loss: 4.7791 (4.7605)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2150 (1.3970)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [9]  [2400/2502]  eta: 0:01:00  lr: 0.001992  min_lr: 0.001992  loss: 4.5090 (4.7538)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2567 (1.3877)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [9]  [2501/2502]  eta: 0:00:00  lr: 0.002000  min_lr: 0.002000  loss: 5.0425 (4.7551)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3668 (inf)  time: 0.5329  data: 0.0009  max mem: 53905
Epoch: [9] Total time: 0:24:42 (0.5926 s / it)
Averaged stats: lr: 0.002000  min_lr: 0.002000  loss: 5.0425 (4.7540)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3668 (inf)
Test:  [ 0/50]  eta: 0:02:54  loss: 1.4347 (1.4347)  acc1: 76.0000 (76.0000)  acc5: 93.6000 (93.6000)  time: 3.4825  data: 3.1733  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.8840 (1.8440)  acc1: 66.4000 (62.9091)  acc5: 87.6000 (87.3091)  time: 0.5854  data: 0.3007  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.8007 (1.7925)  acc1: 60.0000 (62.8762)  acc5: 88.4000 (87.8476)  time: 0.2896  data: 0.0070  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.9285 (2.0094)  acc1: 56.0000 (59.1097)  acc5: 78.0000 (83.8452)  time: 0.2833  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 2.4815 (2.1470)  acc1: 49.6000 (56.6537)  acc5: 73.6000 (81.2293)  time: 0.2830  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 2.4551 (2.1809)  acc1: 50.8000 (56.2240)  acc5: 74.4000 (80.7040)  time: 0.2827  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3517 s / it)
* Acc@1 56.096 Acc@5 80.636 loss 2.184
Accuracy of the model on the 50000 test images: 56.1%
Max accuracy: 56.10%
Epoch: [10]  [   0/2502]  eta: 1:50:18  lr: 0.002000  min_lr: 0.002000  loss: 4.8163 (4.8163)  weight_decay: 0.0500 (0.0500)  time: 2.6451  data: 2.0373  max mem: 53905
Epoch: [10]  [ 200/2502]  eta: 0:23:10  lr: 0.002016  min_lr: 0.002016  loss: 4.8780 (4.7247)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2480 (1.2817)  time: 0.5992  data: 0.0004  max mem: 53905
Epoch: [10]  [ 400/2502]  eta: 0:20:56  lr: 0.002032  min_lr: 0.002032  loss: 4.6750 (4.7293)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1943 (1.3012)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [10]  [ 600/2502]  eta: 0:18:53  lr: 0.002048  min_lr: 0.002048  loss: 4.8555 (4.7104)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2909 (1.3085)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [10]  [ 800/2502]  eta: 0:16:52  lr: 0.002064  min_lr: 0.002064  loss: 4.6536 (4.6963)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2815 (1.3144)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [10]  [1000/2502]  eta: 0:14:52  lr: 0.002080  min_lr: 0.002080  loss: 4.7442 (4.6856)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1410 (1.2968)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [10]  [1200/2502]  eta: 0:12:53  lr: 0.002096  min_lr: 0.002096  loss: 4.9660 (4.6757)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1466 (1.2937)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [10]  [1400/2502]  eta: 0:10:54  lr: 0.002112  min_lr: 0.002112  loss: 4.5203 (4.6655)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1164 (1.2743)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [10]  [1600/2502]  eta: 0:08:55  lr: 0.002128  min_lr: 0.002128  loss: 4.9405 (4.6730)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0603 (1.2641)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [10]  [1800/2502]  eta: 0:06:56  lr: 0.002144  min_lr: 0.002144  loss: 5.0286 (4.6765)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0802 (1.2503)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [10]  [2000/2502]  eta: 0:04:57  lr: 0.002160  min_lr: 0.002160  loss: 4.8506 (4.6803)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1752 (1.2464)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [10]  [2200/2502]  eta: 0:02:59  lr: 0.002176  min_lr: 0.002176  loss: 4.7732 (4.6783)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0934 (1.2358)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [10]  [2400/2502]  eta: 0:01:00  lr: 0.002192  min_lr: 0.002192  loss: 4.6070 (4.6706)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1480 (1.2278)  time: 0.6032  data: 0.0005  max mem: 53905
Epoch: [10]  [2501/2502]  eta: 0:00:00  lr: 0.002200  min_lr: 0.002200  loss: 4.6805 (4.6690)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0531 (1.2237)  time: 0.5321  data: 0.0006  max mem: 53905
Epoch: [10] Total time: 0:24:42 (0.5926 s / it)
Averaged stats: lr: 0.002200  min_lr: 0.002200  loss: 4.6805 (4.6568)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0531 (1.2237)
Test:  [ 0/50]  eta: 0:02:40  loss: 1.4062 (1.4062)  acc1: 72.0000 (72.0000)  acc5: 89.6000 (89.6000)  time: 3.2130  data: 2.9065  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 1.5695 (1.5975)  acc1: 68.8000 (65.3818)  acc5: 88.0000 (88.3273)  time: 0.5494  data: 0.2647  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.5254 (1.5940)  acc1: 64.0000 (65.9429)  acc5: 88.4000 (88.7238)  time: 0.2832  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.8778 (1.8069)  acc1: 60.4000 (62.0129)  acc5: 81.6000 (85.2258)  time: 0.2833  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 2.3076 (1.9368)  acc1: 51.6000 (59.4049)  acc5: 76.0000 (82.9951)  time: 0.2830  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 2.3076 (1.9705)  acc1: 51.6000 (58.5920)  acc5: 76.0000 (82.4160)  time: 0.2827  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3451 s / it)
* Acc@1 58.240 Acc@5 82.164 loss 1.976
Accuracy of the model on the 50000 test images: 58.2%
Max accuracy: 58.24%
Epoch: [11]  [   0/2502]  eta: 1:31:58  lr: 0.002200  min_lr: 0.002200  loss: 5.3128 (5.3128)  weight_decay: 0.0500 (0.0500)  time: 2.2055  data: 1.5997  max mem: 53905
Epoch: [11]  [ 200/2502]  eta: 0:23:00  lr: 0.002216  min_lr: 0.002216  loss: 4.3327 (4.4975)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1003 (1.1294)  time: 0.5910  data: 0.0008  max mem: 53905
Epoch: [11]  [ 400/2502]  eta: 0:20:51  lr: 0.002232  min_lr: 0.002232  loss: 4.5325 (4.5491)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0598 (1.1127)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [11]  [ 600/2502]  eta: 0:18:52  lr: 0.002248  min_lr: 0.002248  loss: 4.7227 (4.5459)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1358 (1.1069)  time: 0.5927  data: 0.0006  max mem: 53905
Epoch: [11]  [ 800/2502]  eta: 0:16:51  lr: 0.002264  min_lr: 0.002264  loss: 4.8587 (4.5753)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0980 (1.1230)  time: 0.5911  data: 0.0008  max mem: 53905
Epoch: [11]  [1000/2502]  eta: 0:14:52  lr: 0.002280  min_lr: 0.002280  loss: 4.7748 (4.5705)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9995 (1.1132)  time: 0.6045  data: 0.0007  max mem: 53905
Epoch: [11]  [1200/2502]  eta: 0:12:53  lr: 0.002296  min_lr: 0.002296  loss: 4.9544 (4.5840)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9502 (1.0991)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [11]  [1400/2502]  eta: 0:10:54  lr: 0.002312  min_lr: 0.002312  loss: 4.4346 (4.5814)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9194 (nan)  time: 0.5967  data: 0.0008  max mem: 53905
Epoch: [11]  [1600/2502]  eta: 0:08:55  lr: 0.002328  min_lr: 0.002328  loss: 4.8920 (4.5841)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0112 (nan)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [11]  [1800/2502]  eta: 0:06:56  lr: 0.002344  min_lr: 0.002344  loss: 4.7814 (4.5804)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0662 (nan)  time: 0.5907  data: 0.0006  max mem: 53905
Epoch: [11]  [2000/2502]  eta: 0:04:57  lr: 0.002360  min_lr: 0.002360  loss: 4.2784 (4.5741)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9744 (nan)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [11]  [2200/2502]  eta: 0:02:59  lr: 0.002376  min_lr: 0.002376  loss: 4.5157 (4.5762)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0154 (nan)  time: 0.5908  data: 0.0006  max mem: 53905
Epoch: [11]  [2400/2502]  eta: 0:01:00  lr: 0.002392  min_lr: 0.002392  loss: 4.5316 (4.5746)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9773 (nan)  time: 0.5906  data: 0.0006  max mem: 53905
Epoch: [11]  [2501/2502]  eta: 0:00:00  lr: 0.002400  min_lr: 0.002400  loss: 4.8207 (4.5745)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8947 (nan)  time: 0.5327  data: 0.0008  max mem: 53905
Epoch: [11] Total time: 0:24:42 (0.5926 s / it)
Averaged stats: lr: 0.002400  min_lr: 0.002400  loss: 4.8207 (4.5888)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8947 (nan)
Test:  [ 0/50]  eta: 0:03:05  loss: 1.3872 (1.3872)  acc1: 77.6000 (77.6000)  acc5: 91.6000 (91.6000)  time: 3.7188  data: 3.3825  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.5729 (1.5924)  acc1: 70.0000 (65.6727)  acc5: 89.6000 (88.8727)  time: 0.5951  data: 0.3080  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.4629 (1.5578)  acc1: 68.4000 (66.5714)  acc5: 90.0000 (89.6000)  time: 0.2829  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.7215 (1.7536)  acc1: 60.4000 (62.8258)  acc5: 84.4000 (86.3097)  time: 0.2831  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 2.2614 (1.8852)  acc1: 52.0000 (60.0976)  acc5: 77.2000 (83.9902)  time: 0.2828  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 2.2535 (1.9173)  acc1: 52.8000 (59.4720)  acc5: 78.4000 (83.5120)  time: 0.2825  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3543 s / it)
* Acc@1 59.450 Acc@5 83.122 loss 1.918
Accuracy of the model on the 50000 test images: 59.5%
Max accuracy: 59.45%
Epoch: [12]  [   0/2502]  eta: 1:41:15  lr: 0.002400  min_lr: 0.002400  loss: 5.3509 (5.3509)  weight_decay: 0.0500 (0.0500)  time: 2.4281  data: 1.8199  max mem: 53905
Epoch: [12]  [ 200/2502]  eta: 0:23:06  lr: 0.002416  min_lr: 0.002416  loss: 4.6623 (4.6385)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0048 (nan)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [12]  [ 400/2502]  eta: 0:20:54  lr: 0.002432  min_lr: 0.002432  loss: 4.5509 (4.6215)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8986 (nan)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [12]  [ 600/2502]  eta: 0:18:51  lr: 0.002448  min_lr: 0.002448  loss: 4.5812 (4.6100)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9910 (nan)  time: 0.5913  data: 0.0006  max mem: 53905
Epoch: [12]  [ 800/2502]  eta: 0:16:52  lr: 0.002464  min_lr: 0.002464  loss: 4.8034 (4.6066)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8742 (nan)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [12]  [1000/2502]  eta: 0:14:52  lr: 0.002480  min_lr: 0.002480  loss: 4.5340 (4.5960)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9051 (nan)  time: 0.5915  data: 0.0006  max mem: 53905
Epoch: [12]  [1200/2502]  eta: 0:12:52  lr: 0.002496  min_lr: 0.002496  loss: 4.7105 (4.5949)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9232 (nan)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [12]  [1400/2502]  eta: 0:10:54  lr: 0.002512  min_lr: 0.002512  loss: 4.7033 (4.5928)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8920 (nan)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [12]  [1600/2502]  eta: 0:08:55  lr: 0.002528  min_lr: 0.002528  loss: 4.5594 (4.5886)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8412 (nan)  time: 0.5929  data: 0.0008  max mem: 53905
Epoch: [12]  [1800/2502]  eta: 0:06:56  lr: 0.002544  min_lr: 0.002544  loss: 4.4078 (4.5864)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8871 (nan)  time: 0.5988  data: 0.0007  max mem: 53905
Epoch: [12]  [2000/2502]  eta: 0:04:57  lr: 0.002560  min_lr: 0.002560  loss: 4.4457 (4.5764)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8351 (nan)  time: 0.5920  data: 0.0008  max mem: 53905
Epoch: [12]  [2200/2502]  eta: 0:02:59  lr: 0.002576  min_lr: 0.002576  loss: 4.5888 (4.5662)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8316 (nan)  time: 0.6019  data: 0.0011  max mem: 53905
Epoch: [12]  [2400/2502]  eta: 0:01:00  lr: 0.002592  min_lr: 0.002592  loss: 4.7249 (4.5596)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8427 (nan)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [12]  [2501/2502]  eta: 0:00:00  lr: 0.002600  min_lr: 0.002600  loss: 4.7028 (4.5602)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8403 (nan)  time: 0.5330  data: 0.0008  max mem: 53905
Epoch: [12] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.002600  min_lr: 0.002600  loss: 4.7028 (4.5358)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8403 (nan)
Test:  [ 0/50]  eta: 0:03:17  loss: 1.3186 (1.3186)  acc1: 78.0000 (78.0000)  acc5: 93.2000 (93.2000)  time: 3.9403  data: 3.6239  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 1.4939 (1.5386)  acc1: 70.8000 (67.0182)  acc5: 92.4000 (90.5091)  time: 0.6137  data: 0.3299  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.4960 (1.5407)  acc1: 66.8000 (67.2762)  acc5: 91.2000 (90.6286)  time: 0.2811  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.6998 (1.7542)  acc1: 59.2000 (63.4194)  acc5: 83.6000 (87.1097)  time: 0.2813  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 2.3529 (1.9009)  acc1: 51.6000 (60.6146)  acc5: 77.6000 (84.7707)  time: 0.2810  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 2.3473 (1.9338)  acc1: 51.6000 (59.9840)  acc5: 77.6000 (84.1520)  time: 0.2808  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3572 s / it)
* Acc@1 60.124 Acc@5 83.578 loss 1.938
Accuracy of the model on the 50000 test images: 60.1%
Max accuracy: 60.12%
Epoch: [13]  [   0/2502]  eta: 1:40:50  lr: 0.002600  min_lr: 0.002600  loss: 4.8195 (4.8195)  weight_decay: 0.0500 (0.0500)  time: 2.4184  data: 1.8134  max mem: 53905
Epoch: [13]  [ 200/2502]  eta: 0:23:01  lr: 0.002616  min_lr: 0.002616  loss: 4.4762 (4.5579)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8141 (0.8413)  time: 0.5906  data: 0.0006  max mem: 53905
Epoch: [13]  [ 400/2502]  eta: 0:20:54  lr: 0.002632  min_lr: 0.002632  loss: 4.8785 (4.5224)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7737 (0.8290)  time: 0.6006  data: 0.0006  max mem: 53905
Epoch: [13]  [ 600/2502]  eta: 0:18:52  lr: 0.002648  min_lr: 0.002648  loss: 4.6172 (4.5221)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7487 (0.8041)  time: 0.5905  data: 0.0006  max mem: 53905
Epoch: [13]  [ 800/2502]  eta: 0:16:51  lr: 0.002664  min_lr: 0.002664  loss: 4.3430 (4.4999)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8344 (0.8101)  time: 0.5966  data: 0.0006  max mem: 53905
Epoch: [13]  [1000/2502]  eta: 0:14:52  lr: 0.002680  min_lr: 0.002680  loss: 4.5011 (4.5067)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7590 (0.8086)  time: 0.5907  data: 0.0006  max mem: 53905
Epoch: [13]  [1200/2502]  eta: 0:12:52  lr: 0.002696  min_lr: 0.002696  loss: 4.4018 (4.4956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7796 (0.8102)  time: 0.5900  data: 0.0005  max mem: 53905
Epoch: [13]  [1400/2502]  eta: 0:10:53  lr: 0.002712  min_lr: 0.002712  loss: 4.5400 (4.4880)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7532 (0.8059)  time: 0.5903  data: 0.0007  max mem: 53905
Epoch: [13]  [1600/2502]  eta: 0:08:54  lr: 0.002728  min_lr: 0.002728  loss: 4.3578 (4.4801)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7539 (0.8052)  time: 0.5905  data: 0.0006  max mem: 53905
Epoch: [13]  [1800/2502]  eta: 0:06:56  lr: 0.002744  min_lr: 0.002744  loss: 4.3622 (4.4759)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7515 (0.8030)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [13]  [2000/2502]  eta: 0:04:57  lr: 0.002760  min_lr: 0.002760  loss: 4.4256 (4.4698)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7047 (0.7978)  time: 0.5910  data: 0.0007  max mem: 53905
Epoch: [13]  [2200/2502]  eta: 0:02:59  lr: 0.002776  min_lr: 0.002776  loss: 4.4825 (4.4633)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7743 (0.7969)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [13]  [2400/2502]  eta: 0:01:00  lr: 0.002792  min_lr: 0.002792  loss: 4.7488 (4.4594)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7958 (0.7974)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [13]  [2501/2502]  eta: 0:00:00  lr: 0.002800  min_lr: 0.002800  loss: 4.5490 (4.4588)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7662 (0.7968)  time: 0.5381  data: 0.0007  max mem: 53905
Epoch: [13] Total time: 0:24:42 (0.5925 s / it)
Averaged stats: lr: 0.002800  min_lr: 0.002800  loss: 4.5490 (4.4686)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7662 (0.7968)
Test:  [ 0/50]  eta: 0:02:43  loss: 1.1319 (1.1319)  acc1: 80.0000 (80.0000)  acc5: 93.6000 (93.6000)  time: 3.2637  data: 2.9337  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.4061 (1.4434)  acc1: 73.2000 (68.9818)  acc5: 92.0000 (90.6909)  time: 0.5537  data: 0.2671  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.3486 (1.4181)  acc1: 69.2000 (68.9714)  acc5: 92.0000 (91.0286)  time: 0.2830  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.6143 (1.6415)  acc1: 62.4000 (65.4065)  acc5: 84.4000 (87.5742)  time: 0.2832  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 2.1573 (1.7771)  acc1: 55.2000 (62.5073)  acc5: 78.8000 (85.4927)  time: 0.2828  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 2.1285 (1.8066)  acc1: 55.2000 (61.9520)  acc5: 80.4000 (84.8400)  time: 0.2827  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3453 s / it)
* Acc@1 61.700 Acc@5 84.614 loss 1.816
Accuracy of the model on the 50000 test images: 61.7%
Max accuracy: 61.70%
Epoch: [14]  [   0/2502]  eta: 1:40:59  lr: 0.002800  min_lr: 0.002800  loss: 3.8094 (3.8094)  weight_decay: 0.0500 (0.0500)  time: 2.4217  data: 1.8159  max mem: 53905
Epoch: [14]  [ 200/2502]  eta: 0:23:06  lr: 0.002816  min_lr: 0.002816  loss: 4.6349 (4.4857)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7814 (0.7890)  time: 0.5908  data: 0.0007  max mem: 53905
Epoch: [14]  [ 400/2502]  eta: 0:20:55  lr: 0.002832  min_lr: 0.002832  loss: 4.3775 (4.4439)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7776 (0.7844)  time: 0.5925  data: 0.0006  max mem: 53905
Epoch: [14]  [ 600/2502]  eta: 0:18:52  lr: 0.002848  min_lr: 0.002848  loss: 4.5990 (4.4581)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7498 (0.7763)  time: 0.5899  data: 0.0004  max mem: 53905
Epoch: [14]  [ 800/2502]  eta: 0:16:52  lr: 0.002864  min_lr: 0.002864  loss: 4.6567 (4.4569)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7164 (0.7755)  time: 0.5913  data: 0.0006  max mem: 53905
Epoch: [14]  [1000/2502]  eta: 0:14:52  lr: 0.002880  min_lr: 0.002880  loss: 4.6554 (4.4489)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7168 (0.7672)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [14]  [1200/2502]  eta: 0:12:53  lr: 0.002896  min_lr: 0.002896  loss: 4.5707 (4.4353)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6913 (0.7592)  time: 0.5986  data: 0.0006  max mem: 53905
Epoch: [14]  [1400/2502]  eta: 0:10:54  lr: 0.002912  min_lr: 0.002912  loss: 4.5380 (4.4305)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6876 (0.7470)  time: 0.5909  data: 0.0007  max mem: 53905
Epoch: [14]  [1600/2502]  eta: 0:08:55  lr: 0.002928  min_lr: 0.002928  loss: 4.7059 (4.4272)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6428 (0.7396)  time: 0.5908  data: 0.0007  max mem: 53905
Epoch: [14]  [1800/2502]  eta: 0:06:56  lr: 0.002944  min_lr: 0.002944  loss: 4.6406 (4.4256)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6562 (0.7370)  time: 0.5901  data: 0.0006  max mem: 53905
Epoch: [14]  [2000/2502]  eta: 0:04:57  lr: 0.002960  min_lr: 0.002960  loss: 4.4385 (4.4222)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7066 (0.7366)  time: 0.5904  data: 0.0006  max mem: 53905
Epoch: [14]  [2200/2502]  eta: 0:02:59  lr: 0.002976  min_lr: 0.002976  loss: 4.5884 (4.4164)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6937 (0.7357)  time: 0.5908  data: 0.0006  max mem: 53905
Epoch: [14]  [2400/2502]  eta: 0:01:00  lr: 0.002992  min_lr: 0.002992  loss: 4.3494 (4.4140)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7060 (0.7315)  time: 0.5907  data: 0.0006  max mem: 53905
Epoch: [14]  [2501/2502]  eta: 0:00:00  lr: 0.003000  min_lr: 0.003000  loss: 4.6905 (4.4125)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6462 (0.7294)  time: 0.5320  data: 0.0008  max mem: 53905
Epoch: [14] Total time: 0:24:42 (0.5927 s / it)
Averaged stats: lr: 0.003000  min_lr: 0.003000  loss: 4.6905 (4.4088)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6462 (0.7294)
Test:  [ 0/50]  eta: 0:02:39  loss: 1.1385 (1.1385)  acc1: 79.2000 (79.2000)  acc5: 93.6000 (93.6000)  time: 3.1844  data: 2.8613  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.4413 (1.3995)  acc1: 74.0000 (71.2727)  acc5: 92.0000 (91.2727)  time: 0.5505  data: 0.2644  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.3694 (1.4080)  acc1: 69.2000 (69.9619)  acc5: 90.8000 (91.0857)  time: 0.2851  data: 0.0027  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.5853 (1.6161)  acc1: 61.6000 (66.0387)  acc5: 84.8000 (87.7677)  time: 0.2832  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 2.1514 (1.7458)  acc1: 56.0000 (63.6195)  acc5: 78.8000 (85.6488)  time: 0.2828  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 2.1424 (1.7747)  acc1: 57.2000 (62.9440)  acc5: 79.2000 (85.1840)  time: 0.2825  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3449 s / it)
* Acc@1 62.594 Acc@5 85.382 loss 1.779
Accuracy of the model on the 50000 test images: 62.6%
Max accuracy: 62.59%
Epoch: [15]  [   0/2502]  eta: 1:54:55  lr: 0.003000  min_lr: 0.003000  loss: 4.3744 (4.3744)  weight_decay: 0.0500 (0.0500)  time: 2.7561  data: 2.1507  max mem: 53905
Epoch: [15]  [ 200/2502]  eta: 0:23:04  lr: 0.003016  min_lr: 0.003016  loss: 4.3330 (4.2984)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6645 (0.6895)  time: 0.5904  data: 0.0005  max mem: 53905
Epoch: [15]  [ 400/2502]  eta: 0:20:55  lr: 0.003032  min_lr: 0.003032  loss: 4.6756 (4.3250)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6792 (0.6868)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [15]  [ 600/2502]  eta: 0:18:52  lr: 0.003048  min_lr: 0.003048  loss: 4.4655 (4.3517)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5882 (0.6724)  time: 0.5906  data: 0.0006  max mem: 53905
Epoch: [15]  [ 800/2502]  eta: 0:16:51  lr: 0.003064  min_lr: 0.003064  loss: 4.4875 (4.3509)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6342 (0.6685)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [15]  [1000/2502]  eta: 0:14:51  lr: 0.003080  min_lr: 0.003080  loss: 4.6989 (4.3483)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6330 (0.6686)  time: 0.5906  data: 0.0007  max mem: 53905
Epoch: [15]  [1200/2502]  eta: 0:12:52  lr: 0.003096  min_lr: 0.003096  loss: 3.8818 (4.3409)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6473 (0.6625)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [15]  [1400/2502]  eta: 0:10:53  lr: 0.003112  min_lr: 0.003112  loss: 4.6270 (4.3514)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5935 (0.6600)  time: 0.5902  data: 0.0005  max mem: 53905
Epoch: [15]  [1600/2502]  eta: 0:08:54  lr: 0.003128  min_lr: 0.003128  loss: 4.6086 (4.3579)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5931 (0.6568)  time: 0.5972  data: 0.0006  max mem: 53905
Epoch: [15]  [1800/2502]  eta: 0:06:56  lr: 0.003144  min_lr: 0.003144  loss: 4.0840 (4.3501)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6361 (0.6532)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [15]  [2000/2502]  eta: 0:04:57  lr: 0.003160  min_lr: 0.003160  loss: 4.3270 (4.3475)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5492 (0.6495)  time: 0.5952  data: 0.0007  max mem: 53905
Epoch: [15]  [2200/2502]  eta: 0:02:58  lr: 0.003176  min_lr: 0.003176  loss: 4.5326 (4.3470)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5545 (0.6466)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [15]  [2400/2502]  eta: 0:01:00  lr: 0.003192  min_lr: 0.003192  loss: 4.0836 (4.3471)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6118 (0.6456)  time: 0.5909  data: 0.0006  max mem: 53905
Epoch: [15]  [2501/2502]  eta: 0:00:00  lr: 0.003200  min_lr: 0.003200  loss: 4.3466 (4.3469)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6216 (0.6453)  time: 0.5319  data: 0.0006  max mem: 53905
Epoch: [15] Total time: 0:24:41 (0.5921 s / it)
Averaged stats: lr: 0.003200  min_lr: 0.003200  loss: 4.3466 (4.3436)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6216 (0.6453)
Test:  [ 0/50]  eta: 0:02:34  loss: 1.0593 (1.0593)  acc1: 82.4000 (82.4000)  acc5: 94.0000 (94.0000)  time: 3.0812  data: 2.7477  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 1.5033 (1.4535)  acc1: 71.2000 (69.6727)  acc5: 92.4000 (91.3455)  time: 0.5454  data: 0.2585  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.4742 (1.4507)  acc1: 68.8000 (69.4476)  acc5: 92.4000 (91.5619)  time: 0.2874  data: 0.0051  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.6516 (1.6258)  acc1: 65.2000 (66.2194)  acc5: 86.8000 (88.7742)  time: 0.2830  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 2.0315 (1.7323)  acc1: 57.2000 (64.1659)  acc5: 80.8000 (87.0634)  time: 0.2828  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 2.0315 (1.7555)  acc1: 58.0000 (63.5280)  acc5: 81.2000 (86.6400)  time: 0.2824  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3427 s / it)
* Acc@1 63.474 Acc@5 86.106 loss 1.766
Accuracy of the model on the 50000 test images: 63.5%
Max accuracy: 63.47%
Epoch: [16]  [   0/2502]  eta: 1:34:17  lr: 0.003200  min_lr: 0.003200  loss: 4.0563 (4.0563)  weight_decay: 0.0500 (0.0500)  time: 2.2614  data: 1.6552  max mem: 53905
Epoch: [16]  [ 200/2502]  eta: 0:23:07  lr: 0.003216  min_lr: 0.003216  loss: 4.7049 (4.3498)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6084 (0.5645)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [16]  [ 400/2502]  eta: 0:20:54  lr: 0.003232  min_lr: 0.003232  loss: 4.4653 (4.3412)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5577 (0.5750)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [16]  [ 600/2502]  eta: 0:18:52  lr: 0.003248  min_lr: 0.003248  loss: 4.4769 (4.3032)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5972  data: 0.0004  max mem: 53905
Epoch: [16]  [ 800/2502]  eta: 0:16:51  lr: 0.003264  min_lr: 0.003264  loss: 4.4463 (4.3272)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6104 (nan)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [16]  [1000/2502]  eta: 0:14:51  lr: 0.003280  min_lr: 0.003280  loss: 4.2093 (4.3213)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5758 (nan)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [16]  [1200/2502]  eta: 0:12:53  lr: 0.003296  min_lr: 0.003296  loss: 4.4865 (4.3193)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5304 (nan)  time: 0.5995  data: 0.0004  max mem: 53905
Epoch: [16]  [1400/2502]  eta: 0:10:53  lr: 0.003312  min_lr: 0.003312  loss: 4.6529 (4.3228)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5534 (nan)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [16]  [1600/2502]  eta: 0:08:54  lr: 0.003328  min_lr: 0.003328  loss: 4.1911 (4.3303)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5835 (nan)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [16]  [1800/2502]  eta: 0:06:56  lr: 0.003344  min_lr: 0.003344  loss: 4.1745 (4.3247)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5897 (nan)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [16]  [2000/2502]  eta: 0:04:57  lr: 0.003360  min_lr: 0.003360  loss: 4.1914 (4.3169)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5486 (nan)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [16]  [2200/2502]  eta: 0:02:59  lr: 0.003376  min_lr: 0.003376  loss: 4.3511 (4.3113)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5670 (nan)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [16]  [2400/2502]  eta: 0:01:00  lr: 0.003392  min_lr: 0.003392  loss: 4.4725 (4.3136)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5536 (nan)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [16]  [2501/2502]  eta: 0:00:00  lr: 0.003400  min_lr: 0.003400  loss: 4.3578 (4.3148)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5505 (nan)  time: 0.5321  data: 0.0007  max mem: 53905
Epoch: [16] Total time: 0:24:42 (0.5927 s / it)
Averaged stats: lr: 0.003400  min_lr: 0.003400  loss: 4.3578 (4.3036)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5505 (nan)
Test:  [ 0/50]  eta: 0:03:22  loss: 1.0353 (1.0353)  acc1: 84.0000 (84.0000)  acc5: 94.4000 (94.4000)  time: 4.0415  data: 3.7114  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 1.4187 (1.4291)  acc1: 70.8000 (71.3455)  acc5: 92.4000 (91.8182)  time: 0.6243  data: 0.3378  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.4952 (1.4758)  acc1: 69.2000 (70.7810)  acc5: 92.0000 (91.7714)  time: 0.2828  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.7634 (1.6588)  acc1: 61.2000 (67.0065)  acc5: 85.2000 (88.5419)  time: 0.2829  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 2.0933 (1.7699)  acc1: 57.2000 (64.1463)  acc5: 80.4000 (86.7317)  time: 0.2826  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 2.0606 (1.7825)  acc1: 58.0000 (63.7760)  acc5: 81.6000 (86.3280)  time: 0.2823  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3602 s / it)
* Acc@1 63.776 Acc@5 86.194 loss 1.793
Accuracy of the model on the 50000 test images: 63.8%
Max accuracy: 63.78%
Epoch: [17]  [   0/2502]  eta: 1:36:44  lr: 0.003400  min_lr: 0.003400  loss: 4.2333 (4.2333)  weight_decay: 0.0500 (0.0500)  time: 2.3200  data: 1.7171  max mem: 53905
Epoch: [17]  [ 200/2502]  eta: 0:23:02  lr: 0.003416  min_lr: 0.003416  loss: 4.6802 (4.3107)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5549 (0.5726)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [17]  [ 400/2502]  eta: 0:20:55  lr: 0.003432  min_lr: 0.003432  loss: 4.6196 (4.3059)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5434 (0.5531)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [17]  [ 600/2502]  eta: 0:18:52  lr: 0.003448  min_lr: 0.003448  loss: 3.9240 (4.2779)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5415 (0.5466)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [17]  [ 800/2502]  eta: 0:16:51  lr: 0.003464  min_lr: 0.003464  loss: 4.4179 (4.2719)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5376 (0.5476)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [17]  [1000/2502]  eta: 0:14:51  lr: 0.003480  min_lr: 0.003480  loss: 4.6359 (4.2819)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5511 (0.5495)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [17]  [1200/2502]  eta: 0:12:52  lr: 0.003496  min_lr: 0.003496  loss: 4.4564 (4.2976)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5131 (0.5424)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [17]  [1400/2502]  eta: 0:10:53  lr: 0.003512  min_lr: 0.003512  loss: 4.3561 (4.2925)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5459 (0.5427)  time: 0.6004  data: 0.0005  max mem: 53905
Epoch: [17]  [1600/2502]  eta: 0:08:55  lr: 0.003528  min_lr: 0.003528  loss: 4.2452 (4.2893)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5097 (0.5414)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [17]  [1800/2502]  eta: 0:06:56  lr: 0.003544  min_lr: 0.003544  loss: 4.3938 (4.2824)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5142 (0.5373)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [17]  [2000/2502]  eta: 0:04:57  lr: 0.003560  min_lr: 0.003560  loss: 4.4059 (4.2802)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5104 (0.5371)  time: 0.5956  data: 0.0004  max mem: 53905
Epoch: [17]  [2200/2502]  eta: 0:02:59  lr: 0.003576  min_lr: 0.003576  loss: 4.1612 (4.2802)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5195 (0.5371)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [17]  [2400/2502]  eta: 0:01:00  lr: 0.003592  min_lr: 0.003592  loss: 4.3714 (4.2749)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4810 (0.5343)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [17]  [2501/2502]  eta: 0:00:00  lr: 0.003600  min_lr: 0.003600  loss: 4.3458 (4.2716)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5097 (0.5333)  time: 0.5320  data: 0.0007  max mem: 53905
Epoch: [17] Total time: 0:24:42 (0.5924 s / it)
Averaged stats: lr: 0.003600  min_lr: 0.003600  loss: 4.3458 (4.2678)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5097 (0.5333)
Test:  [ 0/50]  eta: 0:02:45  loss: 1.1915 (1.1915)  acc1: 81.6000 (81.6000)  acc5: 92.8000 (92.8000)  time: 3.3154  data: 2.9864  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.2561 (1.3370)  acc1: 74.8000 (72.6182)  acc5: 92.8000 (92.3273)  time: 0.5585  data: 0.2719  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.3440 (1.3710)  acc1: 70.0000 (71.6381)  acc5: 92.4000 (92.2857)  time: 0.2830  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.6269 (1.5368)  acc1: 66.4000 (68.4258)  acc5: 87.6000 (89.2774)  time: 0.2832  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.9483 (1.6555)  acc1: 58.0000 (65.6781)  acc5: 81.2000 (87.2195)  time: 0.2830  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.9483 (1.6825)  acc1: 58.0000 (64.9680)  acc5: 82.4000 (86.8720)  time: 0.2826  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3465 s / it)
* Acc@1 64.840 Acc@5 86.944 loss 1.686
Accuracy of the model on the 50000 test images: 64.8%
Max accuracy: 64.84%
Epoch: [18]  [   0/2502]  eta: 1:33:26  lr: 0.003600  min_lr: 0.003600  loss: 4.9025 (4.9025)  weight_decay: 0.0500 (0.0500)  time: 2.2409  data: 1.6414  max mem: 53905
Epoch: [18]  [ 200/2502]  eta: 0:23:02  lr: 0.003616  min_lr: 0.003616  loss: 4.4415 (4.1975)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5086 (0.4979)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [18]  [ 400/2502]  eta: 0:20:54  lr: 0.003632  min_lr: 0.003632  loss: 4.4399 (4.2165)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4840 (0.5057)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [18]  [ 600/2502]  eta: 0:18:52  lr: 0.003648  min_lr: 0.003648  loss: 4.1707 (4.2226)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5123 (0.5160)  time: 0.5970  data: 0.0004  max mem: 53905
Epoch: [18]  [ 800/2502]  eta: 0:16:51  lr: 0.003664  min_lr: 0.003664  loss: 4.3002 (4.2243)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4879 (0.5127)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [18]  [1000/2502]  eta: 0:14:51  lr: 0.003680  min_lr: 0.003680  loss: 4.4464 (4.2343)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4808 (0.5105)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [18]  [1200/2502]  eta: 0:12:52  lr: 0.003696  min_lr: 0.003696  loss: 4.4620 (4.2407)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5255 (0.5151)  time: 0.5902  data: 0.0004  max mem: 53905
Epoch: [18]  [1400/2502]  eta: 0:10:53  lr: 0.003712  min_lr: 0.003712  loss: 4.3524 (4.2377)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4735 (0.5133)  time: 0.5938  data: 0.0004  max mem: 53905
Epoch: [18]  [1600/2502]  eta: 0:08:54  lr: 0.003728  min_lr: 0.003728  loss: 4.2734 (4.2468)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4698 (0.5131)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [18]  [1800/2502]  eta: 0:06:55  lr: 0.003744  min_lr: 0.003744  loss: 4.5451 (4.2455)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5067 (0.5117)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [18]  [2000/2502]  eta: 0:04:57  lr: 0.003760  min_lr: 0.003760  loss: 4.2892 (4.2466)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4726 (0.5097)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [18]  [2200/2502]  eta: 0:02:58  lr: 0.003776  min_lr: 0.003776  loss: 3.9019 (4.2382)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4634 (0.5098)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [18]  [2400/2502]  eta: 0:01:00  lr: 0.003792  min_lr: 0.003792  loss: 4.0782 (4.2347)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4651 (0.5082)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [18]  [2501/2502]  eta: 0:00:00  lr: 0.003800  min_lr: 0.003800  loss: 4.4240 (4.2351)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4912 (0.5077)  time: 0.5322  data: 0.0007  max mem: 53905
Epoch: [18] Total time: 0:24:40 (0.5919 s / it)
Averaged stats: lr: 0.003800  min_lr: 0.003800  loss: 4.4240 (4.2448)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4912 (0.5077)
Test:  [ 0/50]  eta: 0:02:53  loss: 1.1895 (1.1895)  acc1: 81.2000 (81.2000)  acc5: 96.0000 (96.0000)  time: 3.4765  data: 3.1561  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 1.4250 (1.5196)  acc1: 73.6000 (72.1455)  acc5: 91.6000 (91.4909)  time: 0.6031  data: 0.3172  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.4250 (1.4883)  acc1: 70.4000 (71.8476)  acc5: 92.0000 (91.7714)  time: 0.2994  data: 0.0169  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.6644 (1.6524)  acc1: 64.8000 (68.3742)  acc5: 86.8000 (88.9936)  time: 0.2830  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 2.0406 (1.7536)  acc1: 58.4000 (65.9024)  acc5: 81.2000 (87.3756)  time: 0.2825  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.9956 (1.7736)  acc1: 59.2000 (65.5120)  acc5: 82.8000 (87.0160)  time: 0.2820  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3560 s / it)
* Acc@1 64.970 Acc@5 86.994 loss 1.774
Accuracy of the model on the 50000 test images: 65.0%
Max accuracy: 64.97%
Epoch: [19]  [   0/2502]  eta: 1:38:55  lr: 0.003800  min_lr: 0.003800  loss: 4.1879 (4.1879)  weight_decay: 0.0500 (0.0500)  time: 2.3722  data: 1.7671  max mem: 53905
Epoch: [19]  [ 200/2502]  eta: 0:22:59  lr: 0.003816  min_lr: 0.003816  loss: 4.1860 (4.1660)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4562 (0.4904)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [19]  [ 400/2502]  eta: 0:20:54  lr: 0.003832  min_lr: 0.003832  loss: 4.5504 (4.2382)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4591 (0.5013)  time: 0.6001  data: 0.0004  max mem: 53905
Epoch: [19]  [ 600/2502]  eta: 0:18:51  lr: 0.003848  min_lr: 0.003848  loss: 4.3724 (4.2588)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4855 (0.4942)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [19]  [ 800/2502]  eta: 0:16:51  lr: 0.003864  min_lr: 0.003864  loss: 4.2061 (4.2590)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4265 (0.4922)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [19]  [1000/2502]  eta: 0:14:52  lr: 0.003880  min_lr: 0.003880  loss: 4.1960 (4.2501)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4818 (0.4901)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [19]  [1200/2502]  eta: 0:12:53  lr: 0.003896  min_lr: 0.003896  loss: 4.5356 (4.2513)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4632 (0.4885)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [19]  [1400/2502]  eta: 0:10:54  lr: 0.003912  min_lr: 0.003912  loss: 4.1789 (4.2490)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5214 (0.4909)  time: 0.6081  data: 0.0005  max mem: 53905
Epoch: [19]  [1600/2502]  eta: 0:08:55  lr: 0.003928  min_lr: 0.003928  loss: 4.0543 (4.2387)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4733 (0.4950)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [19]  [1800/2502]  eta: 0:06:56  lr: 0.003944  min_lr: 0.003944  loss: 4.2504 (4.2351)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4588 (0.4922)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [19]  [2000/2502]  eta: 0:04:57  lr: 0.003960  min_lr: 0.003960  loss: 4.1782 (4.2296)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4909 (0.4920)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [19]  [2200/2502]  eta: 0:02:59  lr: 0.003976  min_lr: 0.003976  loss: 4.3401 (4.2322)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4544 (0.4913)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [19]  [2400/2502]  eta: 0:01:00  lr: 0.003992  min_lr: 0.003992  loss: 4.4027 (4.2327)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4840 (0.4909)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [19]  [2501/2502]  eta: 0:00:00  lr: 0.004000  min_lr: 0.004000  loss: 4.2513 (4.2354)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5032 (0.4915)  time: 0.5387  data: 0.0008  max mem: 53905
Epoch: [19] Total time: 0:24:43 (0.5930 s / it)
Averaged stats: lr: 0.004000  min_lr: 0.004000  loss: 4.2513 (4.2214)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5032 (0.4915)
Test:  [ 0/50]  eta: 0:02:38  loss: 1.2710 (1.2710)  acc1: 79.2000 (79.2000)  acc5: 93.6000 (93.6000)  time: 3.1768  data: 2.8492  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 1.2710 (1.2999)  acc1: 76.0000 (73.9273)  acc5: 93.6000 (92.9091)  time: 0.6129  data: 0.3264  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.3009 (1.3146)  acc1: 72.4000 (72.7048)  acc5: 93.6000 (92.9143)  time: 0.3200  data: 0.0373  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.6205 (1.5327)  acc1: 64.0000 (68.6452)  acc5: 88.8000 (89.6000)  time: 0.2834  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.9700 (1.6470)  acc1: 60.8000 (66.3610)  acc5: 82.0000 (87.8439)  time: 0.2837  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.9849 (1.6787)  acc1: 60.8000 (65.6960)  acc5: 82.0000 (87.2960)  time: 0.2868  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3597 s / it)
* Acc@1 65.452 Acc@5 87.270 loss 1.683
Accuracy of the model on the 50000 test images: 65.5%
Max accuracy: 65.45%
Epoch: [20]  [   0/2502]  eta: 1:35:06  lr: 0.004000  min_lr: 0.004000  loss: 4.1597 (4.1597)  weight_decay: 0.0500 (0.0500)  time: 2.2806  data: 1.6707  max mem: 53905
Epoch: [20]  [ 200/2502]  eta: 0:22:58  lr: 0.004000  min_lr: 0.004000  loss: 4.3712 (4.2081)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4912 (0.4962)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [20]  [ 400/2502]  eta: 0:20:50  lr: 0.004000  min_lr: 0.004000  loss: 4.5037 (4.2390)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4669 (0.4840)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [20]  [ 600/2502]  eta: 0:18:50  lr: 0.004000  min_lr: 0.004000  loss: 4.3577 (4.2018)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5002 (0.4899)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [20]  [ 800/2502]  eta: 0:16:50  lr: 0.004000  min_lr: 0.004000  loss: 4.0537 (4.1930)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4559 (0.4866)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [20]  [1000/2502]  eta: 0:14:50  lr: 0.004000  min_lr: 0.004000  loss: 4.5712 (4.1956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4826 (0.4850)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [20]  [1200/2502]  eta: 0:12:51  lr: 0.004000  min_lr: 0.004000  loss: 4.3262 (4.1915)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4575 (0.4856)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [20]  [1400/2502]  eta: 0:10:52  lr: 0.004000  min_lr: 0.004000  loss: 4.2894 (4.1840)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4685 (0.4841)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [20]  [1600/2502]  eta: 0:08:54  lr: 0.004000  min_lr: 0.004000  loss: 4.4373 (4.1839)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4658 (0.4815)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [20]  [1800/2502]  eta: 0:06:55  lr: 0.004000  min_lr: 0.004000  loss: 4.1541 (4.1783)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4365 (0.4817)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [20]  [2000/2502]  eta: 0:04:57  lr: 0.004000  min_lr: 0.004000  loss: 4.3374 (4.1757)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4818 (0.4840)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [20]  [2200/2502]  eta: 0:02:58  lr: 0.004000  min_lr: 0.004000  loss: 4.0287 (4.1741)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4692 (0.4849)  time: 0.5906  data: 0.0006  max mem: 53905
Epoch: [20]  [2400/2502]  eta: 0:01:00  lr: 0.004000  min_lr: 0.004000  loss: 3.9820 (4.1688)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4486 (0.4841)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [20]  [2501/2502]  eta: 0:00:00  lr: 0.004000  min_lr: 0.004000  loss: 4.2226 (4.1693)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4743 (0.4844)  time: 0.5320  data: 0.0006  max mem: 53905
Epoch: [20] Total time: 0:24:40 (0.5919 s / it)
Averaged stats: lr: 0.004000  min_lr: 0.004000  loss: 4.2226 (4.1780)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4743 (0.4844)
Test:  [ 0/50]  eta: 0:02:27  loss: 1.0743 (1.0743)  acc1: 82.8000 (82.8000)  acc5: 94.0000 (94.0000)  time: 2.9474  data: 2.6104  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 1.2792 (1.3106)  acc1: 75.6000 (72.9091)  acc5: 93.6000 (92.2909)  time: 0.5282  data: 0.2405  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.2512 (1.3220)  acc1: 73.6000 (72.8000)  acc5: 93.2000 (92.5143)  time: 0.2849  data: 0.0020  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.5024 (1.4995)  acc1: 66.0000 (69.5742)  acc5: 86.0000 (89.8065)  time: 0.2836  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 2.0081 (1.6288)  acc1: 59.6000 (66.8585)  acc5: 84.0000 (88.0488)  time: 0.2833  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.9953 (1.6598)  acc1: 59.6000 (66.3280)  acc5: 83.6000 (87.5840)  time: 0.2831  data: 0.0002  max mem: 53905
Test: Total time: 0:00:16 (0.3395 s / it)
* Acc@1 66.264 Acc@5 87.602 loss 1.664
Accuracy of the model on the 50000 test images: 66.3%
Max accuracy: 66.26%
Epoch: [21]  [   0/2502]  eta: 1:35:44  lr: 0.004000  min_lr: 0.004000  loss: 4.4657 (4.4657)  weight_decay: 0.0500 (0.0500)  time: 2.2958  data: 1.6992  max mem: 53905
Epoch: [21]  [ 200/2502]  eta: 0:23:01  lr: 0.004000  min_lr: 0.004000  loss: 4.3709 (4.1545)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4895 (0.4916)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [21]  [ 400/2502]  eta: 0:20:53  lr: 0.004000  min_lr: 0.004000  loss: 4.2353 (4.1851)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4655 (0.4843)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [21]  [ 600/2502]  eta: 0:18:50  lr: 0.004000  min_lr: 0.004000  loss: 4.4415 (4.1985)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4910 (0.4893)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [21]  [ 800/2502]  eta: 0:16:50  lr: 0.004000  min_lr: 0.004000  loss: 3.9640 (4.1595)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4612 (0.4873)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [21]  [1000/2502]  eta: 0:14:51  lr: 0.004000  min_lr: 0.004000  loss: 4.0973 (4.1561)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4612 (0.4847)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [21]  [1200/2502]  eta: 0:12:51  lr: 0.004000  min_lr: 0.004000  loss: 4.0825 (4.1596)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4832 (0.4849)  time: 0.5900  data: 0.0005  max mem: 53905
Epoch: [21]  [1400/2502]  eta: 0:10:53  lr: 0.004000  min_lr: 0.004000  loss: 4.2715 (4.1568)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4580 (0.4823)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [21]  [1600/2502]  eta: 0:08:54  lr: 0.004000  min_lr: 0.004000  loss: 4.1596 (4.1516)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5106 (0.4822)  time: 0.5900  data: 0.0004  max mem: 53905
Epoch: [21]  [1800/2502]  eta: 0:06:55  lr: 0.004000  min_lr: 0.004000  loss: 4.2652 (4.1550)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4453 (0.4828)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [21]  [2000/2502]  eta: 0:04:57  lr: 0.004000  min_lr: 0.004000  loss: 3.8793 (4.1523)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4556 (0.4808)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [21]  [2200/2502]  eta: 0:02:58  lr: 0.004000  min_lr: 0.004000  loss: 4.3061 (4.1489)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4683 (0.4815)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [21]  [2400/2502]  eta: 0:01:00  lr: 0.004000  min_lr: 0.004000  loss: 3.5695 (4.1447)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5277 (0.4829)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [21]  [2501/2502]  eta: 0:00:00  lr: 0.003999  min_lr: 0.003999  loss: 3.9905 (4.1448)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4671 (0.4830)  time: 0.5318  data: 0.0008  max mem: 53905
Epoch: [21] Total time: 0:24:41 (0.5920 s / it)
Averaged stats: lr: 0.003999  min_lr: 0.003999  loss: 3.9905 (4.1447)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4671 (0.4830)
Test:  [ 0/50]  eta: 0:02:38  loss: 1.1506 (1.1506)  acc1: 81.6000 (81.6000)  acc5: 96.8000 (96.8000)  time: 3.1748  data: 2.8380  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.5176 (1.4531)  acc1: 74.4000 (72.7636)  acc5: 93.6000 (93.1273)  time: 0.5509  data: 0.2627  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.3934 (1.4287)  acc1: 72.0000 (72.3048)  acc5: 92.8000 (92.9714)  time: 0.2865  data: 0.0028  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.6331 (1.6058)  acc1: 66.4000 (68.7871)  acc5: 86.4000 (89.8194)  time: 0.2844  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 2.0223 (1.7046)  acc1: 59.2000 (66.5561)  acc5: 83.6000 (87.9317)  time: 0.2840  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.9622 (1.7314)  acc1: 59.2000 (65.7280)  acc5: 83.6000 (87.4640)  time: 0.2837  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3450 s / it)
* Acc@1 65.730 Acc@5 87.322 loss 1.737
Accuracy of the model on the 50000 test images: 65.7%
Max accuracy: 66.26%
Epoch: [22]  [   0/2502]  eta: 1:55:16  lr: 0.003999  min_lr: 0.003999  loss: 3.5112 (3.5112)  weight_decay: 0.0500 (0.0500)  time: 2.7645  data: 1.8936  max mem: 53905
Epoch: [22]  [ 200/2502]  eta: 0:23:04  lr: 0.003999  min_lr: 0.003999  loss: 4.3332 (4.1374)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4542 (0.4714)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [22]  [ 400/2502]  eta: 0:20:55  lr: 0.003999  min_lr: 0.003999  loss: 3.9748 (4.1076)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4664 (0.4875)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [22]  [ 600/2502]  eta: 0:18:54  lr: 0.003999  min_lr: 0.003999  loss: 4.2038 (4.0895)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4541 (0.4788)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [22]  [ 800/2502]  eta: 0:16:52  lr: 0.003999  min_lr: 0.003999  loss: 4.1450 (4.1011)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4360 (0.4760)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [22]  [1000/2502]  eta: 0:14:52  lr: 0.003999  min_lr: 0.003999  loss: 4.1623 (4.0961)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4987 (0.4849)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [22]  [1200/2502]  eta: 0:12:53  lr: 0.003999  min_lr: 0.003999  loss: 4.2794 (4.1044)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4972 (0.4868)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [22]  [1400/2502]  eta: 0:10:54  lr: 0.003999  min_lr: 0.003999  loss: 4.3004 (4.1035)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4542 (0.4853)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [22]  [1600/2502]  eta: 0:08:55  lr: 0.003999  min_lr: 0.003999  loss: 4.4135 (4.1165)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5000 (0.4849)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [22]  [1800/2502]  eta: 0:06:56  lr: 0.003999  min_lr: 0.003999  loss: 4.2099 (4.1109)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4453 (0.4833)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [22]  [2000/2502]  eta: 0:04:57  lr: 0.003999  min_lr: 0.003999  loss: 4.1517 (4.1072)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4463 (0.4834)  time: 0.5970  data: 0.0005  max mem: 53905
Epoch: [22]  [2200/2502]  eta: 0:02:59  lr: 0.003999  min_lr: 0.003999  loss: 4.3894 (4.1135)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4772 (0.4824)  time: 0.5967  data: 0.0005  max mem: 53905
Epoch: [22]  [2400/2502]  eta: 0:01:00  lr: 0.003999  min_lr: 0.003999  loss: 4.1534 (4.1178)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4506 (0.4818)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [22]  [2501/2502]  eta: 0:00:00  lr: 0.003999  min_lr: 0.003999  loss: 4.3769 (4.1181)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4563 (0.4814)  time: 0.5322  data: 0.0008  max mem: 53905
Epoch: [22] Total time: 0:24:42 (0.5924 s / it)
Averaged stats: lr: 0.003999  min_lr: 0.003999  loss: 4.3769 (4.1142)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4563 (0.4814)
Test:  [ 0/50]  eta: 0:02:29  loss: 0.9667 (0.9667)  acc1: 83.2000 (83.2000)  acc5: 96.4000 (96.4000)  time: 2.9836  data: 2.6583  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 1.2659 (1.2875)  acc1: 76.8000 (74.0000)  acc5: 94.0000 (93.2000)  time: 0.5478  data: 0.2611  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.2659 (1.2989)  acc1: 74.0000 (74.0381)  acc5: 92.8000 (93.2952)  time: 0.2995  data: 0.0109  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.4757 (1.4985)  acc1: 64.4000 (70.1161)  acc5: 89.2000 (90.4129)  time: 0.2910  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.9425 (1.6130)  acc1: 60.4000 (67.9317)  acc5: 84.4000 (88.8585)  time: 0.2853  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.9425 (1.6486)  acc1: 61.2000 (67.2880)  acc5: 84.0000 (88.3520)  time: 0.2833  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3466 s / it)
* Acc@1 67.152 Acc@5 88.474 loss 1.648
Accuracy of the model on the 50000 test images: 67.2%
Max accuracy: 67.15%
Epoch: [23]  [   0/2502]  eta: 1:32:50  lr: 0.003999  min_lr: 0.003999  loss: 4.2083 (4.2083)  weight_decay: 0.0500 (0.0500)  time: 2.2266  data: 1.6269  max mem: 53905
Epoch: [23]  [ 200/2502]  eta: 0:22:59  lr: 0.003999  min_lr: 0.003999  loss: 4.2024 (4.1241)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4779 (0.4829)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [23]  [ 400/2502]  eta: 0:20:52  lr: 0.003999  min_lr: 0.003999  loss: 3.7910 (4.1032)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4384 (0.4740)  time: 0.5899  data: 0.0005  max mem: 53905
Epoch: [23]  [ 600/2502]  eta: 0:18:50  lr: 0.003999  min_lr: 0.003999  loss: 4.0849 (4.0919)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5075 (0.4835)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [23]  [ 800/2502]  eta: 0:16:50  lr: 0.003999  min_lr: 0.003999  loss: 4.4025 (4.0956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4732 (0.4845)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [23]  [1000/2502]  eta: 0:14:51  lr: 0.003999  min_lr: 0.003999  loss: 4.3290 (4.0844)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4780 (0.4883)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [23]  [1200/2502]  eta: 0:12:52  lr: 0.003998  min_lr: 0.003998  loss: 4.2624 (4.0963)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4987 (0.4887)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [23]  [1400/2502]  eta: 0:10:53  lr: 0.003998  min_lr: 0.003998  loss: 4.0369 (4.0966)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4741 (0.4881)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [23]  [1600/2502]  eta: 0:08:54  lr: 0.003998  min_lr: 0.003998  loss: 4.4417 (4.1031)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4835 (0.4863)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [23]  [1800/2502]  eta: 0:06:56  lr: 0.003998  min_lr: 0.003998  loss: 3.9708 (4.1002)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4957 (0.4872)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [23]  [2000/2502]  eta: 0:04:57  lr: 0.003998  min_lr: 0.003998  loss: 4.0675 (4.0967)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4641 (0.4867)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [23]  [2200/2502]  eta: 0:02:58  lr: 0.003998  min_lr: 0.003998  loss: 4.1605 (4.0955)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4864 (0.4859)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [23]  [2400/2502]  eta: 0:01:00  lr: 0.003998  min_lr: 0.003998  loss: 4.0841 (4.0945)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4761 (0.4851)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [23]  [2501/2502]  eta: 0:00:00  lr: 0.003998  min_lr: 0.003998  loss: 3.8751 (4.0934)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4613 (0.4846)  time: 0.5307  data: 0.0008  max mem: 53905
Epoch: [23] Total time: 0:24:41 (0.5921 s / it)
Averaged stats: lr: 0.003998  min_lr: 0.003998  loss: 3.8751 (4.0882)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4613 (0.4846)
Test:  [ 0/50]  eta: 0:03:23  loss: 0.7436 (0.7436)  acc1: 88.0000 (88.0000)  acc5: 97.6000 (97.6000)  time: 4.0673  data: 3.7349  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 1.2087 (1.2207)  acc1: 78.0000 (75.7091)  acc5: 93.6000 (93.5273)  time: 0.6252  data: 0.3399  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.2094 (1.2269)  acc1: 75.6000 (75.7143)  acc5: 93.6000 (94.0000)  time: 0.2811  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.3663 (1.3803)  acc1: 69.6000 (72.2452)  acc5: 90.0000 (91.6645)  time: 0.2813  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.8224 (1.4940)  acc1: 62.4000 (69.5512)  acc5: 84.8000 (89.8829)  time: 0.2810  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.8224 (1.5297)  acc1: 62.4000 (68.7760)  acc5: 84.4000 (89.5760)  time: 0.2808  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3590 s / it)
* Acc@1 68.442 Acc@5 89.146 loss 1.541
Accuracy of the model on the 50000 test images: 68.4%
Max accuracy: 68.44%
Epoch: [24]  [   0/2502]  eta: 1:49:06  lr: 0.003998  min_lr: 0.003998  loss: 4.8084 (4.8084)  weight_decay: 0.0500 (0.0500)  time: 2.6166  data: 2.0173  max mem: 53905
Epoch: [24]  [ 200/2502]  eta: 0:23:02  lr: 0.003998  min_lr: 0.003998  loss: 4.2363 (4.1253)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4701 (0.5072)  time: 0.5901  data: 0.0006  max mem: 53905
Epoch: [24]  [ 400/2502]  eta: 0:20:51  lr: 0.003998  min_lr: 0.003998  loss: 3.8433 (4.0596)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4873 (0.5045)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [24]  [ 600/2502]  eta: 0:18:52  lr: 0.003998  min_lr: 0.003998  loss: 4.2125 (4.0551)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4403 (0.4979)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [24]  [ 800/2502]  eta: 0:16:51  lr: 0.003998  min_lr: 0.003998  loss: 4.3130 (4.0599)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4611 (0.4913)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [24]  [1000/2502]  eta: 0:14:51  lr: 0.003998  min_lr: 0.003998  loss: 3.9716 (4.0564)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4470 (0.4859)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [24]  [1200/2502]  eta: 0:12:52  lr: 0.003997  min_lr: 0.003997  loss: 4.0344 (4.0609)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4404 (0.4859)  time: 0.5902  data: 0.0005  max mem: 53905
Epoch: [24]  [1400/2502]  eta: 0:10:53  lr: 0.003997  min_lr: 0.003997  loss: 4.2249 (4.0613)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4744 (0.4842)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [24]  [1600/2502]  eta: 0:08:54  lr: 0.003997  min_lr: 0.003997  loss: 3.9814 (4.0597)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4482 (0.4830)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [24]  [1800/2502]  eta: 0:06:56  lr: 0.003997  min_lr: 0.003997  loss: 4.1760 (4.0533)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4614 (0.4828)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [24]  [2000/2502]  eta: 0:04:57  lr: 0.003997  min_lr: 0.003997  loss: 4.1724 (4.0511)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4934 (0.4823)  time: 0.5925  data: 0.0006  max mem: 53905
Epoch: [24]  [2200/2502]  eta: 0:02:59  lr: 0.003997  min_lr: 0.003997  loss: 4.4625 (4.0535)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5134 (0.4862)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [24]  [2400/2502]  eta: 0:01:00  lr: 0.003997  min_lr: 0.003997  loss: 4.2868 (4.0625)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4447 (0.4856)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [24]  [2501/2502]  eta: 0:00:00  lr: 0.003997  min_lr: 0.003997  loss: 4.0408 (4.0572)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4427 (0.4844)  time: 0.5334  data: 0.0007  max mem: 53905
Epoch: [24] Total time: 0:24:43 (0.5928 s / it)
Averaged stats: lr: 0.003997  min_lr: 0.003997  loss: 4.0408 (4.0559)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4427 (0.4844)
Test:  [ 0/50]  eta: 0:02:40  loss: 0.8696 (0.8696)  acc1: 86.8000 (86.8000)  acc5: 97.2000 (97.2000)  time: 3.2193  data: 2.9101  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.2220 (1.2484)  acc1: 75.2000 (76.2182)  acc5: 94.4000 (93.4545)  time: 0.5701  data: 0.2847  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.2374 (1.2507)  acc1: 72.4000 (74.6476)  acc5: 94.4000 (94.0762)  time: 0.2976  data: 0.0113  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.4587 (1.4010)  acc1: 66.8000 (71.1226)  acc5: 90.4000 (91.4839)  time: 0.2916  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.8090 (1.5071)  acc1: 62.8000 (68.8390)  acc5: 85.6000 (89.8829)  time: 0.2884  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.7710 (1.5435)  acc1: 62.8000 (68.0880)  acc5: 85.6000 (89.4480)  time: 0.2836  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3519 s / it)
* Acc@1 68.478 Acc@5 89.198 loss 1.548
Accuracy of the model on the 50000 test images: 68.5%
Max accuracy: 68.48%
Epoch: [25]  [   0/2502]  eta: 1:44:09  lr: 0.003997  min_lr: 0.003997  loss: 4.2819 (4.2819)  weight_decay: 0.0500 (0.0500)  time: 2.4978  data: 1.8941  max mem: 53905
Epoch: [25]  [ 200/2502]  eta: 0:23:10  lr: 0.003997  min_lr: 0.003997  loss: 4.3213 (4.1047)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5162 (0.5091)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [25]  [ 400/2502]  eta: 0:20:57  lr: 0.003997  min_lr: 0.003997  loss: 3.6618 (4.0733)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4908 (0.4948)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [25]  [ 600/2502]  eta: 0:18:53  lr: 0.003997  min_lr: 0.003997  loss: 4.2137 (4.0500)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4929 (0.4922)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [25]  [ 800/2502]  eta: 0:16:53  lr: 0.003996  min_lr: 0.003996  loss: 4.1672 (4.0401)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4726 (0.4917)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [25]  [1000/2502]  eta: 0:14:53  lr: 0.003996  min_lr: 0.003996  loss: 4.1894 (4.0444)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4731 (0.4914)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [25]  [1200/2502]  eta: 0:12:54  lr: 0.003996  min_lr: 0.003996  loss: 4.3100 (4.0483)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4692 (0.4894)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [25]  [1400/2502]  eta: 0:10:55  lr: 0.003996  min_lr: 0.003996  loss: 4.2309 (4.0418)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5022 (0.4910)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [25]  [1600/2502]  eta: 0:08:55  lr: 0.003996  min_lr: 0.003996  loss: 4.3620 (4.0409)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4868 (0.4901)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [25]  [1800/2502]  eta: 0:06:56  lr: 0.003996  min_lr: 0.003996  loss: 3.5469 (4.0365)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4689 (0.4917)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [25]  [2000/2502]  eta: 0:04:57  lr: 0.003996  min_lr: 0.003996  loss: 4.2641 (4.0367)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4577 (0.4925)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [25]  [2200/2502]  eta: 0:02:59  lr: 0.003996  min_lr: 0.003996  loss: 4.2645 (4.0401)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4656 (0.4907)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [25]  [2400/2502]  eta: 0:01:00  lr: 0.003996  min_lr: 0.003996  loss: 3.6332 (4.0364)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4446 (0.4896)  time: 0.5979  data: 0.0004  max mem: 53905
Epoch: [25]  [2501/2502]  eta: 0:00:00  lr: 0.003995  min_lr: 0.003995  loss: 4.1933 (4.0383)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4583 (0.4893)  time: 0.5339  data: 0.0007  max mem: 53905
Epoch: [25] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.003995  min_lr: 0.003995  loss: 4.1933 (4.0381)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4583 (0.4893)
Test:  [ 0/50]  eta: 0:02:58  loss: 0.8122 (0.8122)  acc1: 85.6000 (85.6000)  acc5: 96.4000 (96.4000)  time: 3.5670  data: 3.2642  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.1425 (1.2094)  acc1: 76.4000 (75.3818)  acc5: 94.8000 (93.5273)  time: 0.5834  data: 0.2972  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.1673 (1.2068)  acc1: 75.2000 (75.0476)  acc5: 94.4000 (93.6952)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3487 (1.4333)  acc1: 67.2000 (71.7548)  acc5: 88.8000 (90.8129)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.9525 (1.5624)  acc1: 62.4000 (69.4049)  acc5: 84.8000 (89.4146)  time: 0.2851  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.9525 (1.5985)  acc1: 62.0000 (68.6240)  acc5: 84.8000 (88.8960)  time: 0.2849  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3530 s / it)
* Acc@1 68.368 Acc@5 88.936 loss 1.606
Accuracy of the model on the 50000 test images: 68.4%
Max accuracy: 68.48%
Epoch: [26]  [   0/2502]  eta: 1:57:56  lr: 0.003995  min_lr: 0.003995  loss: 4.6640 (4.6640)  weight_decay: 0.0500 (0.0500)  time: 2.8282  data: 1.9178  max mem: 53905
Epoch: [26]  [ 200/2502]  eta: 0:23:10  lr: 0.003995  min_lr: 0.003995  loss: 3.9586 (3.9807)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5286 (0.5133)  time: 0.5937  data: 0.0005  max mem: 53905
Epoch: [26]  [ 400/2502]  eta: 0:20:57  lr: 0.003995  min_lr: 0.003995  loss: 4.1388 (3.9807)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5162 (0.5050)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [26]  [ 600/2502]  eta: 0:18:55  lr: 0.003995  min_lr: 0.003995  loss: 3.9798 (3.9856)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5035 (0.5019)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [26]  [ 800/2502]  eta: 0:16:53  lr: 0.003995  min_lr: 0.003995  loss: 4.3403 (4.0080)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4668 (0.5036)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [26]  [1000/2502]  eta: 0:14:54  lr: 0.003995  min_lr: 0.003995  loss: 4.0145 (4.0021)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5090 (0.5025)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [26]  [1200/2502]  eta: 0:12:54  lr: 0.003995  min_lr: 0.003995  loss: 4.2846 (4.0160)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4799 (0.4970)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [26]  [1400/2502]  eta: 0:10:55  lr: 0.003995  min_lr: 0.003995  loss: 3.9165 (4.0093)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4961 (0.4963)  time: 0.5986  data: 0.0005  max mem: 53905
Epoch: [26]  [1600/2502]  eta: 0:08:56  lr: 0.003994  min_lr: 0.003994  loss: 4.0090 (4.0052)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5133 (0.4940)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [26]  [1800/2502]  eta: 0:06:57  lr: 0.003994  min_lr: 0.003994  loss: 3.9666 (4.0034)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4919 (0.4941)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [26]  [2000/2502]  eta: 0:04:58  lr: 0.003994  min_lr: 0.003994  loss: 4.0324 (4.0051)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4978 (0.4949)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [26]  [2200/2502]  eta: 0:02:59  lr: 0.003994  min_lr: 0.003994  loss: 4.2489 (4.0048)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4845 (0.4965)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [26]  [2400/2502]  eta: 0:01:00  lr: 0.003994  min_lr: 0.003994  loss: 4.2064 (4.0046)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4548 (0.4949)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [26]  [2501/2502]  eta: 0:00:00  lr: 0.003994  min_lr: 0.003994  loss: 3.8306 (4.0006)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4603 (0.4941)  time: 0.5338  data: 0.0007  max mem: 53905
Epoch: [26] Total time: 0:24:46 (0.5939 s / it)
Averaged stats: lr: 0.003994  min_lr: 0.003994  loss: 3.8306 (4.0060)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4603 (0.4941)
Test:  [ 0/50]  eta: 0:02:49  loss: 0.9235 (0.9235)  acc1: 83.6000 (83.6000)  acc5: 96.0000 (96.0000)  time: 3.3849  data: 3.0658  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.1297 (1.1862)  acc1: 78.0000 (76.8000)  acc5: 95.2000 (94.0000)  time: 0.5667  data: 0.2791  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.1664 (1.1934)  acc1: 74.8000 (76.0000)  acc5: 94.4000 (94.2476)  time: 0.2851  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.4438 (1.3624)  acc1: 68.8000 (72.2968)  acc5: 90.4000 (91.7161)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.7302 (1.4687)  acc1: 64.4000 (70.0000)  acc5: 85.2000 (90.0293)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.7302 (1.4839)  acc1: 64.8000 (69.6000)  acc5: 85.2000 (89.9200)  time: 0.2847  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3493 s / it)
* Acc@1 69.422 Acc@5 89.762 loss 1.495
Accuracy of the model on the 50000 test images: 69.4%
Max accuracy: 69.42%
Epoch: [27]  [   0/2502]  eta: 1:46:06  lr: 0.003994  min_lr: 0.003994  loss: 3.8692 (3.8692)  weight_decay: 0.0500 (0.0500)  time: 2.5446  data: 1.9374  max mem: 53905
Epoch: [27]  [ 200/2502]  eta: 0:23:09  lr: 0.003994  min_lr: 0.003994  loss: 3.8617 (3.9416)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4965 (0.4969)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [27]  [ 400/2502]  eta: 0:20:56  lr: 0.003994  min_lr: 0.003994  loss: 3.9755 (3.9527)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4483 (0.4893)  time: 0.5924  data: 0.0007  max mem: 53905
Epoch: [27]  [ 600/2502]  eta: 0:18:54  lr: 0.003993  min_lr: 0.003993  loss: 3.9841 (3.9624)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4665 (0.4912)  time: 0.5924  data: 0.0006  max mem: 53905
Epoch: [27]  [ 800/2502]  eta: 0:16:54  lr: 0.003993  min_lr: 0.003993  loss: 4.2483 (3.9782)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4892 (0.4920)  time: 0.5926  data: 0.0006  max mem: 53905
Epoch: [27]  [1000/2502]  eta: 0:14:54  lr: 0.003993  min_lr: 0.003993  loss: 4.2164 (3.9947)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4972 (0.4922)  time: 0.5940  data: 0.0007  max mem: 53905
Epoch: [27]  [1200/2502]  eta: 0:12:54  lr: 0.003993  min_lr: 0.003993  loss: 4.1142 (3.9964)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4560 (0.4905)  time: 0.5921  data: 0.0007  max mem: 53905
Epoch: [27]  [1400/2502]  eta: 0:10:55  lr: 0.003993  min_lr: 0.003993  loss: 3.9972 (3.9924)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4818 (0.4883)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [27]  [1600/2502]  eta: 0:08:56  lr: 0.003993  min_lr: 0.003993  loss: 4.3897 (4.0138)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5160 (0.4889)  time: 0.5905  data: 0.0007  max mem: 53905
Epoch: [27]  [1800/2502]  eta: 0:06:56  lr: 0.003993  min_lr: 0.003993  loss: 4.2706 (4.0095)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4626 (0.4883)  time: 0.5943  data: 0.0007  max mem: 53905
Epoch: [27]  [2000/2502]  eta: 0:04:58  lr: 0.003992  min_lr: 0.003992  loss: 4.0389 (4.0110)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4702 (0.4891)  time: 0.5912  data: 0.0007  max mem: 53905
Epoch: [27]  [2200/2502]  eta: 0:02:59  lr: 0.003992  min_lr: 0.003992  loss: 3.9242 (4.0101)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4762 (0.4890)  time: 0.5925  data: 0.0006  max mem: 53905
Epoch: [27]  [2400/2502]  eta: 0:01:00  lr: 0.003992  min_lr: 0.003992  loss: 4.1318 (4.0019)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4856 (0.4903)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [27]  [2501/2502]  eta: 0:00:00  lr: 0.003992  min_lr: 0.003992  loss: 4.2538 (3.9996)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5188 (0.4921)  time: 0.5332  data: 0.0007  max mem: 53905
Epoch: [27] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.003992  min_lr: 0.003992  loss: 4.2538 (3.9934)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5188 (0.4921)
Test:  [ 0/50]  eta: 0:03:05  loss: 1.0884 (1.0884)  acc1: 85.6000 (85.6000)  acc5: 97.2000 (97.2000)  time: 3.7160  data: 3.4047  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.2564 (1.3293)  acc1: 75.6000 (76.1091)  acc5: 95.2000 (94.0000)  time: 0.5967  data: 0.3099  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.3088 (1.3502)  acc1: 74.4000 (75.2000)  acc5: 94.8000 (94.2286)  time: 0.2849  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.5753 (1.4830)  acc1: 68.4000 (72.1936)  acc5: 90.0000 (91.7161)  time: 0.2851  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.8290 (1.5830)  acc1: 63.2000 (69.7366)  acc5: 86.0000 (90.0488)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.8290 (1.6000)  acc1: 63.2000 (69.2480)  acc5: 86.0000 (89.8080)  time: 0.2845  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3561 s / it)
* Acc@1 69.150 Acc@5 89.676 loss 1.606
Accuracy of the model on the 50000 test images: 69.2%
Max accuracy: 69.42%
Epoch: [28]  [   0/2502]  eta: 1:58:25  lr: 0.003992  min_lr: 0.003992  loss: 3.2914 (3.2914)  weight_decay: 0.0500 (0.0500)  time: 2.8398  data: 2.1448  max mem: 53905
Epoch: [28]  [ 200/2502]  eta: 0:23:10  lr: 0.003992  min_lr: 0.003992  loss: 4.1683 (3.9982)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4966 (0.4963)  time: 0.5935  data: 0.0006  max mem: 53905
Epoch: [28]  [ 400/2502]  eta: 0:20:59  lr: 0.003992  min_lr: 0.003992  loss: 3.9658 (3.9623)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4732 (0.4943)  time: 0.6098  data: 0.0008  max mem: 53905
Epoch: [28]  [ 600/2502]  eta: 0:18:56  lr: 0.003991  min_lr: 0.003991  loss: 4.1001 (3.9713)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4753 (0.4891)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [28]  [ 800/2502]  eta: 0:16:54  lr: 0.003991  min_lr: 0.003991  loss: 4.1410 (3.9711)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4367 (0.4864)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [28]  [1000/2502]  eta: 0:14:54  lr: 0.003991  min_lr: 0.003991  loss: 4.1320 (3.9775)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4542 (0.4914)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [28]  [1200/2502]  eta: 0:12:54  lr: 0.003991  min_lr: 0.003991  loss: 3.8471 (3.9770)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4921 (0.4907)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [28]  [1400/2502]  eta: 0:10:55  lr: 0.003991  min_lr: 0.003991  loss: 4.0911 (3.9762)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4333 (0.4880)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [28]  [1600/2502]  eta: 0:08:56  lr: 0.003991  min_lr: 0.003991  loss: 3.8883 (3.9752)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4822 (0.4886)  time: 0.5925  data: 0.0006  max mem: 53905
Epoch: [28]  [1800/2502]  eta: 0:06:57  lr: 0.003990  min_lr: 0.003990  loss: 4.0100 (3.9797)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4673 (0.4870)  time: 0.5941  data: 0.0006  max mem: 53905
Epoch: [28]  [2000/2502]  eta: 0:04:58  lr: 0.003990  min_lr: 0.003990  loss: 3.5600 (3.9703)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4859 (0.4867)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [28]  [2200/2502]  eta: 0:02:59  lr: 0.003990  min_lr: 0.003990  loss: 4.1410 (3.9707)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4594 (0.4854)  time: 0.5920  data: 0.0007  max mem: 53905
Epoch: [28]  [2400/2502]  eta: 0:01:00  lr: 0.003990  min_lr: 0.003990  loss: 4.0664 (3.9652)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5212 (0.4872)  time: 0.5918  data: 0.0008  max mem: 53905
Epoch: [28]  [2501/2502]  eta: 0:00:00  lr: 0.003990  min_lr: 0.003990  loss: 4.2304 (3.9650)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4430 (0.4856)  time: 0.5329  data: 0.0011  max mem: 53905
Epoch: [28] Total time: 0:24:45 (0.5935 s / it)
Averaged stats: lr: 0.003990  min_lr: 0.003990  loss: 4.2304 (3.9668)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4430 (0.4856)
Test:  [ 0/50]  eta: 0:02:28  loss: 0.9206 (0.9206)  acc1: 87.6000 (87.6000)  acc5: 97.6000 (97.6000)  time: 2.9735  data: 2.6362  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.1475 (1.2231)  acc1: 78.4000 (76.9091)  acc5: 94.4000 (94.4000)  time: 0.5680  data: 0.2811  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.1475 (1.2390)  acc1: 74.0000 (75.6381)  acc5: 94.4000 (94.2857)  time: 0.3051  data: 0.0230  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.4926 (1.3982)  acc1: 68.8000 (72.6452)  acc5: 89.2000 (91.8710)  time: 0.2828  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.7651 (1.4935)  acc1: 65.2000 (70.5561)  acc5: 86.0000 (90.5073)  time: 0.2828  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.7651 (1.5233)  acc1: 65.2000 (69.8960)  acc5: 86.8000 (90.1840)  time: 0.2833  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3477 s / it)
* Acc@1 69.590 Acc@5 90.098 loss 1.526
Accuracy of the model on the 50000 test images: 69.6%
Max accuracy: 69.59%
Epoch: [29]  [   0/2502]  eta: 1:34:48  lr: 0.003990  min_lr: 0.003990  loss: 3.8003 (3.8003)  weight_decay: 0.0500 (0.0500)  time: 2.2737  data: 1.6807  max mem: 53905
Epoch: [29]  [ 200/2502]  eta: 0:23:05  lr: 0.003990  min_lr: 0.003990  loss: 3.7060 (3.9607)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4588 (0.4790)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [29]  [ 400/2502]  eta: 0:20:54  lr: 0.003989  min_lr: 0.003989  loss: 3.9125 (3.9210)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4996 (0.4884)  time: 0.5904  data: 0.0006  max mem: 53905
Epoch: [29]  [ 600/2502]  eta: 0:18:51  lr: 0.003989  min_lr: 0.003989  loss: 3.8224 (3.9147)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4569 (0.4876)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [29]  [ 800/2502]  eta: 0:16:51  lr: 0.003989  min_lr: 0.003989  loss: 3.8149 (3.9167)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5287 (0.4908)  time: 0.5914  data: 0.0008  max mem: 53905
Epoch: [29]  [1000/2502]  eta: 0:14:51  lr: 0.003989  min_lr: 0.003989  loss: 4.1501 (3.9143)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4716 (0.4939)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [29]  [1200/2502]  eta: 0:12:52  lr: 0.003989  min_lr: 0.003989  loss: 4.2848 (3.9175)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4455 (0.4911)  time: 0.5952  data: 0.0005  max mem: 53905
Epoch: [29]  [1400/2502]  eta: 0:10:54  lr: 0.003989  min_lr: 0.003989  loss: 4.0794 (3.9226)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5046 (0.4959)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [29]  [1600/2502]  eta: 0:08:55  lr: 0.003988  min_lr: 0.003988  loss: 4.2998 (3.9327)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4829 (0.4970)  time: 0.5905  data: 0.0006  max mem: 53905
Epoch: [29]  [1800/2502]  eta: 0:06:56  lr: 0.003988  min_lr: 0.003988  loss: 3.9937 (3.9355)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4571 (0.4986)  time: 0.5910  data: 0.0007  max mem: 53905
Epoch: [29]  [2000/2502]  eta: 0:04:57  lr: 0.003988  min_lr: 0.003988  loss: 3.6346 (3.9389)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4634 (0.4956)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [29]  [2200/2502]  eta: 0:02:59  lr: 0.003988  min_lr: 0.003988  loss: 4.1472 (3.9433)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4389 (0.4947)  time: 0.5914  data: 0.0008  max mem: 53905
Epoch: [29]  [2400/2502]  eta: 0:01:00  lr: 0.003988  min_lr: 0.003988  loss: 4.0359 (3.9454)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4728 (0.4937)  time: 0.5916  data: 0.0008  max mem: 53905
Epoch: [29]  [2501/2502]  eta: 0:00:00  lr: 0.003987  min_lr: 0.003987  loss: 4.0877 (3.9428)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4910 (0.4940)  time: 0.5329  data: 0.0006  max mem: 53905
Epoch: [29] Total time: 0:24:43 (0.5928 s / it)
Averaged stats: lr: 0.003987  min_lr: 0.003987  loss: 4.0877 (3.9527)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4910 (0.4940)
Test:  [ 0/50]  eta: 0:02:35  loss: 0.9506 (0.9506)  acc1: 86.8000 (86.8000)  acc5: 96.4000 (96.4000)  time: 3.1172  data: 2.7917  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.2237 (1.2554)  acc1: 78.4000 (76.0000)  acc5: 95.2000 (93.9636)  time: 0.5698  data: 0.2821  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.2270 (1.2660)  acc1: 73.2000 (75.5619)  acc5: 94.4000 (94.1905)  time: 0.3001  data: 0.0158  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.5050 (1.4362)  acc1: 71.2000 (72.5290)  acc5: 89.6000 (91.7677)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.8438 (1.5482)  acc1: 64.0000 (70.1854)  acc5: 85.6000 (90.2634)  time: 0.2847  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.8438 (1.5797)  acc1: 64.4000 (69.6880)  acc5: 85.6000 (89.8480)  time: 0.2844  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3496 s / it)
* Acc@1 69.690 Acc@5 90.112 loss 1.579
Accuracy of the model on the 50000 test images: 69.7%
Max accuracy: 69.69%
Epoch: [30]  [   0/2502]  eta: 1:30:37  lr: 0.003987  min_lr: 0.003987  loss: 4.4600 (4.4600)  weight_decay: 0.0500 (0.0500)  time: 2.1734  data: 1.5805  max mem: 53905
Epoch: [30]  [ 200/2502]  eta: 0:23:03  lr: 0.003987  min_lr: 0.003987  loss: 4.0448 (3.9501)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4309 (0.4849)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [30]  [ 400/2502]  eta: 0:20:57  lr: 0.003987  min_lr: 0.003987  loss: 4.0679 (3.9362)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4948 (0.4933)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [30]  [ 600/2502]  eta: 0:18:53  lr: 0.003987  min_lr: 0.003987  loss: 4.0493 (3.9394)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4805 (0.4972)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [30]  [ 800/2502]  eta: 0:16:52  lr: 0.003987  min_lr: 0.003987  loss: 3.9902 (3.9410)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5179 (0.4977)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [30]  [1000/2502]  eta: 0:14:53  lr: 0.003986  min_lr: 0.003986  loss: 4.0132 (3.9338)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5141 (0.5008)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [30]  [1200/2502]  eta: 0:12:53  lr: 0.003986  min_lr: 0.003986  loss: 4.0361 (3.9390)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4712 (0.4985)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [30]  [1400/2502]  eta: 0:10:54  lr: 0.003986  min_lr: 0.003986  loss: 4.0996 (3.9352)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5204 (0.5007)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [30]  [1600/2502]  eta: 0:08:55  lr: 0.003986  min_lr: 0.003986  loss: 3.9409 (3.9420)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4745 (0.5005)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [30]  [1800/2502]  eta: 0:06:56  lr: 0.003986  min_lr: 0.003986  loss: 4.1144 (3.9386)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4772 (0.4976)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [30]  [2000/2502]  eta: 0:04:58  lr: 0.003985  min_lr: 0.003985  loss: 3.9147 (3.9353)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4983 (0.4967)  time: 0.6000  data: 0.0004  max mem: 53905
Epoch: [30]  [2200/2502]  eta: 0:02:59  lr: 0.003985  min_lr: 0.003985  loss: 3.7176 (3.9360)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5054 (0.4972)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [30]  [2400/2502]  eta: 0:01:00  lr: 0.003985  min_lr: 0.003985  loss: 4.0468 (3.9356)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5272 (0.4997)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [30]  [2501/2502]  eta: 0:00:00  lr: 0.003985  min_lr: 0.003985  loss: 4.0188 (3.9380)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4713 (0.4996)  time: 0.5323  data: 0.0008  max mem: 53905
Epoch: [30] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.003985  min_lr: 0.003985  loss: 4.0188 (3.9323)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4713 (0.4996)
Test:  [ 0/50]  eta: 0:03:04  loss: 0.7369 (0.7369)  acc1: 87.2000 (87.2000)  acc5: 97.2000 (97.2000)  time: 3.6870  data: 3.3653  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.0650 (1.0934)  acc1: 78.8000 (78.1455)  acc5: 94.4000 (94.2545)  time: 0.5912  data: 0.3063  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0650 (1.1217)  acc1: 75.2000 (76.7238)  acc5: 94.4000 (94.3429)  time: 0.2818  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.5343 (1.3024)  acc1: 69.6000 (73.6903)  acc5: 88.4000 (92.1548)  time: 0.2822  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.7558 (1.4314)  acc1: 65.2000 (71.2195)  acc5: 85.6000 (90.5171)  time: 0.2821  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.7704 (1.4739)  acc1: 64.8000 (70.4720)  acc5: 85.2000 (90.1600)  time: 0.2819  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3526 s / it)
* Acc@1 70.320 Acc@5 90.204 loss 1.476
Accuracy of the model on the 50000 test images: 70.3%
Max accuracy: 70.32%
Epoch: [31]  [   0/2502]  eta: 1:39:47  lr: 0.003985  min_lr: 0.003985  loss: 4.5473 (4.5473)  weight_decay: 0.0500 (0.0500)  time: 2.3931  data: 1.7859  max mem: 53905
Epoch: [31]  [ 200/2502]  eta: 0:23:04  lr: 0.003985  min_lr: 0.003985  loss: 4.0817 (3.9736)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4897 (0.4774)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [31]  [ 400/2502]  eta: 0:20:53  lr: 0.003984  min_lr: 0.003984  loss: 4.1795 (3.9557)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4823 (0.4847)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [31]  [ 600/2502]  eta: 0:18:51  lr: 0.003984  min_lr: 0.003984  loss: 3.9687 (3.9237)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5089 (0.4901)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [31]  [ 800/2502]  eta: 0:16:51  lr: 0.003984  min_lr: 0.003984  loss: 4.0392 (3.9194)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5142 (0.4948)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [31]  [1000/2502]  eta: 0:14:51  lr: 0.003984  min_lr: 0.003984  loss: 4.2392 (3.9165)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4517 (0.4948)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [31]  [1200/2502]  eta: 0:12:53  lr: 0.003983  min_lr: 0.003983  loss: 3.9101 (3.9198)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.6033  data: 0.0004  max mem: 53905
Epoch: [31]  [1400/2502]  eta: 0:10:54  lr: 0.003983  min_lr: 0.003983  loss: 3.9636 (3.9120)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4991 (nan)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [31]  [1600/2502]  eta: 0:08:55  lr: 0.003983  min_lr: 0.003983  loss: 3.7627 (3.9056)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5001 (nan)  time: 0.5977  data: 0.0004  max mem: 53905
Epoch: [31]  [1800/2502]  eta: 0:06:56  lr: 0.003983  min_lr: 0.003983  loss: 4.0162 (3.9023)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4999 (nan)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [31]  [2000/2502]  eta: 0:04:57  lr: 0.003983  min_lr: 0.003983  loss: 4.0351 (3.9100)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4901 (nan)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [31]  [2200/2502]  eta: 0:02:59  lr: 0.003982  min_lr: 0.003982  loss: 4.1281 (3.9097)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4850 (nan)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [31]  [2400/2502]  eta: 0:01:00  lr: 0.003982  min_lr: 0.003982  loss: 4.0847 (3.9131)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4909 (nan)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [31]  [2501/2502]  eta: 0:00:00  lr: 0.003982  min_lr: 0.003982  loss: 3.7605 (3.9104)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4683 (nan)  time: 0.5327  data: 0.0008  max mem: 53905
Epoch: [31] Total time: 0:24:43 (0.5929 s / it)
Averaged stats: lr: 0.003982  min_lr: 0.003982  loss: 3.7605 (3.9249)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4683 (nan)
Test:  [ 0/50]  eta: 0:02:27  loss: 0.8085 (0.8085)  acc1: 86.4000 (86.4000)  acc5: 98.4000 (98.4000)  time: 2.9530  data: 2.6345  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 1.0894 (1.1531)  acc1: 79.2000 (76.6182)  acc5: 94.0000 (94.1091)  time: 0.5256  data: 0.2400  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.0985 (1.1432)  acc1: 76.4000 (76.7048)  acc5: 94.0000 (94.5905)  time: 0.2830  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3593 (1.3095)  acc1: 69.6000 (73.7548)  acc5: 91.6000 (92.2323)  time: 0.2833  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.6686 (1.4028)  acc1: 67.2000 (71.7463)  acc5: 86.0000 (90.8878)  time: 0.2831  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5961 (1.4199)  acc1: 65.6000 (71.4000)  acc5: 88.0000 (90.8080)  time: 0.2830  data: 0.0002  max mem: 53905
Test: Total time: 0:00:16 (0.3388 s / it)
* Acc@1 71.220 Acc@5 90.866 loss 1.424
Accuracy of the model on the 50000 test images: 71.2%
Max accuracy: 71.22%
Epoch: [32]  [   0/2502]  eta: 1:37:58  lr: 0.003982  min_lr: 0.003982  loss: 4.4836 (4.4836)  weight_decay: 0.0500 (0.0500)  time: 2.3497  data: 1.7451  max mem: 53905
Epoch: [32]  [ 200/2502]  eta: 0:23:03  lr: 0.003982  min_lr: 0.003982  loss: 4.1019 (3.8877)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4954 (0.4901)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [32]  [ 400/2502]  eta: 0:20:56  lr: 0.003981  min_lr: 0.003981  loss: 3.7964 (3.9347)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4980 (0.5006)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [32]  [ 600/2502]  eta: 0:18:53  lr: 0.003981  min_lr: 0.003981  loss: 4.2618 (3.9436)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4823 (0.5023)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [32]  [ 800/2502]  eta: 0:16:52  lr: 0.003981  min_lr: 0.003981  loss: 3.8962 (3.9389)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4414 (0.4980)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [32]  [1000/2502]  eta: 0:14:53  lr: 0.003981  min_lr: 0.003981  loss: 4.1873 (3.9457)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4807 (0.4960)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [32]  [1200/2502]  eta: 0:12:53  lr: 0.003980  min_lr: 0.003980  loss: 4.2098 (3.9339)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4812 (0.4998)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [32]  [1400/2502]  eta: 0:10:54  lr: 0.003980  min_lr: 0.003980  loss: 3.8765 (3.9307)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4709 (0.5007)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [32]  [1600/2502]  eta: 0:08:55  lr: 0.003980  min_lr: 0.003980  loss: 4.0597 (3.9331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5294 (0.5035)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [32]  [1800/2502]  eta: 0:06:56  lr: 0.003980  min_lr: 0.003980  loss: 4.0468 (3.9351)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4508 (0.5014)  time: 0.5905  data: 0.0005  max mem: 53905
Epoch: [32]  [2000/2502]  eta: 0:04:58  lr: 0.003979  min_lr: 0.003979  loss: 3.9801 (3.9362)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4950 (0.5001)  time: 0.6071  data: 0.0006  max mem: 53905
Epoch: [32]  [2200/2502]  eta: 0:02:59  lr: 0.003979  min_lr: 0.003979  loss: 3.9037 (3.9324)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4936 (0.5000)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [32]  [2400/2502]  eta: 0:01:00  lr: 0.003979  min_lr: 0.003979  loss: 4.0792 (3.9291)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4953 (0.5011)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [32]  [2501/2502]  eta: 0:00:00  lr: 0.003979  min_lr: 0.003979  loss: 3.7147 (3.9263)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4715 (0.5010)  time: 0.5331  data: 0.0007  max mem: 53905
Epoch: [32] Total time: 0:24:43 (0.5930 s / it)
Averaged stats: lr: 0.003979  min_lr: 0.003979  loss: 3.7147 (3.9117)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4715 (0.5010)
Test:  [ 0/50]  eta: 0:02:31  loss: 0.8865 (0.8865)  acc1: 87.6000 (87.6000)  acc5: 97.2000 (97.2000)  time: 3.0292  data: 2.6978  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 1.0608 (1.1013)  acc1: 78.8000 (78.1091)  acc5: 95.6000 (94.7636)  time: 0.5427  data: 0.2538  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.1311 (1.1268)  acc1: 76.8000 (77.6191)  acc5: 95.2000 (94.7810)  time: 0.2899  data: 0.0050  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2869 (1.2910)  acc1: 70.0000 (74.3355)  acc5: 90.4000 (92.5936)  time: 0.2856  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.7011 (1.4007)  acc1: 65.6000 (71.9805)  acc5: 87.2000 (91.1512)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.7011 (1.4304)  acc1: 65.6000 (70.9920)  acc5: 87.2000 (90.7200)  time: 0.2855  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3441 s / it)
* Acc@1 70.724 Acc@5 90.650 loss 1.437
Accuracy of the model on the 50000 test images: 70.7%
Max accuracy: 71.22%
Epoch: [33]  [   0/2502]  eta: 1:52:29  lr: 0.003979  min_lr: 0.003979  loss: 3.1533 (3.1533)  weight_decay: 0.0500 (0.0500)  time: 2.6977  data: 1.7482  max mem: 53905
Epoch: [33]  [ 200/2502]  eta: 0:23:11  lr: 0.003979  min_lr: 0.003979  loss: 3.9600 (3.8962)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4826 (0.5107)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [33]  [ 400/2502]  eta: 0:20:58  lr: 0.003978  min_lr: 0.003978  loss: 4.2258 (3.9331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5127 (0.5214)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [33]  [ 600/2502]  eta: 0:18:54  lr: 0.003978  min_lr: 0.003978  loss: 3.9304 (3.9433)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5087 (0.5160)  time: 0.5967  data: 0.0004  max mem: 53905
Epoch: [33]  [ 800/2502]  eta: 0:16:53  lr: 0.003978  min_lr: 0.003978  loss: 3.9465 (3.9226)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5356 (0.5179)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [33]  [1000/2502]  eta: 0:14:53  lr: 0.003977  min_lr: 0.003977  loss: 3.6782 (3.9082)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5089 (0.5136)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [33]  [1200/2502]  eta: 0:12:54  lr: 0.003977  min_lr: 0.003977  loss: 4.0880 (3.9103)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5221 (0.5146)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [33]  [1400/2502]  eta: 0:10:55  lr: 0.003977  min_lr: 0.003977  loss: 4.1244 (3.9075)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5518 (0.5168)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [33]  [1600/2502]  eta: 0:08:56  lr: 0.003977  min_lr: 0.003977  loss: 3.4615 (3.9057)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5549 (0.5176)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [33]  [1800/2502]  eta: 0:06:57  lr: 0.003976  min_lr: 0.003976  loss: 3.7795 (3.9063)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4596 (0.5143)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [33]  [2000/2502]  eta: 0:04:58  lr: 0.003976  min_lr: 0.003976  loss: 4.1926 (3.9019)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4646 (0.5116)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [33]  [2200/2502]  eta: 0:02:59  lr: 0.003976  min_lr: 0.003976  loss: 4.1683 (3.9034)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4739 (0.5102)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [33]  [2400/2502]  eta: 0:01:00  lr: 0.003976  min_lr: 0.003976  loss: 4.1458 (3.9056)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4946 (0.5104)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [33]  [2501/2502]  eta: 0:00:00  lr: 0.003975  min_lr: 0.003975  loss: 3.9728 (3.9036)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5183 (0.5115)  time: 0.5409  data: 0.0008  max mem: 53905
Epoch: [33] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.003975  min_lr: 0.003975  loss: 3.9728 (3.8956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5183 (0.5115)
Test:  [ 0/50]  eta: 0:04:42  loss: 0.9228 (0.9228)  acc1: 86.8000 (86.8000)  acc5: 97.6000 (97.6000)  time: 5.6504  data: 5.3033  max mem: 53905
Test:  [10/50]  eta: 0:00:34  loss: 1.1934 (1.2297)  acc1: 76.8000 (77.2364)  acc5: 95.2000 (94.3273)  time: 0.8628  data: 0.5608  max mem: 53905
Test:  [20/50]  eta: 0:00:18  loss: 1.2276 (1.2596)  acc1: 74.4000 (76.5143)  acc5: 94.4000 (94.3810)  time: 0.3514  data: 0.0570  max mem: 53905
Test:  [30/50]  eta: 0:00:10  loss: 1.5040 (1.3957)  acc1: 70.4000 (73.2387)  acc5: 90.0000 (92.0645)  time: 0.3059  data: 0.0140  max mem: 53905
Test:  [40/50]  eta: 0:00:04  loss: 1.7120 (1.4806)  acc1: 65.2000 (71.2781)  acc5: 86.8000 (90.8098)  time: 0.2918  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.6823 (1.4975)  acc1: 66.4000 (70.8720)  acc5: 87.6000 (90.6000)  time: 0.2879  data: 0.0002  max mem: 53905
Test: Total time: 0:00:21 (0.4238 s / it)
* Acc@1 70.854 Acc@5 90.796 loss 1.500
Accuracy of the model on the 50000 test images: 70.9%
Max accuracy: 71.22%
Epoch: [34]  [   0/2502]  eta: 1:41:56  lr: 0.003975  min_lr: 0.003975  loss: 4.2687 (4.2687)  weight_decay: 0.0500 (0.0500)  time: 2.4446  data: 1.8461  max mem: 53905
Epoch: [34]  [ 200/2502]  eta: 0:23:02  lr: 0.003975  min_lr: 0.003975  loss: 3.9058 (3.8690)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4842 (0.4899)  time: 0.5923  data: 0.0008  max mem: 53905
Epoch: [34]  [ 400/2502]  eta: 0:20:54  lr: 0.003975  min_lr: 0.003975  loss: 3.7851 (3.8402)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5000 (nan)  time: 0.5909  data: 0.0008  max mem: 53905
Epoch: [34]  [ 600/2502]  eta: 0:18:52  lr: 0.003975  min_lr: 0.003975  loss: 3.8518 (3.8623)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5115 (nan)  time: 0.5911  data: 0.0007  max mem: 53905
Epoch: [34]  [ 800/2502]  eta: 0:16:51  lr: 0.003974  min_lr: 0.003974  loss: 3.6663 (3.8710)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4879 (nan)  time: 0.5925  data: 0.0006  max mem: 53905
Epoch: [34]  [1000/2502]  eta: 0:14:52  lr: 0.003974  min_lr: 0.003974  loss: 3.9763 (3.8797)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4883 (nan)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [34]  [1200/2502]  eta: 0:12:53  lr: 0.003974  min_lr: 0.003974  loss: 3.9515 (3.8654)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4869 (nan)  time: 0.5924  data: 0.0009  max mem: 53905
Epoch: [34]  [1400/2502]  eta: 0:10:54  lr: 0.003973  min_lr: 0.003973  loss: 3.9069 (3.8711)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4915 (nan)  time: 0.5978  data: 0.0006  max mem: 53905
Epoch: [34]  [1600/2502]  eta: 0:08:55  lr: 0.003973  min_lr: 0.003973  loss: 4.1047 (3.8741)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5132 (nan)  time: 0.5916  data: 0.0007  max mem: 53905
Epoch: [34]  [1800/2502]  eta: 0:06:56  lr: 0.003973  min_lr: 0.003973  loss: 3.6472 (3.8749)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4897 (nan)  time: 0.5923  data: 0.0007  max mem: 53905
Epoch: [34]  [2000/2502]  eta: 0:04:58  lr: 0.003972  min_lr: 0.003972  loss: 4.0967 (3.8810)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4751 (nan)  time: 0.5936  data: 0.0006  max mem: 53905
Epoch: [34]  [2200/2502]  eta: 0:02:59  lr: 0.003972  min_lr: 0.003972  loss: 4.0471 (3.8769)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5120 (nan)  time: 0.5927  data: 0.0006  max mem: 53905
Epoch: [34]  [2400/2502]  eta: 0:01:00  lr: 0.003972  min_lr: 0.003972  loss: 3.6523 (3.8784)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5291 (nan)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [34]  [2501/2502]  eta: 0:00:00  lr: 0.003972  min_lr: 0.003972  loss: 4.0276 (3.8778)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4612 (nan)  time: 0.5381  data: 0.0009  max mem: 53905
Epoch: [34] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.003972  min_lr: 0.003972  loss: 4.0276 (3.8912)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4612 (nan)
Test:  [ 0/50]  eta: 0:03:03  loss: 0.8125 (0.8125)  acc1: 89.6000 (89.6000)  acc5: 96.4000 (96.4000)  time: 3.6621  data: 3.3465  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.1284 (1.1667)  acc1: 79.2000 (78.8727)  acc5: 94.4000 (94.3636)  time: 0.5917  data: 0.3046  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.1080 (1.1659)  acc1: 75.2000 (77.5048)  acc5: 94.4000 (94.6095)  time: 0.2849  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3476 (1.3294)  acc1: 70.0000 (74.1677)  acc5: 90.4000 (92.3613)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.7008 (1.4317)  acc1: 65.6000 (71.7756)  acc5: 86.8000 (90.9463)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.6783 (1.4585)  acc1: 64.4000 (70.9440)  acc5: 86.4000 (90.6880)  time: 0.2845  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3550 s / it)
* Acc@1 71.160 Acc@5 91.006 loss 1.453
Accuracy of the model on the 50000 test images: 71.2%
Max accuracy: 71.22%
Epoch: [35]  [   0/2502]  eta: 1:45:59  lr: 0.003972  min_lr: 0.003972  loss: 4.5909 (4.5909)  weight_decay: 0.0500 (0.0500)  time: 2.5417  data: 1.5412  max mem: 53905
Epoch: [35]  [ 200/2502]  eta: 0:23:09  lr: 0.003971  min_lr: 0.003971  loss: 3.8933 (3.8166)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5185 (0.5290)  time: 0.5918  data: 0.0008  max mem: 53905
Epoch: [35]  [ 400/2502]  eta: 0:20:56  lr: 0.003971  min_lr: 0.003971  loss: 3.9852 (3.8316)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5006 (0.5151)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [35]  [ 600/2502]  eta: 0:18:53  lr: 0.003971  min_lr: 0.003971  loss: 4.0254 (3.8394)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5527 (0.5296)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [35]  [ 800/2502]  eta: 0:16:53  lr: 0.003971  min_lr: 0.003971  loss: 4.0526 (3.8482)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4878 (0.5271)  time: 0.5927  data: 0.0008  max mem: 53905
Epoch: [35]  [1000/2502]  eta: 0:14:53  lr: 0.003970  min_lr: 0.003970  loss: 4.2444 (3.8522)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5082 (0.5264)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [35]  [1200/2502]  eta: 0:12:53  lr: 0.003970  min_lr: 0.003970  loss: 3.7727 (3.8560)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4839 (0.5235)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [35]  [1400/2502]  eta: 0:10:54  lr: 0.003970  min_lr: 0.003970  loss: 3.8365 (3.8600)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4641 (0.5192)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [35]  [1600/2502]  eta: 0:08:55  lr: 0.003969  min_lr: 0.003969  loss: 4.1377 (3.8561)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4953 (0.5173)  time: 0.5917  data: 0.0007  max mem: 53905
Epoch: [35]  [1800/2502]  eta: 0:06:56  lr: 0.003969  min_lr: 0.003969  loss: 3.9053 (3.8580)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4923 (0.5152)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [35]  [2000/2502]  eta: 0:04:57  lr: 0.003969  min_lr: 0.003969  loss: 3.9612 (3.8579)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5597 (0.5182)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [35]  [2200/2502]  eta: 0:02:59  lr: 0.003968  min_lr: 0.003968  loss: 3.4525 (3.8637)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4761 (0.5161)  time: 0.5986  data: 0.0005  max mem: 53905
Epoch: [35]  [2400/2502]  eta: 0:01:00  lr: 0.003968  min_lr: 0.003968  loss: 3.9487 (3.8641)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4847 (0.5159)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [35]  [2501/2502]  eta: 0:00:00  lr: 0.003968  min_lr: 0.003968  loss: 3.6765 (3.8636)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5077 (0.5167)  time: 0.5328  data: 0.0008  max mem: 53905
Epoch: [35] Total time: 0:24:43 (0.5930 s / it)
Averaged stats: lr: 0.003968  min_lr: 0.003968  loss: 3.6765 (3.8690)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5077 (0.5167)
Test:  [ 0/50]  eta: 0:02:31  loss: 0.8272 (0.8272)  acc1: 89.2000 (89.2000)  acc5: 96.8000 (96.8000)  time: 3.0345  data: 2.7141  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.0820 (1.0929)  acc1: 80.8000 (79.6364)  acc5: 94.8000 (94.8364)  time: 0.5601  data: 0.2727  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.0824 (1.1125)  acc1: 77.6000 (78.6095)  acc5: 95.2000 (95.0095)  time: 0.2989  data: 0.0145  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3152 (1.2379)  acc1: 74.0000 (75.4323)  acc5: 91.2000 (92.9936)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5621 (1.3200)  acc1: 66.8000 (73.0537)  acc5: 88.0000 (91.7171)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5786 (1.3487)  acc1: 66.4000 (72.3040)  acc5: 88.0000 (91.3680)  time: 0.2846  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3476 s / it)
* Acc@1 71.870 Acc@5 91.280 loss 1.362
Accuracy of the model on the 50000 test images: 71.9%
Max accuracy: 71.87%
Epoch: [36]  [   0/2502]  eta: 1:35:42  lr: 0.003968  min_lr: 0.003968  loss: 3.4358 (3.4358)  weight_decay: 0.0500 (0.0500)  time: 2.2950  data: 1.6997  max mem: 53905
Epoch: [36]  [ 200/2502]  eta: 0:23:04  lr: 0.003968  min_lr: 0.003968  loss: 3.9063 (3.8348)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4893 (0.5062)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [36]  [ 400/2502]  eta: 0:20:56  lr: 0.003967  min_lr: 0.003967  loss: 4.0122 (3.8481)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4616 (0.5059)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [36]  [ 600/2502]  eta: 0:18:53  lr: 0.003967  min_lr: 0.003967  loss: 4.0066 (3.8373)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5094 (0.5099)  time: 0.5912  data: 0.0006  max mem: 53905
Epoch: [36]  [ 800/2502]  eta: 0:16:52  lr: 0.003967  min_lr: 0.003967  loss: 3.7685 (3.8140)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4552 (0.5020)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [36]  [1000/2502]  eta: 0:14:53  lr: 0.003966  min_lr: 0.003966  loss: 3.8335 (3.8138)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5080 (0.5027)  time: 0.5918  data: 0.0007  max mem: 53905
Epoch: [36]  [1200/2502]  eta: 0:12:53  lr: 0.003966  min_lr: 0.003966  loss: 3.5569 (3.8194)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4949 (0.5043)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [36]  [1400/2502]  eta: 0:10:54  lr: 0.003966  min_lr: 0.003966  loss: 4.0866 (3.8300)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4867 (0.5064)  time: 0.6019  data: 0.0005  max mem: 53905
Epoch: [36]  [1600/2502]  eta: 0:08:55  lr: 0.003965  min_lr: 0.003965  loss: 4.1738 (3.8342)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4598 (0.5043)  time: 0.6000  data: 0.0009  max mem: 53905
Epoch: [36]  [1800/2502]  eta: 0:06:56  lr: 0.003965  min_lr: 0.003965  loss: 4.1464 (3.8368)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4554 (0.5020)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [36]  [2000/2502]  eta: 0:04:58  lr: 0.003965  min_lr: 0.003965  loss: 3.7829 (3.8367)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5136 (0.5051)  time: 0.5914  data: 0.0007  max mem: 53905
Epoch: [36]  [2200/2502]  eta: 0:02:59  lr: 0.003964  min_lr: 0.003964  loss: 3.8530 (3.8387)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4711 (0.5044)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [36]  [2400/2502]  eta: 0:01:00  lr: 0.003964  min_lr: 0.003964  loss: 4.2036 (3.8397)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5049 (0.5049)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [36]  [2501/2502]  eta: 0:00:00  lr: 0.003964  min_lr: 0.003964  loss: 3.9918 (3.8393)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5097 (0.5057)  time: 0.5388  data: 0.0008  max mem: 53905
Epoch: [36] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.003964  min_lr: 0.003964  loss: 3.9918 (3.8547)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5097 (0.5057)
Test:  [ 0/50]  eta: 0:02:31  loss: 0.8136 (0.8136)  acc1: 88.0000 (88.0000)  acc5: 98.4000 (98.4000)  time: 3.0329  data: 2.7017  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 1.0905 (1.1604)  acc1: 80.8000 (78.8364)  acc5: 96.0000 (95.1636)  time: 0.5406  data: 0.2518  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.2152 (1.1930)  acc1: 76.4000 (77.4667)  acc5: 95.6000 (95.1429)  time: 0.2885  data: 0.0037  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3653 (1.3401)  acc1: 71.2000 (74.6065)  acc5: 91.6000 (92.9806)  time: 0.2855  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.7158 (1.4394)  acc1: 65.2000 (72.1073)  acc5: 87.6000 (91.5317)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.7158 (1.4563)  acc1: 65.6000 (71.7600)  acc5: 88.8000 (91.3040)  time: 0.2849  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3434 s / it)
* Acc@1 71.922 Acc@5 91.236 loss 1.461
Accuracy of the model on the 50000 test images: 71.9%
Max accuracy: 71.92%
Epoch: [37]  [   0/2502]  eta: 1:49:56  lr: 0.003964  min_lr: 0.003964  loss: 3.2037 (3.2037)  weight_decay: 0.0500 (0.0500)  time: 2.6366  data: 2.0423  max mem: 53905
Epoch: [37]  [ 200/2502]  eta: 0:23:07  lr: 0.003963  min_lr: 0.003963  loss: 3.8046 (3.8254)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4667 (0.5071)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [37]  [ 400/2502]  eta: 0:20:57  lr: 0.003963  min_lr: 0.003963  loss: 3.8366 (3.8260)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5271 (0.5107)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [37]  [ 600/2502]  eta: 0:18:55  lr: 0.003963  min_lr: 0.003963  loss: 4.0773 (3.8308)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4957 (0.5176)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [37]  [ 800/2502]  eta: 0:16:54  lr: 0.003962  min_lr: 0.003962  loss: 3.5814 (3.8385)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4865 (0.5191)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [37]  [1000/2502]  eta: 0:14:54  lr: 0.003962  min_lr: 0.003962  loss: 4.0552 (3.8291)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4779 (0.5176)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [37]  [1200/2502]  eta: 0:12:54  lr: 0.003962  min_lr: 0.003962  loss: 3.8396 (3.8268)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5199 (0.5188)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [37]  [1400/2502]  eta: 0:10:55  lr: 0.003961  min_lr: 0.003961  loss: 3.9699 (3.8265)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5503 (0.5204)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [37]  [1600/2502]  eta: 0:08:56  lr: 0.003961  min_lr: 0.003961  loss: 3.3678 (3.8228)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5021 (0.5194)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [37]  [1800/2502]  eta: 0:06:57  lr: 0.003961  min_lr: 0.003961  loss: 3.9391 (3.8188)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4747 (0.5198)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [37]  [2000/2502]  eta: 0:04:58  lr: 0.003960  min_lr: 0.003960  loss: 3.8702 (3.8120)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5315 (0.5233)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [37]  [2200/2502]  eta: 0:02:59  lr: 0.003960  min_lr: 0.003960  loss: 4.0181 (3.8157)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5316 (0.5248)  time: 0.6048  data: 0.0004  max mem: 53905
Epoch: [37]  [2400/2502]  eta: 0:01:00  lr: 0.003960  min_lr: 0.003960  loss: 3.8894 (3.8190)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4998 (0.5236)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [37]  [2501/2502]  eta: 0:00:00  lr: 0.003959  min_lr: 0.003959  loss: 4.1506 (3.8224)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4654 (0.5217)  time: 0.5323  data: 0.0007  max mem: 53905
Epoch: [37] Total time: 0:24:45 (0.5936 s / it)
Averaged stats: lr: 0.003959  min_lr: 0.003959  loss: 4.1506 (3.8367)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4654 (0.5217)
Test:  [ 0/50]  eta: 0:02:53  loss: 0.8680 (0.8680)  acc1: 88.4000 (88.4000)  acc5: 98.4000 (98.4000)  time: 3.4653  data: 3.1579  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.1065 (1.1570)  acc1: 78.8000 (78.7273)  acc5: 95.2000 (95.0182)  time: 0.5730  data: 0.2875  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.1939 (1.1747)  acc1: 76.0000 (77.9429)  acc5: 95.6000 (95.3333)  time: 0.2839  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3410 (1.3329)  acc1: 71.2000 (75.1871)  acc5: 91.2000 (93.1226)  time: 0.2842  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.7106 (1.4240)  acc1: 66.0000 (72.9073)  acc5: 88.0000 (91.8244)  time: 0.2841  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.6770 (1.4480)  acc1: 66.0000 (72.3200)  acc5: 87.2000 (91.4240)  time: 0.2839  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3504 s / it)
* Acc@1 71.952 Acc@5 91.294 loss 1.460
Accuracy of the model on the 50000 test images: 72.0%
Max accuracy: 71.95%
Epoch: [38]  [   0/2502]  eta: 1:44:02  lr: 0.003959  min_lr: 0.003959  loss: 3.9369 (3.9369)  weight_decay: 0.0500 (0.0500)  time: 2.4948  data: 1.8853  max mem: 53905
Epoch: [38]  [ 200/2502]  eta: 0:23:03  lr: 0.003959  min_lr: 0.003959  loss: 3.8538 (3.8258)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5274 (0.5178)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [38]  [ 400/2502]  eta: 0:20:56  lr: 0.003959  min_lr: 0.003959  loss: 3.8952 (3.8551)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5084 (0.5182)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [38]  [ 600/2502]  eta: 0:18:53  lr: 0.003958  min_lr: 0.003958  loss: 4.0217 (3.8517)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4932 (0.5184)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [38]  [ 800/2502]  eta: 0:16:52  lr: 0.003958  min_lr: 0.003958  loss: 4.0193 (3.8524)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4945 (0.5216)  time: 0.5991  data: 0.0005  max mem: 53905
Epoch: [38]  [1000/2502]  eta: 0:14:53  lr: 0.003958  min_lr: 0.003958  loss: 4.0869 (3.8606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4893 (0.5171)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [38]  [1200/2502]  eta: 0:12:53  lr: 0.003957  min_lr: 0.003957  loss: 3.9840 (3.8581)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5469 (0.5208)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [38]  [1400/2502]  eta: 0:10:55  lr: 0.003957  min_lr: 0.003957  loss: 3.9692 (3.8580)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5107 (0.5185)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [38]  [1600/2502]  eta: 0:08:56  lr: 0.003956  min_lr: 0.003956  loss: 3.8044 (3.8509)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5812 (0.5232)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [38]  [1800/2502]  eta: 0:06:57  lr: 0.003956  min_lr: 0.003956  loss: 3.8549 (3.8521)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5085 (0.5232)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [38]  [2000/2502]  eta: 0:04:58  lr: 0.003956  min_lr: 0.003956  loss: 3.9101 (3.8520)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4656 (0.5206)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [38]  [2200/2502]  eta: 0:02:59  lr: 0.003955  min_lr: 0.003955  loss: 3.7747 (3.8471)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5440 (0.5217)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [38]  [2400/2502]  eta: 0:01:00  lr: 0.003955  min_lr: 0.003955  loss: 3.9242 (3.8465)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4871 (0.5210)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [38]  [2501/2502]  eta: 0:00:00  lr: 0.003955  min_lr: 0.003955  loss: 3.9453 (3.8450)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5131 (0.5216)  time: 0.5338  data: 0.0007  max mem: 53905
Epoch: [38] Total time: 0:24:45 (0.5938 s / it)
Averaged stats: lr: 0.003955  min_lr: 0.003955  loss: 3.9453 (3.8381)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5131 (0.5216)
Test:  [ 0/50]  eta: 0:02:44  loss: 0.8773 (0.8773)  acc1: 89.2000 (89.2000)  acc5: 98.4000 (98.4000)  time: 3.2819  data: 2.9588  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.1754 (1.1846)  acc1: 80.8000 (78.9091)  acc5: 95.2000 (94.8000)  time: 0.5819  data: 0.2900  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.1740 (1.1832)  acc1: 78.0000 (78.3619)  acc5: 95.2000 (95.1429)  time: 0.3027  data: 0.0118  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3919 (1.3301)  acc1: 71.2000 (75.1355)  acc5: 92.4000 (93.0968)  time: 0.2895  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.6831 (1.4279)  acc1: 67.2000 (72.9463)  acc5: 88.4000 (92.0098)  time: 0.2851  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.6929 (1.4595)  acc1: 66.8000 (72.3120)  acc5: 88.4000 (91.5520)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3543 s / it)
* Acc@1 72.098 Acc@5 91.532 loss 1.464
Accuracy of the model on the 50000 test images: 72.1%
Max accuracy: 72.10%
Epoch: [39]  [   0/2502]  eta: 1:29:09  lr: 0.003955  min_lr: 0.003955  loss: 4.3361 (4.3361)  weight_decay: 0.0500 (0.0500)  time: 2.1383  data: 1.5277  max mem: 53905
Epoch: [39]  [ 200/2502]  eta: 0:23:02  lr: 0.003954  min_lr: 0.003954  loss: 3.9263 (3.8556)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5002 (0.5253)  time: 0.5937  data: 0.0005  max mem: 53905
Epoch: [39]  [ 400/2502]  eta: 0:20:54  lr: 0.003954  min_lr: 0.003954  loss: 3.9358 (3.8894)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4956 (0.5117)  time: 0.5935  data: 0.0006  max mem: 53905
Epoch: [39]  [ 600/2502]  eta: 0:18:54  lr: 0.003954  min_lr: 0.003954  loss: 4.0210 (3.8629)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5558 (0.5109)  time: 0.5937  data: 0.0004  max mem: 53905
Epoch: [39]  [ 800/2502]  eta: 0:16:53  lr: 0.003953  min_lr: 0.003953  loss: 3.8502 (3.8558)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5224 (0.5090)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [39]  [1000/2502]  eta: 0:14:53  lr: 0.003953  min_lr: 0.003953  loss: 3.8820 (3.8463)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5226 (0.5152)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [39]  [1200/2502]  eta: 0:12:54  lr: 0.003952  min_lr: 0.003952  loss: 3.4451 (3.8393)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4766 (0.5164)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [39]  [1400/2502]  eta: 0:10:55  lr: 0.003952  min_lr: 0.003952  loss: 3.9961 (3.8528)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4886 (0.5144)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [39]  [1600/2502]  eta: 0:08:56  lr: 0.003952  min_lr: 0.003952  loss: 4.0258 (3.8393)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4863 (0.5160)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [39]  [1800/2502]  eta: 0:06:57  lr: 0.003951  min_lr: 0.003951  loss: 4.1460 (3.8434)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5084 (0.5176)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [39]  [2000/2502]  eta: 0:04:58  lr: 0.003951  min_lr: 0.003951  loss: 4.0590 (3.8420)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4561 (0.5159)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [39]  [2200/2502]  eta: 0:02:59  lr: 0.003950  min_lr: 0.003950  loss: 3.7539 (3.8430)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4968 (0.5158)  time: 0.5976  data: 0.0004  max mem: 53905
Epoch: [39]  [2400/2502]  eta: 0:01:00  lr: 0.003950  min_lr: 0.003950  loss: 3.8993 (3.8421)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4960 (0.5165)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [39]  [2501/2502]  eta: 0:00:00  lr: 0.003950  min_lr: 0.003950  loss: 3.9151 (3.8409)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5478 (0.5187)  time: 0.5330  data: 0.0009  max mem: 53905
Epoch: [39] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.003950  min_lr: 0.003950  loss: 3.9151 (3.8239)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5478 (0.5187)
Test:  [ 0/50]  eta: 0:02:33  loss: 0.8004 (0.8004)  acc1: 88.4000 (88.4000)  acc5: 98.0000 (98.0000)  time: 3.0732  data: 2.7621  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 1.0409 (1.1357)  acc1: 81.2000 (78.4364)  acc5: 96.0000 (95.0909)  time: 0.5385  data: 0.2516  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.1239 (1.1458)  acc1: 76.8000 (78.0571)  acc5: 95.2000 (95.0095)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3874 (1.2988)  acc1: 71.2000 (74.6323)  acc5: 91.6000 (93.0323)  time: 0.2851  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.7283 (1.4025)  acc1: 66.4000 (72.2342)  acc5: 88.0000 (91.7951)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.7281 (1.4256)  acc1: 67.2000 (71.6400)  acc5: 88.0000 (91.5040)  time: 0.2847  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3434 s / it)
* Acc@1 72.116 Acc@5 91.458 loss 1.427
Accuracy of the model on the 50000 test images: 72.1%
Max accuracy: 72.12%
Epoch: [40]  [   0/2502]  eta: 1:44:25  lr: 0.003950  min_lr: 0.003950  loss: 3.9194 (3.9194)  weight_decay: 0.0500 (0.0500)  time: 2.5042  data: 1.9009  max mem: 53905
Epoch: [40]  [ 200/2502]  eta: 0:23:08  lr: 0.003949  min_lr: 0.003949  loss: 3.9191 (3.7911)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5645 (0.5461)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [40]  [ 400/2502]  eta: 0:20:58  lr: 0.003949  min_lr: 0.003949  loss: 4.0071 (3.7958)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4908 (0.5340)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [40]  [ 600/2502]  eta: 0:18:54  lr: 0.003949  min_lr: 0.003949  loss: 3.9653 (3.8217)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5619 (0.5407)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [40]  [ 800/2502]  eta: 0:16:54  lr: 0.003948  min_lr: 0.003948  loss: 3.9897 (3.8151)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5174 (0.5392)  time: 0.5963  data: 0.0005  max mem: 53905
Epoch: [40]  [1000/2502]  eta: 0:14:53  lr: 0.003948  min_lr: 0.003948  loss: 3.5472 (3.8020)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5098 (0.5349)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [40]  [1200/2502]  eta: 0:12:53  lr: 0.003947  min_lr: 0.003947  loss: 4.0113 (3.8018)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4723 (0.5332)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [40]  [1400/2502]  eta: 0:10:54  lr: 0.003947  min_lr: 0.003947  loss: 3.9995 (3.8003)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5069 (0.5329)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [40]  [1600/2502]  eta: 0:08:55  lr: 0.003947  min_lr: 0.003947  loss: 3.7636 (3.7993)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5212 (0.5297)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [40]  [1800/2502]  eta: 0:06:56  lr: 0.003946  min_lr: 0.003946  loss: 3.8740 (3.7982)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5076 (0.5274)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [40]  [2000/2502]  eta: 0:04:57  lr: 0.003946  min_lr: 0.003946  loss: 3.9055 (3.7959)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4951 (0.5272)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [40]  [2200/2502]  eta: 0:02:59  lr: 0.003945  min_lr: 0.003945  loss: 3.8154 (3.8028)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5427 (0.5267)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [40]  [2400/2502]  eta: 0:01:00  lr: 0.003945  min_lr: 0.003945  loss: 3.9547 (3.8014)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5346 (0.5266)  time: 0.5953  data: 0.0004  max mem: 53905
Epoch: [40]  [2501/2502]  eta: 0:00:00  lr: 0.003945  min_lr: 0.003945  loss: 4.0418 (3.8049)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5140 (0.5268)  time: 0.5328  data: 0.0008  max mem: 53905
Epoch: [40] Total time: 0:24:43 (0.5929 s / it)
Averaged stats: lr: 0.003945  min_lr: 0.003945  loss: 4.0418 (3.8088)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5140 (0.5268)
Test:  [ 0/50]  eta: 0:02:49  loss: 0.8544 (0.8544)  acc1: 89.2000 (89.2000)  acc5: 97.2000 (97.2000)  time: 3.3926  data: 3.0581  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.1503 (1.1585)  acc1: 80.8000 (79.0909)  acc5: 96.4000 (94.8727)  time: 0.5844  data: 0.2966  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.1503 (1.1676)  acc1: 77.6000 (78.8571)  acc5: 95.6000 (95.0476)  time: 0.2973  data: 0.0104  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3642 (1.3100)  acc1: 74.0000 (76.0516)  acc5: 92.0000 (93.1484)  time: 0.2916  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.6954 (1.4095)  acc1: 68.4000 (73.8049)  acc5: 87.6000 (91.7854)  time: 0.2879  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.6954 (1.4363)  acc1: 67.2000 (73.0320)  acc5: 87.2000 (91.6080)  time: 0.2838  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3555 s / it)
* Acc@1 72.880 Acc@5 91.706 loss 1.439
Accuracy of the model on the 50000 test images: 72.9%
Max accuracy: 72.88%
Epoch: [41]  [   0/2502]  eta: 1:32:53  lr: 0.003945  min_lr: 0.003945  loss: 3.5163 (3.5163)  weight_decay: 0.0500 (0.0500)  time: 2.2275  data: 1.6307  max mem: 53905
Epoch: [41]  [ 200/2502]  eta: 0:23:01  lr: 0.003944  min_lr: 0.003944  loss: 3.8129 (3.7720)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5090 (0.5113)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [41]  [ 400/2502]  eta: 0:20:54  lr: 0.003944  min_lr: 0.003944  loss: 3.9582 (3.8040)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4856 (0.5213)  time: 0.5930  data: 0.0007  max mem: 53905
Epoch: [41]  [ 600/2502]  eta: 0:18:53  lr: 0.003943  min_lr: 0.003943  loss: 4.0743 (3.7914)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5565 (0.5295)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [41]  [ 800/2502]  eta: 0:16:52  lr: 0.003943  min_lr: 0.003943  loss: 4.1381 (3.7963)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5486 (0.5328)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [41]  [1000/2502]  eta: 0:14:53  lr: 0.003943  min_lr: 0.003943  loss: 3.8156 (3.8011)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4907 (0.5312)  time: 0.5962  data: 0.0007  max mem: 53905
Epoch: [41]  [1200/2502]  eta: 0:12:53  lr: 0.003942  min_lr: 0.003942  loss: 4.0409 (3.7966)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5298 (0.5264)  time: 0.5938  data: 0.0007  max mem: 53905
Epoch: [41]  [1400/2502]  eta: 0:10:54  lr: 0.003942  min_lr: 0.003942  loss: 3.9755 (3.7970)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5013 (nan)  time: 0.5919  data: 0.0007  max mem: 53905
Epoch: [41]  [1600/2502]  eta: 0:08:55  lr: 0.003941  min_lr: 0.003941  loss: 4.0095 (3.7944)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4964 (nan)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [41]  [1800/2502]  eta: 0:06:56  lr: 0.003941  min_lr: 0.003941  loss: 3.9896 (3.7980)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5398 (nan)  time: 0.5913  data: 0.0007  max mem: 53905
Epoch: [41]  [2000/2502]  eta: 0:04:57  lr: 0.003940  min_lr: 0.003940  loss: 3.5427 (3.7951)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4691 (nan)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [41]  [2200/2502]  eta: 0:02:59  lr: 0.003940  min_lr: 0.003940  loss: 3.8030 (3.7968)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4950 (nan)  time: 0.5927  data: 0.0008  max mem: 53905
Epoch: [41]  [2400/2502]  eta: 0:01:00  lr: 0.003940  min_lr: 0.003940  loss: 3.6949 (3.8018)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5430 (nan)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [41]  [2501/2502]  eta: 0:00:00  lr: 0.003939  min_lr: 0.003939  loss: 3.7428 (3.7974)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5844 (nan)  time: 0.5373  data: 0.0008  max mem: 53905
Epoch: [41] Total time: 0:24:44 (0.5931 s / it)
Averaged stats: lr: 0.003939  min_lr: 0.003939  loss: 3.7428 (3.7987)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5844 (nan)
Test:  [ 0/50]  eta: 0:02:34  loss: 0.6988 (0.6988)  acc1: 89.6000 (89.6000)  acc5: 98.0000 (98.0000)  time: 3.0898  data: 2.7565  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 1.1160 (1.1455)  acc1: 78.8000 (78.8000)  acc5: 96.4000 (95.2727)  time: 0.5371  data: 0.2510  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.1252 (1.1681)  acc1: 76.8000 (78.0191)  acc5: 96.0000 (95.5238)  time: 0.2821  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3886 (1.3004)  acc1: 71.2000 (74.9290)  acc5: 92.0000 (93.6258)  time: 0.2824  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.6408 (1.3907)  acc1: 68.4000 (72.7317)  acc5: 88.4000 (92.2439)  time: 0.2822  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5949 (1.4059)  acc1: 67.6000 (72.0000)  acc5: 88.4000 (91.9360)  time: 0.2820  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3404 s / it)
* Acc@1 72.338 Acc@5 91.660 loss 1.409
Accuracy of the model on the 50000 test images: 72.3%
Max accuracy: 72.88%
Epoch: [42]  [   0/2502]  eta: 1:53:29  lr: 0.003939  min_lr: 0.003939  loss: 4.0162 (4.0162)  weight_decay: 0.0500 (0.0500)  time: 2.7215  data: 2.0271  max mem: 53905
Epoch: [42]  [ 200/2502]  eta: 0:23:09  lr: 0.003939  min_lr: 0.003939  loss: 3.9732 (3.8429)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4722 (0.5135)  time: 0.5912  data: 0.0006  max mem: 53905
Epoch: [42]  [ 400/2502]  eta: 0:20:58  lr: 0.003939  min_lr: 0.003939  loss: 3.5929 (3.8080)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5289 (0.5177)  time: 0.5930  data: 0.0007  max mem: 53905
Epoch: [42]  [ 600/2502]  eta: 0:18:54  lr: 0.003938  min_lr: 0.003938  loss: 3.9734 (3.7775)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5106 (0.5190)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [42]  [ 800/2502]  eta: 0:16:54  lr: 0.003938  min_lr: 0.003938  loss: 3.6866 (3.7873)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4857 (0.5187)  time: 0.5923  data: 0.0006  max mem: 53905
Epoch: [42]  [1000/2502]  eta: 0:14:54  lr: 0.003937  min_lr: 0.003937  loss: 4.0222 (3.7966)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4864 (0.5211)  time: 0.5928  data: 0.0006  max mem: 53905
Epoch: [42]  [1200/2502]  eta: 0:12:54  lr: 0.003937  min_lr: 0.003937  loss: 3.9114 (3.7910)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4856 (0.5195)  time: 0.5930  data: 0.0007  max mem: 53905
Epoch: [42]  [1400/2502]  eta: 0:10:55  lr: 0.003936  min_lr: 0.003936  loss: 3.9341 (3.7891)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5782 (0.5222)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [42]  [1600/2502]  eta: 0:08:56  lr: 0.003936  min_lr: 0.003936  loss: 3.9060 (3.7908)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4778 (0.5200)  time: 0.5913  data: 0.0007  max mem: 53905
Epoch: [42]  [1800/2502]  eta: 0:06:57  lr: 0.003935  min_lr: 0.003935  loss: 3.9318 (3.7839)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5450 (0.5220)  time: 0.5912  data: 0.0008  max mem: 53905
Epoch: [42]  [2000/2502]  eta: 0:04:58  lr: 0.003935  min_lr: 0.003935  loss: 3.9692 (3.7845)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4859 (0.5205)  time: 0.5933  data: 0.0006  max mem: 53905
Epoch: [42]  [2200/2502]  eta: 0:02:59  lr: 0.003934  min_lr: 0.003934  loss: 4.0584 (3.7852)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5571 (0.5230)  time: 0.5912  data: 0.0008  max mem: 53905
Epoch: [42]  [2400/2502]  eta: 0:01:00  lr: 0.003934  min_lr: 0.003934  loss: 3.8595 (3.7875)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5385 (0.5251)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [42]  [2501/2502]  eta: 0:00:00  lr: 0.003934  min_lr: 0.003934  loss: 3.9506 (3.7883)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4718 (0.5238)  time: 0.5344  data: 0.0008  max mem: 53905
Epoch: [42] Total time: 0:24:45 (0.5935 s / it)
Averaged stats: lr: 0.003934  min_lr: 0.003934  loss: 3.9506 (3.7936)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4718 (0.5238)
Test:  [ 0/50]  eta: 0:02:29  loss: 0.6084 (0.6084)  acc1: 90.8000 (90.8000)  acc5: 99.2000 (99.2000)  time: 2.9977  data: 2.6594  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.0212 (1.0344)  acc1: 78.8000 (79.5273)  acc5: 96.0000 (95.2000)  time: 0.5881  data: 0.2888  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0212 (1.0630)  acc1: 78.0000 (78.6857)  acc5: 95.6000 (95.3905)  time: 0.3162  data: 0.0261  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3347 (1.2047)  acc1: 70.8000 (75.6774)  acc5: 91.2000 (93.2258)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5234 (1.2813)  acc1: 68.0000 (73.7171)  acc5: 88.8000 (92.1073)  time: 0.2846  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4963 (1.3002)  acc1: 68.0000 (73.1200)  acc5: 89.2000 (91.7600)  time: 0.2842  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3539 s / it)
* Acc@1 72.868 Acc@5 91.884 loss 1.304
Accuracy of the model on the 50000 test images: 72.9%
Max accuracy: 72.88%
Epoch: [43]  [   0/2502]  eta: 1:55:13  lr: 0.003934  min_lr: 0.003934  loss: 4.3415 (4.3415)  weight_decay: 0.0500 (0.0500)  time: 2.7632  data: 2.0556  max mem: 53905
Epoch: [43]  [ 200/2502]  eta: 0:23:08  lr: 0.003933  min_lr: 0.003933  loss: 4.0208 (3.7803)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5038 (0.5203)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [43]  [ 400/2502]  eta: 0:20:54  lr: 0.003933  min_lr: 0.003933  loss: 3.8393 (3.7941)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5342 (0.5182)  time: 0.5906  data: 0.0006  max mem: 53905
Epoch: [43]  [ 600/2502]  eta: 0:18:53  lr: 0.003932  min_lr: 0.003932  loss: 3.9009 (3.7893)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5104 (0.5226)  time: 0.5971  data: 0.0008  max mem: 53905
Epoch: [43]  [ 800/2502]  eta: 0:16:52  lr: 0.003932  min_lr: 0.003932  loss: 3.7003 (3.7852)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5270 (0.5254)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [43]  [1000/2502]  eta: 0:14:53  lr: 0.003931  min_lr: 0.003931  loss: 4.0038 (3.7922)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5536 (0.5284)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [43]  [1200/2502]  eta: 0:12:54  lr: 0.003931  min_lr: 0.003931  loss: 3.6660 (3.7833)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5321 (0.5271)  time: 0.5923  data: 0.0006  max mem: 53905
Epoch: [43]  [1400/2502]  eta: 0:10:54  lr: 0.003931  min_lr: 0.003931  loss: 3.8976 (3.7837)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5090 (0.5289)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [43]  [1600/2502]  eta: 0:08:55  lr: 0.003930  min_lr: 0.003930  loss: 4.0726 (3.7863)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4668 (0.5290)  time: 0.5964  data: 0.0006  max mem: 53905
Epoch: [43]  [1800/2502]  eta: 0:06:56  lr: 0.003930  min_lr: 0.003930  loss: 4.0115 (3.7805)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5324 (0.5300)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [43]  [2000/2502]  eta: 0:04:57  lr: 0.003929  min_lr: 0.003929  loss: 3.9629 (3.7861)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5060 (0.5294)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [43]  [2200/2502]  eta: 0:02:59  lr: 0.003929  min_lr: 0.003929  loss: 3.6444 (3.7859)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5468 (0.5279)  time: 0.5921  data: 0.0007  max mem: 53905
Epoch: [43]  [2400/2502]  eta: 0:01:00  lr: 0.003928  min_lr: 0.003928  loss: 3.8587 (3.7818)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5979 (0.5313)  time: 0.5928  data: 0.0007  max mem: 53905
Epoch: [43]  [2501/2502]  eta: 0:00:00  lr: 0.003928  min_lr: 0.003928  loss: 3.8734 (3.7812)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5004 (0.5323)  time: 0.5335  data: 0.0006  max mem: 53905
Epoch: [43] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.003928  min_lr: 0.003928  loss: 3.8734 (3.7841)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5004 (0.5323)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.8140 (0.8140)  acc1: 91.2000 (91.2000)  acc5: 98.4000 (98.4000)  time: 3.0176  data: 2.6916  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.1966 (1.1473)  acc1: 78.8000 (78.7636)  acc5: 96.4000 (95.2727)  time: 0.5910  data: 0.2923  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.1966 (1.1688)  acc1: 76.0000 (77.9619)  acc5: 96.4000 (95.4095)  time: 0.3167  data: 0.0264  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.4163 (1.3105)  acc1: 71.2000 (75.2129)  acc5: 92.0000 (93.3677)  time: 0.2849  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.6680 (1.3955)  acc1: 67.6000 (73.1317)  acc5: 88.8000 (92.3122)  time: 0.2844  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.6680 (1.4206)  acc1: 67.6000 (72.6880)  acc5: 89.2000 (92.0400)  time: 0.2841  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3543 s / it)
* Acc@1 72.610 Acc@5 91.778 loss 1.427
Accuracy of the model on the 50000 test images: 72.6%
Max accuracy: 72.88%
Epoch: [44]  [   0/2502]  eta: 1:38:46  lr: 0.003928  min_lr: 0.003928  loss: 3.1750 (3.1750)  weight_decay: 0.0500 (0.0500)  time: 2.3687  data: 1.5547  max mem: 53905
Epoch: [44]  [ 200/2502]  eta: 0:23:07  lr: 0.003927  min_lr: 0.003927  loss: 4.0503 (3.7197)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6062 (0.5378)  time: 0.6012  data: 0.0005  max mem: 53905
Epoch: [44]  [ 400/2502]  eta: 0:20:57  lr: 0.003927  min_lr: 0.003927  loss: 3.8993 (3.7508)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5188 (0.5257)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [44]  [ 600/2502]  eta: 0:18:53  lr: 0.003927  min_lr: 0.003927  loss: 4.0308 (3.7667)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5053 (0.5263)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [44]  [ 800/2502]  eta: 0:16:53  lr: 0.003926  min_lr: 0.003926  loss: 3.4139 (3.7567)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5150 (0.5299)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [44]  [1000/2502]  eta: 0:14:53  lr: 0.003926  min_lr: 0.003926  loss: 3.8888 (3.7585)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4852 (0.5296)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [44]  [1200/2502]  eta: 0:12:53  lr: 0.003925  min_lr: 0.003925  loss: 3.9538 (3.7683)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5494 (0.5337)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [44]  [1400/2502]  eta: 0:10:54  lr: 0.003925  min_lr: 0.003925  loss: 3.8674 (3.7656)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5622 (0.5373)  time: 0.5934  data: 0.0004  max mem: 53905
Epoch: [44]  [1600/2502]  eta: 0:08:55  lr: 0.003924  min_lr: 0.003924  loss: 3.7816 (3.7599)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5071 (0.5358)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [44]  [1800/2502]  eta: 0:06:56  lr: 0.003924  min_lr: 0.003924  loss: 3.9741 (3.7589)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5172 (0.5357)  time: 0.5956  data: 0.0004  max mem: 53905
Epoch: [44]  [2000/2502]  eta: 0:04:57  lr: 0.003923  min_lr: 0.003923  loss: 3.8309 (3.7618)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5194 (0.5352)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [44]  [2200/2502]  eta: 0:02:59  lr: 0.003923  min_lr: 0.003923  loss: 3.4501 (3.7620)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5140 (0.5338)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [44]  [2400/2502]  eta: 0:01:00  lr: 0.003922  min_lr: 0.003922  loss: 3.8524 (3.7623)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5800 (0.5349)  time: 0.5973  data: 0.0004  max mem: 53905
Epoch: [44]  [2501/2502]  eta: 0:00:00  lr: 0.003922  min_lr: 0.003922  loss: 3.5990 (3.7624)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5582 (0.5362)  time: 0.5334  data: 0.0007  max mem: 53905
Epoch: [44] Total time: 0:24:43 (0.5929 s / it)
Averaged stats: lr: 0.003922  min_lr: 0.003922  loss: 3.5990 (3.7706)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5582 (0.5362)
Test:  [ 0/50]  eta: 0:02:48  loss: 0.6493 (0.6493)  acc1: 90.4000 (90.4000)  acc5: 98.0000 (98.0000)  time: 3.3731  data: 3.0467  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.0948 (1.0978)  acc1: 80.8000 (79.2364)  acc5: 96.8000 (95.3818)  time: 0.5658  data: 0.2774  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.1095 (1.1167)  acc1: 77.2000 (78.3238)  acc5: 96.4000 (95.6381)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2384 (1.2451)  acc1: 73.2000 (75.7032)  acc5: 92.8000 (93.6774)  time: 0.2857  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5874 (1.3348)  acc1: 67.6000 (73.5122)  acc5: 88.0000 (92.3220)  time: 0.2853  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5874 (1.3627)  acc1: 66.4000 (72.6160)  acc5: 88.4000 (92.0480)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3491 s / it)
* Acc@1 73.076 Acc@5 92.054 loss 1.360
Accuracy of the model on the 50000 test images: 73.1%
Max accuracy: 73.08%
Epoch: [45]  [   0/2502]  eta: 1:50:36  lr: 0.003922  min_lr: 0.003922  loss: 3.2335 (3.2335)  weight_decay: 0.0500 (0.0500)  time: 2.6527  data: 2.0600  max mem: 53905
Epoch: [45]  [ 200/2502]  eta: 0:23:06  lr: 0.003921  min_lr: 0.003921  loss: 3.7044 (3.7740)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [45]  [ 400/2502]  eta: 0:20:57  lr: 0.003921  min_lr: 0.003921  loss: 4.0490 (3.7782)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5326 (nan)  time: 0.6097  data: 0.0005  max mem: 53905
Epoch: [45]  [ 600/2502]  eta: 0:18:54  lr: 0.003920  min_lr: 0.003920  loss: 4.0974 (3.7657)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5139 (nan)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [45]  [ 800/2502]  eta: 0:16:53  lr: 0.003920  min_lr: 0.003920  loss: 4.1185 (3.7826)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5354 (nan)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [45]  [1000/2502]  eta: 0:14:53  lr: 0.003919  min_lr: 0.003919  loss: 3.6240 (3.7831)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5209 (nan)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [45]  [1200/2502]  eta: 0:12:54  lr: 0.003919  min_lr: 0.003919  loss: 3.8656 (3.7747)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5090 (nan)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [45]  [1400/2502]  eta: 0:10:54  lr: 0.003918  min_lr: 0.003918  loss: 3.8519 (3.7728)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5502 (nan)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [45]  [1600/2502]  eta: 0:08:55  lr: 0.003918  min_lr: 0.003918  loss: 4.0439 (3.7657)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5019 (nan)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [45]  [1800/2502]  eta: 0:06:56  lr: 0.003917  min_lr: 0.003917  loss: 4.0561 (3.7671)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5415 (nan)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [45]  [2000/2502]  eta: 0:04:58  lr: 0.003917  min_lr: 0.003917  loss: 3.9328 (3.7679)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4982 (nan)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [45]  [2200/2502]  eta: 0:02:59  lr: 0.003916  min_lr: 0.003916  loss: 3.9369 (3.7688)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5603 (nan)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [45]  [2400/2502]  eta: 0:01:00  lr: 0.003916  min_lr: 0.003916  loss: 3.8736 (3.7653)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4974 (nan)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [45]  [2501/2502]  eta: 0:00:00  lr: 0.003916  min_lr: 0.003916  loss: 3.8340 (3.7634)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5142 (nan)  time: 0.5336  data: 0.0007  max mem: 53905
Epoch: [45] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.003916  min_lr: 0.003916  loss: 3.8340 (3.7704)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5142 (nan)
Test:  [ 0/50]  eta: 0:02:55  loss: 0.7014 (0.7014)  acc1: 90.0000 (90.0000)  acc5: 99.2000 (99.2000)  time: 3.5117  data: 3.1985  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.0760 (1.0818)  acc1: 77.2000 (79.7455)  acc5: 96.8000 (95.6000)  time: 0.5784  data: 0.2912  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0894 (1.0905)  acc1: 77.2000 (79.1810)  acc5: 96.8000 (95.8857)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2490 (1.2204)  acc1: 70.8000 (75.8323)  acc5: 93.2000 (93.7806)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5587 (1.3077)  acc1: 68.4000 (73.5610)  acc5: 88.4000 (92.6049)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5454 (1.3260)  acc1: 68.4000 (73.0320)  acc5: 89.2000 (92.3600)  time: 0.2849  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3520 s / it)
* Acc@1 73.084 Acc@5 92.096 loss 1.333
Accuracy of the model on the 50000 test images: 73.1%
Max accuracy: 73.08%
Epoch: [46]  [   0/2502]  eta: 1:35:42  lr: 0.003916  min_lr: 0.003916  loss: 3.8414 (3.8414)  weight_decay: 0.0500 (0.0500)  time: 2.2952  data: 1.6902  max mem: 53905
Epoch: [46]  [ 200/2502]  eta: 0:23:10  lr: 0.003915  min_lr: 0.003915  loss: 3.8845 (3.7786)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5122 (0.5268)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [46]  [ 400/2502]  eta: 0:20:57  lr: 0.003914  min_lr: 0.003914  loss: 3.9682 (3.7628)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4972 (0.5259)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [46]  [ 600/2502]  eta: 0:18:54  lr: 0.003914  min_lr: 0.003914  loss: 3.8969 (3.7508)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5227 (0.5230)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [46]  [ 800/2502]  eta: 0:16:53  lr: 0.003913  min_lr: 0.003913  loss: 3.9929 (3.7584)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5656 (0.5319)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [46]  [1000/2502]  eta: 0:14:53  lr: 0.003913  min_lr: 0.003913  loss: 3.5311 (3.7595)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5704 (0.5357)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [46]  [1200/2502]  eta: 0:12:54  lr: 0.003912  min_lr: 0.003912  loss: 3.6851 (3.7477)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5192 (0.5379)  time: 0.5982  data: 0.0004  max mem: 53905
Epoch: [46]  [1400/2502]  eta: 0:10:55  lr: 0.003912  min_lr: 0.003912  loss: 3.6101 (3.7511)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4890 (0.5364)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [46]  [1600/2502]  eta: 0:08:55  lr: 0.003911  min_lr: 0.003911  loss: 3.7762 (3.7531)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5695 (0.5387)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [46]  [1800/2502]  eta: 0:06:56  lr: 0.003911  min_lr: 0.003911  loss: 3.6440 (3.7547)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5184 (0.5380)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [46]  [2000/2502]  eta: 0:04:58  lr: 0.003910  min_lr: 0.003910  loss: 4.0454 (3.7596)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5238 (0.5380)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [46]  [2200/2502]  eta: 0:02:59  lr: 0.003910  min_lr: 0.003910  loss: 4.0820 (3.7630)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5123 (0.5378)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [46]  [2400/2502]  eta: 0:01:00  lr: 0.003909  min_lr: 0.003909  loss: 3.8550 (3.7650)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4971 (0.5404)  time: 0.5969  data: 0.0004  max mem: 53905
Epoch: [46]  [2501/2502]  eta: 0:00:00  lr: 0.003909  min_lr: 0.003909  loss: 3.5748 (3.7643)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5215 (0.5410)  time: 0.5329  data: 0.0007  max mem: 53905
Epoch: [46] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.003909  min_lr: 0.003909  loss: 3.5748 (3.7623)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5215 (0.5410)
Test:  [ 0/50]  eta: 0:02:51  loss: 0.6890 (0.6890)  acc1: 89.2000 (89.2000)  acc5: 98.0000 (98.0000)  time: 3.4239  data: 3.0845  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.0949 (1.0753)  acc1: 80.0000 (80.8364)  acc5: 95.6000 (95.6364)  time: 0.5987  data: 0.3021  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0926 (1.0603)  acc1: 79.2000 (80.2857)  acc5: 95.6000 (96.0191)  time: 0.3008  data: 0.0122  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1972 (1.1893)  acc1: 73.6000 (77.3806)  acc5: 93.6000 (94.0774)  time: 0.2841  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5677 (1.2815)  acc1: 68.4000 (74.8585)  acc5: 88.8000 (92.8683)  time: 0.2826  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5677 (1.3037)  acc1: 67.2000 (74.0320)  acc5: 89.6000 (92.6800)  time: 0.2825  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3549 s / it)
* Acc@1 73.642 Acc@5 92.324 loss 1.319
Accuracy of the model on the 50000 test images: 73.6%
Max accuracy: 73.64%
Epoch: [47]  [   0/2502]  eta: 1:32:38  lr: 0.003909  min_lr: 0.003909  loss: 3.0459 (3.0459)  weight_decay: 0.0500 (0.0500)  time: 2.2215  data: 1.6188  max mem: 53905
Epoch: [47]  [ 200/2502]  eta: 0:23:02  lr: 0.003908  min_lr: 0.003908  loss: 3.6472 (3.7287)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5931 (0.5471)  time: 0.5935  data: 0.0004  max mem: 53905
Epoch: [47]  [ 400/2502]  eta: 0:20:55  lr: 0.003908  min_lr: 0.003908  loss: 4.0429 (3.7615)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5325 (0.5447)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [47]  [ 600/2502]  eta: 0:18:54  lr: 0.003907  min_lr: 0.003907  loss: 3.8645 (3.7660)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5288 (0.5413)  time: 0.5915  data: 0.0006  max mem: 53905
Epoch: [47]  [ 800/2502]  eta: 0:16:53  lr: 0.003907  min_lr: 0.003907  loss: 3.6475 (3.7565)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4969 (0.5370)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [47]  [1000/2502]  eta: 0:14:53  lr: 0.003906  min_lr: 0.003906  loss: 3.8665 (3.7614)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5123 (0.5364)  time: 0.5988  data: 0.0004  max mem: 53905
Epoch: [47]  [1200/2502]  eta: 0:12:54  lr: 0.003906  min_lr: 0.003906  loss: 3.8686 (3.7586)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5426 (0.5387)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [47]  [1400/2502]  eta: 0:10:55  lr: 0.003905  min_lr: 0.003905  loss: 3.7530 (3.7568)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5073 (0.5382)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [47]  [1600/2502]  eta: 0:08:56  lr: 0.003905  min_lr: 0.003905  loss: 3.7323 (3.7600)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5306 (0.5360)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [47]  [1800/2502]  eta: 0:06:57  lr: 0.003904  min_lr: 0.003904  loss: 3.9295 (3.7585)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5372 (0.5359)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [47]  [2000/2502]  eta: 0:04:58  lr: 0.003904  min_lr: 0.003904  loss: 3.8850 (3.7615)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4912 (0.5346)  time: 0.5954  data: 0.0004  max mem: 53905
Epoch: [47]  [2200/2502]  eta: 0:02:59  lr: 0.003903  min_lr: 0.003903  loss: 3.5839 (3.7611)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5289 (0.5365)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [47]  [2400/2502]  eta: 0:01:00  lr: 0.003902  min_lr: 0.003902  loss: 3.7552 (3.7572)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5112 (0.5349)  time: 0.5937  data: 0.0005  max mem: 53905
Epoch: [47]  [2501/2502]  eta: 0:00:00  lr: 0.003902  min_lr: 0.003902  loss: 4.0482 (3.7556)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4767 (0.5340)  time: 0.5331  data: 0.0009  max mem: 53905
Epoch: [47] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.003902  min_lr: 0.003902  loss: 4.0482 (3.7442)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4767 (0.5340)
Test:  [ 0/50]  eta: 0:03:01  loss: 0.8624 (0.8624)  acc1: 88.8000 (88.8000)  acc5: 97.2000 (97.2000)  time: 3.6214  data: 3.3082  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.1004 (1.1526)  acc1: 77.6000 (79.7455)  acc5: 96.0000 (95.3091)  time: 0.5880  data: 0.3011  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.1574 (1.1488)  acc1: 77.6000 (79.1048)  acc5: 96.0000 (95.8667)  time: 0.2849  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3107 (1.2856)  acc1: 74.0000 (75.8839)  acc5: 92.4000 (93.8839)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.6281 (1.3626)  acc1: 67.2000 (74.1268)  acc5: 88.8000 (92.7024)  time: 0.2846  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5625 (1.3862)  acc1: 70.0000 (73.5200)  acc5: 88.8000 (92.4640)  time: 0.2841  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3537 s / it)
* Acc@1 73.356 Acc@5 92.202 loss 1.400
Accuracy of the model on the 50000 test images: 73.4%
Max accuracy: 73.64%
Epoch: [48]  [   0/2502]  eta: 1:51:04  lr: 0.003902  min_lr: 0.003902  loss: 4.1072 (4.1072)  weight_decay: 0.0500 (0.0500)  time: 2.6638  data: 2.0537  max mem: 53905
Epoch: [48]  [ 200/2502]  eta: 0:23:09  lr: 0.003902  min_lr: 0.003902  loss: 3.9369 (3.7394)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5167 (0.5332)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [48]  [ 400/2502]  eta: 0:20:56  lr: 0.003901  min_lr: 0.003901  loss: 3.8452 (3.7355)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5482 (0.5340)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [48]  [ 600/2502]  eta: 0:18:52  lr: 0.003900  min_lr: 0.003900  loss: 3.6707 (3.7314)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5444 (0.5445)  time: 0.5965  data: 0.0005  max mem: 53905
Epoch: [48]  [ 800/2502]  eta: 0:16:52  lr: 0.003900  min_lr: 0.003900  loss: 3.6980 (3.7476)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5468 (0.5453)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [48]  [1000/2502]  eta: 0:14:52  lr: 0.003899  min_lr: 0.003899  loss: 3.9880 (3.7446)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5270 (0.5407)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [48]  [1200/2502]  eta: 0:12:53  lr: 0.003899  min_lr: 0.003899  loss: 3.5873 (3.7342)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4972 (0.5382)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [48]  [1400/2502]  eta: 0:10:54  lr: 0.003898  min_lr: 0.003898  loss: 3.7835 (3.7338)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5184 (0.5388)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [48]  [1600/2502]  eta: 0:08:55  lr: 0.003898  min_lr: 0.003898  loss: 4.0276 (3.7343)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4899 (0.5378)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [48]  [1800/2502]  eta: 0:06:56  lr: 0.003897  min_lr: 0.003897  loss: 4.0726 (3.7400)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5096 (0.5375)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [48]  [2000/2502]  eta: 0:04:58  lr: 0.003897  min_lr: 0.003897  loss: 3.9381 (3.7452)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5153 (0.5384)  time: 0.5977  data: 0.0005  max mem: 53905
Epoch: [48]  [2200/2502]  eta: 0:02:59  lr: 0.003896  min_lr: 0.003896  loss: 3.7425 (3.7515)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5064 (0.5411)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [48]  [2400/2502]  eta: 0:01:00  lr: 0.003895  min_lr: 0.003895  loss: 3.6343 (3.7542)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5051 (0.5390)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [48]  [2501/2502]  eta: 0:00:00  lr: 0.003895  min_lr: 0.003895  loss: 3.6371 (3.7527)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4936 (0.5381)  time: 0.5330  data: 0.0007  max mem: 53905
Epoch: [48] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.003895  min_lr: 0.003895  loss: 3.6371 (3.7430)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4936 (0.5381)
Test:  [ 0/50]  eta: 0:02:28  loss: 0.6861 (0.6861)  acc1: 89.2000 (89.2000)  acc5: 98.4000 (98.4000)  time: 2.9740  data: 2.6453  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 1.0036 (1.0475)  acc1: 80.8000 (79.6727)  acc5: 95.6000 (95.4546)  time: 0.5398  data: 0.2516  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.0049 (1.0512)  acc1: 79.2000 (79.4857)  acc5: 95.6000 (95.7905)  time: 0.2908  data: 0.0064  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2697 (1.1749)  acc1: 72.0000 (76.5806)  acc5: 92.4000 (93.9871)  time: 0.2851  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5011 (1.2569)  acc1: 69.2000 (74.6146)  acc5: 88.8000 (92.7902)  time: 0.2904  data: 0.0015  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4881 (1.2742)  acc1: 69.2000 (73.7680)  acc5: 88.8000 (92.4800)  time: 0.2906  data: 0.0013  max mem: 53905
Test: Total time: 0:00:17 (0.3458 s / it)
* Acc@1 73.514 Acc@5 92.292 loss 1.284
Accuracy of the model on the 50000 test images: 73.5%
Max accuracy: 73.64%
Epoch: [49]  [   0/2502]  eta: 1:42:34  lr: 0.003895  min_lr: 0.003895  loss: 3.6584 (3.6584)  weight_decay: 0.0500 (0.0500)  time: 2.4600  data: 1.7250  max mem: 53905
Epoch: [49]  [ 200/2502]  eta: 0:23:03  lr: 0.003895  min_lr: 0.003895  loss: 4.0571 (3.7952)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5366 (0.5621)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [49]  [ 400/2502]  eta: 0:20:54  lr: 0.003894  min_lr: 0.003894  loss: 3.5283 (3.7589)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5171 (0.5366)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [49]  [ 600/2502]  eta: 0:18:52  lr: 0.003893  min_lr: 0.003893  loss: 3.9957 (3.7628)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5357 (0.5368)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [49]  [ 800/2502]  eta: 0:16:51  lr: 0.003893  min_lr: 0.003893  loss: 3.7817 (3.7565)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5191 (0.5337)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [49]  [1000/2502]  eta: 0:14:52  lr: 0.003892  min_lr: 0.003892  loss: 3.7273 (3.7549)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [49]  [1200/2502]  eta: 0:12:52  lr: 0.003892  min_lr: 0.003892  loss: 3.8937 (3.7504)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5611 (nan)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [49]  [1400/2502]  eta: 0:10:53  lr: 0.003891  min_lr: 0.003891  loss: 3.7490 (3.7433)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5014 (nan)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [49]  [1600/2502]  eta: 0:08:55  lr: 0.003890  min_lr: 0.003890  loss: 3.8622 (3.7394)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5380 (nan)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [49]  [1800/2502]  eta: 0:06:56  lr: 0.003890  min_lr: 0.003890  loss: 3.8041 (3.7404)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5246 (nan)  time: 0.5924  data: 0.0006  max mem: 53905
Epoch: [49]  [2000/2502]  eta: 0:04:57  lr: 0.003889  min_lr: 0.003889  loss: 3.9213 (3.7410)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5628 (nan)  time: 0.5961  data: 0.0004  max mem: 53905
Epoch: [49]  [2200/2502]  eta: 0:02:59  lr: 0.003889  min_lr: 0.003889  loss: 3.9090 (3.7394)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4642 (nan)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [49]  [2400/2502]  eta: 0:01:00  lr: 0.003888  min_lr: 0.003888  loss: 3.9298 (3.7336)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5321 (nan)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [49]  [2501/2502]  eta: 0:00:00  lr: 0.003888  min_lr: 0.003888  loss: 3.6258 (3.7311)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5549 (nan)  time: 0.5322  data: 0.0007  max mem: 53905
Epoch: [49] Total time: 0:24:43 (0.5929 s / it)
Averaged stats: lr: 0.003888  min_lr: 0.003888  loss: 3.6258 (3.7368)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5549 (nan)
Test:  [ 0/50]  eta: 0:02:46  loss: 0.7325 (0.7325)  acc1: 88.4000 (88.4000)  acc5: 97.6000 (97.6000)  time: 3.3222  data: 3.0004  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.9984 (1.0038)  acc1: 80.4000 (80.2909)  acc5: 96.0000 (95.4909)  time: 0.5734  data: 0.2888  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0022 (1.0151)  acc1: 77.6000 (79.1238)  acc5: 96.0000 (95.7143)  time: 0.2900  data: 0.0090  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1915 (1.1502)  acc1: 71.2000 (76.4774)  acc5: 92.4000 (93.8452)  time: 0.2815  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5110 (1.2438)  acc1: 68.8000 (74.2927)  acc5: 89.2000 (92.7317)  time: 0.2813  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4975 (1.2577)  acc1: 67.6000 (73.7360)  acc5: 89.6000 (92.5680)  time: 0.2811  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3487 s / it)
* Acc@1 73.802 Acc@5 92.468 loss 1.265
Accuracy of the model on the 50000 test images: 73.8%
Max accuracy: 73.80%
Epoch: [50]  [   0/2502]  eta: 1:32:17  lr: 0.003888  min_lr: 0.003888  loss: 3.6419 (3.6419)  weight_decay: 0.0500 (0.0500)  time: 2.2134  data: 1.6075  max mem: 53905
Epoch: [50]  [ 200/2502]  eta: 0:23:07  lr: 0.003887  min_lr: 0.003887  loss: 3.2476 (3.6507)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5010 (0.5303)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [50]  [ 400/2502]  eta: 0:20:54  lr: 0.003887  min_lr: 0.003887  loss: 3.8215 (3.6965)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4978 (0.5338)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [50]  [ 600/2502]  eta: 0:18:52  lr: 0.003886  min_lr: 0.003886  loss: 3.5550 (3.7154)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5976 (0.5460)  time: 0.5980  data: 0.0004  max mem: 53905
Epoch: [50]  [ 800/2502]  eta: 0:16:51  lr: 0.003885  min_lr: 0.003885  loss: 3.8039 (3.7117)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5083 (0.5411)  time: 0.5901  data: 0.0004  max mem: 53905
Epoch: [50]  [1000/2502]  eta: 0:14:51  lr: 0.003885  min_lr: 0.003885  loss: 3.8641 (3.7182)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4965 (0.5344)  time: 0.5901  data: 0.0004  max mem: 53905
Epoch: [50]  [1200/2502]  eta: 0:12:52  lr: 0.003884  min_lr: 0.003884  loss: 3.5411 (3.7269)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5417 (0.5359)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [50]  [1400/2502]  eta: 0:10:53  lr: 0.003884  min_lr: 0.003884  loss: 3.9045 (3.7353)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4781 (0.5329)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [50]  [1600/2502]  eta: 0:08:54  lr: 0.003883  min_lr: 0.003883  loss: 3.5470 (3.7396)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5535 (0.5343)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [50]  [1800/2502]  eta: 0:06:56  lr: 0.003882  min_lr: 0.003882  loss: 3.6142 (3.7404)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5229 (0.5332)  time: 0.6038  data: 0.0004  max mem: 53905
Epoch: [50]  [2000/2502]  eta: 0:04:57  lr: 0.003882  min_lr: 0.003882  loss: 3.8186 (3.7361)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5059 (0.5331)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [50]  [2200/2502]  eta: 0:02:59  lr: 0.003881  min_lr: 0.003881  loss: 3.9020 (3.7445)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5492 (0.5346)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [50]  [2400/2502]  eta: 0:01:00  lr: 0.003881  min_lr: 0.003881  loss: 3.8679 (3.7434)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5204 (0.5332)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [50]  [2501/2502]  eta: 0:00:00  lr: 0.003880  min_lr: 0.003880  loss: 3.7532 (3.7445)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5516 (0.5346)  time: 0.5325  data: 0.0007  max mem: 53905
Epoch: [50] Total time: 0:24:42 (0.5926 s / it)
Averaged stats: lr: 0.003880  min_lr: 0.003880  loss: 3.7532 (3.7310)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5516 (0.5346)
Test:  [ 0/50]  eta: 0:03:22  loss: 0.8328 (0.8328)  acc1: 90.4000 (90.4000)  acc5: 98.0000 (98.0000)  time: 4.0582  data: 3.7358  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 1.0154 (1.0718)  acc1: 80.0000 (80.0364)  acc5: 96.4000 (95.6000)  time: 0.6265  data: 0.3400  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0821 (1.0729)  acc1: 78.8000 (79.2571)  acc5: 96.4000 (95.9429)  time: 0.2838  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1639 (1.2158)  acc1: 72.4000 (76.2710)  acc5: 93.2000 (94.0387)  time: 0.2841  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5651 (1.2971)  acc1: 68.0000 (74.2634)  acc5: 89.2000 (93.0244)  time: 0.2837  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5574 (1.3170)  acc1: 68.4000 (73.8080)  acc5: 90.0000 (92.8000)  time: 0.2835  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3628 s / it)
* Acc@1 73.572 Acc@5 92.484 loss 1.326
Accuracy of the model on the 50000 test images: 73.6%
Max accuracy: 73.80%
Epoch: [51]  [   0/2502]  eta: 1:56:38  lr: 0.003880  min_lr: 0.003880  loss: 3.7453 (3.7453)  weight_decay: 0.0500 (0.0500)  time: 2.7973  data: 1.8911  max mem: 53905
Epoch: [51]  [ 200/2502]  eta: 0:23:05  lr: 0.003880  min_lr: 0.003880  loss: 3.6034 (3.6413)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5558 (0.5663)  time: 0.5910  data: 0.0006  max mem: 53905
Epoch: [51]  [ 400/2502]  eta: 0:20:57  lr: 0.003879  min_lr: 0.003879  loss: 4.1217 (3.6908)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4908 (0.5511)  time: 0.5950  data: 0.0007  max mem: 53905
Epoch: [51]  [ 600/2502]  eta: 0:18:53  lr: 0.003878  min_lr: 0.003878  loss: 3.8702 (3.7053)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5207 (0.5436)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [51]  [ 800/2502]  eta: 0:16:52  lr: 0.003878  min_lr: 0.003878  loss: 3.9870 (3.7230)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5251 (0.5442)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [51]  [1000/2502]  eta: 0:14:53  lr: 0.003877  min_lr: 0.003877  loss: 3.9895 (3.7296)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5052 (0.5397)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [51]  [1200/2502]  eta: 0:12:53  lr: 0.003877  min_lr: 0.003877  loss: 3.5089 (3.7230)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5926 (0.5423)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [51]  [1400/2502]  eta: 0:10:54  lr: 0.003876  min_lr: 0.003876  loss: 3.8692 (3.7218)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5099 (0.5424)  time: 0.6001  data: 0.0005  max mem: 53905
Epoch: [51]  [1600/2502]  eta: 0:08:55  lr: 0.003875  min_lr: 0.003875  loss: 3.6889 (3.7224)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5140 (0.5414)  time: 0.5909  data: 0.0006  max mem: 53905
Epoch: [51]  [1800/2502]  eta: 0:06:56  lr: 0.003875  min_lr: 0.003875  loss: 3.8333 (3.7234)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5687 (0.5401)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [51]  [2000/2502]  eta: 0:04:57  lr: 0.003874  min_lr: 0.003874  loss: 3.9601 (3.7280)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5047 (0.5405)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [51]  [2200/2502]  eta: 0:02:59  lr: 0.003873  min_lr: 0.003873  loss: 3.9675 (3.7212)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5326 (0.5383)  time: 0.5908  data: 0.0007  max mem: 53905
Epoch: [51]  [2400/2502]  eta: 0:01:00  lr: 0.003873  min_lr: 0.003873  loss: 3.2688 (3.7163)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5412 (0.5401)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [51]  [2501/2502]  eta: 0:00:00  lr: 0.003873  min_lr: 0.003873  loss: 3.9602 (3.7218)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4805 (0.5392)  time: 0.5347  data: 0.0006  max mem: 53905
Epoch: [51] Total time: 0:24:42 (0.5925 s / it)
Averaged stats: lr: 0.003873  min_lr: 0.003873  loss: 3.9602 (3.7165)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4805 (0.5392)
Test:  [ 0/50]  eta: 0:03:19  loss: 0.7089 (0.7089)  acc1: 92.0000 (92.0000)  acc5: 97.6000 (97.6000)  time: 3.9882  data: 3.6739  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 1.0935 (1.0458)  acc1: 78.8000 (79.8909)  acc5: 96.0000 (95.0182)  time: 0.6200  data: 0.3344  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0483 (1.0439)  acc1: 78.4000 (79.0857)  acc5: 96.0000 (95.7333)  time: 0.2833  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.2144 (1.1859)  acc1: 73.6000 (76.3355)  acc5: 92.4000 (93.8452)  time: 0.2834  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5240 (1.2697)  acc1: 70.0000 (74.3512)  acc5: 89.2000 (92.7610)  time: 0.2833  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5240 (1.2909)  acc1: 69.2000 (73.8800)  acc5: 89.6000 (92.5040)  time: 0.2830  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3603 s / it)
* Acc@1 73.910 Acc@5 92.488 loss 1.298
Accuracy of the model on the 50000 test images: 73.9%
Max accuracy: 73.91%
Epoch: [52]  [   0/2502]  eta: 1:37:35  lr: 0.003873  min_lr: 0.003873  loss: 4.4669 (4.4669)  weight_decay: 0.0500 (0.0500)  time: 2.3402  data: 1.7298  max mem: 53905
Epoch: [52]  [ 200/2502]  eta: 0:23:03  lr: 0.003872  min_lr: 0.003872  loss: 3.8890 (3.6736)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5405 (0.5228)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [52]  [ 400/2502]  eta: 0:20:54  lr: 0.003871  min_lr: 0.003871  loss: 3.8047 (3.6886)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5136 (0.5337)  time: 0.5920  data: 0.0008  max mem: 53905
Epoch: [52]  [ 600/2502]  eta: 0:18:53  lr: 0.003871  min_lr: 0.003871  loss: 3.7943 (3.6788)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5305 (0.5343)  time: 0.5934  data: 0.0007  max mem: 53905
Epoch: [52]  [ 800/2502]  eta: 0:16:53  lr: 0.003870  min_lr: 0.003870  loss: 3.6902 (3.6778)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5509 (0.5445)  time: 0.5920  data: 0.0007  max mem: 53905
Epoch: [52]  [1000/2502]  eta: 0:14:53  lr: 0.003869  min_lr: 0.003869  loss: 3.9883 (3.6750)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5283 (0.5426)  time: 0.5928  data: 0.0008  max mem: 53905
Epoch: [52]  [1200/2502]  eta: 0:12:54  lr: 0.003869  min_lr: 0.003869  loss: 3.8492 (3.6817)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5684 (0.5436)  time: 0.5978  data: 0.0005  max mem: 53905
Epoch: [52]  [1400/2502]  eta: 0:10:54  lr: 0.003868  min_lr: 0.003868  loss: 3.7466 (3.6881)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5522 (0.5425)  time: 0.5911  data: 0.0008  max mem: 53905
Epoch: [52]  [1600/2502]  eta: 0:08:55  lr: 0.003867  min_lr: 0.003867  loss: 3.8977 (3.6970)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5509 (0.5418)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [52]  [1800/2502]  eta: 0:06:57  lr: 0.003867  min_lr: 0.003867  loss: 3.7859 (3.7004)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4850 (0.5412)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [52]  [2000/2502]  eta: 0:04:58  lr: 0.003866  min_lr: 0.003866  loss: 3.9336 (3.6990)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5102 (nan)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [52]  [2200/2502]  eta: 0:02:59  lr: 0.003865  min_lr: 0.003865  loss: 3.7736 (3.6970)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5405 (nan)  time: 0.5994  data: 0.0006  max mem: 53905
Epoch: [52]  [2400/2502]  eta: 0:01:00  lr: 0.003865  min_lr: 0.003865  loss: 3.8481 (3.6954)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5351 (nan)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [52]  [2501/2502]  eta: 0:00:00  lr: 0.003865  min_lr: 0.003865  loss: 3.9029 (3.6942)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5212 (nan)  time: 0.5320  data: 0.0009  max mem: 53905
Epoch: [52] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.003865  min_lr: 0.003865  loss: 3.9029 (3.7209)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5212 (nan)
Test:  [ 0/50]  eta: 0:02:38  loss: 0.7041 (0.7041)  acc1: 89.6000 (89.6000)  acc5: 98.8000 (98.8000)  time: 3.1763  data: 2.8539  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.0349 (1.0573)  acc1: 80.0000 (80.5455)  acc5: 96.0000 (95.8545)  time: 0.5744  data: 0.2899  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0736 (1.0815)  acc1: 78.8000 (79.7714)  acc5: 96.0000 (95.9238)  time: 0.2978  data: 0.0170  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3600 (1.2185)  acc1: 73.2000 (77.1355)  acc5: 92.0000 (94.0129)  time: 0.2814  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5518 (1.2959)  acc1: 70.4000 (75.2781)  acc5: 89.6000 (92.8781)  time: 0.2812  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5118 (1.3135)  acc1: 70.4000 (74.7280)  acc5: 89.6000 (92.6320)  time: 0.2809  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3481 s / it)
* Acc@1 74.344 Acc@5 92.572 loss 1.326
Accuracy of the model on the 50000 test images: 74.3%
Max accuracy: 74.34%
Epoch: [53]  [   0/2502]  eta: 1:46:55  lr: 0.003865  min_lr: 0.003865  loss: 2.8818 (2.8818)  weight_decay: 0.0500 (0.0500)  time: 2.5640  data: 1.9564  max mem: 53905
Epoch: [53]  [ 200/2502]  eta: 0:23:01  lr: 0.003864  min_lr: 0.003864  loss: 3.9016 (3.6869)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5129 (0.5454)  time: 0.5904  data: 0.0006  max mem: 53905
Epoch: [53]  [ 400/2502]  eta: 0:20:54  lr: 0.003863  min_lr: 0.003863  loss: 3.7138 (3.6879)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5696 (0.5580)  time: 0.5908  data: 0.0007  max mem: 53905
Epoch: [53]  [ 600/2502]  eta: 0:18:52  lr: 0.003863  min_lr: 0.003863  loss: 3.6695 (3.6907)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5222 (0.5497)  time: 0.5912  data: 0.0006  max mem: 53905
Epoch: [53]  [ 800/2502]  eta: 0:16:51  lr: 0.003862  min_lr: 0.003862  loss: 3.8869 (3.6856)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5111 (0.5535)  time: 0.5979  data: 0.0004  max mem: 53905
Epoch: [53]  [1000/2502]  eta: 0:14:52  lr: 0.003861  min_lr: 0.003861  loss: 3.5349 (3.6819)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5543 (0.5482)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [53]  [1200/2502]  eta: 0:12:53  lr: 0.003861  min_lr: 0.003861  loss: 3.8448 (3.7000)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5104 (0.5444)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [53]  [1400/2502]  eta: 0:10:54  lr: 0.003860  min_lr: 0.003860  loss: 3.5386 (3.6956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5333 (0.5434)  time: 0.5915  data: 0.0006  max mem: 53905
Epoch: [53]  [1600/2502]  eta: 0:08:55  lr: 0.003859  min_lr: 0.003859  loss: 3.6743 (3.7081)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5291 (0.5435)  time: 0.5915  data: 0.0006  max mem: 53905
Epoch: [53]  [1800/2502]  eta: 0:06:56  lr: 0.003859  min_lr: 0.003859  loss: 3.8503 (3.7112)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5388 (0.5439)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [53]  [2000/2502]  eta: 0:04:57  lr: 0.003858  min_lr: 0.003858  loss: 3.6974 (3.7048)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5095 (0.5439)  time: 0.5974  data: 0.0007  max mem: 53905
Epoch: [53]  [2200/2502]  eta: 0:02:59  lr: 0.003857  min_lr: 0.003857  loss: 3.7693 (3.7039)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5675 (0.5446)  time: 0.5913  data: 0.0007  max mem: 53905
Epoch: [53]  [2400/2502]  eta: 0:01:00  lr: 0.003857  min_lr: 0.003857  loss: 3.5949 (3.7095)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5342 (0.5449)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [53]  [2501/2502]  eta: 0:00:00  lr: 0.003856  min_lr: 0.003856  loss: 3.7771 (3.7098)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5001 (0.5433)  time: 0.5326  data: 0.0006  max mem: 53905
Epoch: [53] Total time: 0:24:43 (0.5929 s / it)
Averaged stats: lr: 0.003856  min_lr: 0.003856  loss: 3.7771 (3.7111)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5001 (0.5433)
Test:  [ 0/50]  eta: 0:02:33  loss: 0.8373 (0.8373)  acc1: 88.4000 (88.4000)  acc5: 98.0000 (98.0000)  time: 3.0642  data: 2.7280  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.0235 (1.0442)  acc1: 80.8000 (80.4364)  acc5: 96.4000 (95.8545)  time: 0.5847  data: 0.2971  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9995 (1.0435)  acc1: 79.6000 (79.7524)  acc5: 96.4000 (96.0952)  time: 0.3102  data: 0.0272  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2171 (1.1598)  acc1: 74.8000 (76.8516)  acc5: 92.8000 (94.4645)  time: 0.2837  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4464 (1.2333)  acc1: 69.6000 (74.9463)  acc5: 90.0000 (93.3366)  time: 0.2834  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4172 (1.2525)  acc1: 69.2000 (74.3840)  acc5: 90.0000 (93.0960)  time: 0.2831  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3520 s / it)
* Acc@1 74.434 Acc@5 92.798 loss 1.265
Accuracy of the model on the 50000 test images: 74.4%
Max accuracy: 74.43%
Epoch: [54]  [   0/2502]  eta: 1:44:56  lr: 0.003856  min_lr: 0.003856  loss: 3.0093 (3.0093)  weight_decay: 0.0500 (0.0500)  time: 2.5166  data: 1.9063  max mem: 53905
Epoch: [54]  [ 200/2502]  eta: 0:23:07  lr: 0.003856  min_lr: 0.003856  loss: 3.8896 (3.6557)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5606 (0.5623)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [54]  [ 400/2502]  eta: 0:20:54  lr: 0.003855  min_lr: 0.003855  loss: 3.2699 (3.6295)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5287 (0.5535)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [54]  [ 600/2502]  eta: 0:18:53  lr: 0.003854  min_lr: 0.003854  loss: 3.9705 (3.6562)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5051 (0.5478)  time: 0.5998  data: 0.0005  max mem: 53905
Epoch: [54]  [ 800/2502]  eta: 0:16:52  lr: 0.003854  min_lr: 0.003854  loss: 3.7556 (3.6782)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5287 (0.5395)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [54]  [1000/2502]  eta: 0:14:52  lr: 0.003853  min_lr: 0.003853  loss: 3.8098 (3.6813)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5573 (0.5420)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [54]  [1200/2502]  eta: 0:12:53  lr: 0.003852  min_lr: 0.003852  loss: 3.7924 (3.6881)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5156 (0.5384)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [54]  [1400/2502]  eta: 0:10:54  lr: 0.003852  min_lr: 0.003852  loss: 3.8841 (3.6942)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5093 (0.5371)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [54]  [1600/2502]  eta: 0:08:55  lr: 0.003851  min_lr: 0.003851  loss: 3.7964 (3.6873)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5207 (0.5377)  time: 0.5949  data: 0.0004  max mem: 53905
Epoch: [54]  [1800/2502]  eta: 0:06:56  lr: 0.003850  min_lr: 0.003850  loss: 3.9296 (3.6911)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4977 (0.5347)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [54]  [2000/2502]  eta: 0:04:57  lr: 0.003850  min_lr: 0.003850  loss: 3.9629 (3.6975)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4842 (0.5339)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [54]  [2200/2502]  eta: 0:02:59  lr: 0.003849  min_lr: 0.003849  loss: 3.6594 (3.6976)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5756 (0.5336)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [54]  [2400/2502]  eta: 0:01:00  lr: 0.003848  min_lr: 0.003848  loss: 3.8111 (3.6985)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6047 (0.5360)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [54]  [2501/2502]  eta: 0:00:00  lr: 0.003848  min_lr: 0.003848  loss: 4.0168 (3.7047)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4894 (0.5354)  time: 0.5320  data: 0.0007  max mem: 53905
Epoch: [54] Total time: 0:24:42 (0.5927 s / it)
Averaged stats: lr: 0.003848  min_lr: 0.003848  loss: 4.0168 (3.6952)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4894 (0.5354)
Test:  [ 0/50]  eta: 0:02:43  loss: 0.8235 (0.8235)  acc1: 91.2000 (91.2000)  acc5: 98.4000 (98.4000)  time: 3.2651  data: 2.9481  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.0368 (1.1086)  acc1: 81.2000 (80.8364)  acc5: 96.4000 (95.8909)  time: 0.5791  data: 0.2949  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.1076 (1.1387)  acc1: 77.6000 (78.9714)  acc5: 95.6000 (95.9429)  time: 0.2959  data: 0.0150  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.4720 (1.3013)  acc1: 72.4000 (76.0903)  acc5: 92.8000 (93.9742)  time: 0.2815  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.6478 (1.3870)  acc1: 67.6000 (73.9902)  acc5: 88.8000 (92.7512)  time: 0.2813  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.6451 (1.4036)  acc1: 68.4000 (73.5040)  acc5: 89.6000 (92.5440)  time: 0.2811  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3499 s / it)
* Acc@1 73.622 Acc@5 92.550 loss 1.412
Accuracy of the model on the 50000 test images: 73.6%
Max accuracy: 74.43%
Epoch: [55]  [   0/2502]  eta: 1:47:36  lr: 0.003848  min_lr: 0.003848  loss: 2.6327 (2.6327)  weight_decay: 0.0500 (0.0500)  time: 2.5805  data: 1.7373  max mem: 53905
Epoch: [55]  [ 200/2502]  eta: 0:23:05  lr: 0.003847  min_lr: 0.003847  loss: 3.8779 (3.6922)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5120 (0.5281)  time: 0.6000  data: 0.0006  max mem: 53905
Epoch: [55]  [ 400/2502]  eta: 0:20:57  lr: 0.003846  min_lr: 0.003846  loss: 3.6169 (3.7153)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5137 (0.5333)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [55]  [ 600/2502]  eta: 0:18:53  lr: 0.003846  min_lr: 0.003846  loss: 3.7487 (3.6917)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5363 (0.5434)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [55]  [ 800/2502]  eta: 0:16:52  lr: 0.003845  min_lr: 0.003845  loss: 3.6853 (3.7002)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5064 (0.5464)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [55]  [1000/2502]  eta: 0:14:52  lr: 0.003844  min_lr: 0.003844  loss: 3.5953 (3.6853)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5347 (0.5462)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [55]  [1200/2502]  eta: 0:12:53  lr: 0.003844  min_lr: 0.003844  loss: 4.0262 (3.6944)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5747 (0.5510)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [55]  [1400/2502]  eta: 0:10:54  lr: 0.003843  min_lr: 0.003843  loss: 3.8694 (3.6957)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5240 (0.5506)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [55]  [1600/2502]  eta: 0:08:55  lr: 0.003842  min_lr: 0.003842  loss: 3.4373 (3.6962)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5729 (0.5512)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [55]  [1800/2502]  eta: 0:06:56  lr: 0.003842  min_lr: 0.003842  loss: 3.7362 (3.6998)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5409 (0.5528)  time: 0.5913  data: 0.0007  max mem: 53905
Epoch: [55]  [2000/2502]  eta: 0:04:57  lr: 0.003841  min_lr: 0.003841  loss: 3.3358 (3.6983)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5224 (0.5501)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [55]  [2200/2502]  eta: 0:02:59  lr: 0.003840  min_lr: 0.003840  loss: 3.9703 (3.7044)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5127 (0.5475)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [55]  [2400/2502]  eta: 0:01:00  lr: 0.003839  min_lr: 0.003839  loss: 3.5361 (3.6986)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5503 (0.5480)  time: 0.5909  data: 0.0008  max mem: 53905
Epoch: [55]  [2501/2502]  eta: 0:00:00  lr: 0.003839  min_lr: 0.003839  loss: 3.6695 (3.6955)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4865 (0.5477)  time: 0.5316  data: 0.0007  max mem: 53905
Epoch: [55] Total time: 0:24:42 (0.5927 s / it)
Averaged stats: lr: 0.003839  min_lr: 0.003839  loss: 3.6695 (3.6981)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4865 (0.5477)
Test:  [ 0/50]  eta: 0:02:48  loss: 0.6951 (0.6951)  acc1: 90.4000 (90.4000)  acc5: 98.0000 (98.0000)  time: 3.3602  data: 3.0240  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.0214 (1.0299)  acc1: 82.0000 (80.8000)  acc5: 96.4000 (95.8546)  time: 0.5666  data: 0.2807  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.0598 (1.0544)  acc1: 78.4000 (79.6762)  acc5: 96.0000 (96.0000)  time: 0.2843  data: 0.0034  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2407 (1.1930)  acc1: 74.0000 (77.0581)  acc5: 92.8000 (94.1419)  time: 0.2814  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5282 (1.2749)  acc1: 69.6000 (75.0732)  acc5: 88.8000 (93.0342)  time: 0.2811  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4896 (1.2882)  acc1: 70.4000 (74.6480)  acc5: 90.0000 (92.8400)  time: 0.2809  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3462 s / it)
* Acc@1 74.434 Acc@5 92.836 loss 1.298
Accuracy of the model on the 50000 test images: 74.4%
Max accuracy: 74.43%
Epoch: [56]  [   0/2502]  eta: 1:45:39  lr: 0.003839  min_lr: 0.003839  loss: 4.5026 (4.5026)  weight_decay: 0.0500 (0.0500)  time: 2.5339  data: 1.9312  max mem: 53905
Epoch: [56]  [ 200/2502]  eta: 0:23:02  lr: 0.003838  min_lr: 0.003838  loss: 3.6314 (3.6696)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5346 (0.5445)  time: 0.5900  data: 0.0005  max mem: 53905
Epoch: [56]  [ 400/2502]  eta: 0:20:51  lr: 0.003838  min_lr: 0.003838  loss: 3.8746 (3.6613)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5704 (0.5532)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [56]  [ 600/2502]  eta: 0:18:51  lr: 0.003837  min_lr: 0.003837  loss: 3.7129 (3.6649)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5225 (0.5545)  time: 0.5910  data: 0.0006  max mem: 53905
Epoch: [56]  [ 800/2502]  eta: 0:16:51  lr: 0.003836  min_lr: 0.003836  loss: 3.6681 (3.6658)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5078 (0.5429)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [56]  [1000/2502]  eta: 0:14:51  lr: 0.003836  min_lr: 0.003836  loss: 3.8102 (3.6694)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4981 (0.5420)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [56]  [1200/2502]  eta: 0:12:53  lr: 0.003835  min_lr: 0.003835  loss: 3.6789 (3.6749)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5272 (0.5423)  time: 0.5909  data: 0.0006  max mem: 53905
Epoch: [56]  [1400/2502]  eta: 0:10:53  lr: 0.003834  min_lr: 0.003834  loss: 3.9016 (3.6819)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5222 (0.5432)  time: 0.5913  data: 0.0006  max mem: 53905
Epoch: [56]  [1600/2502]  eta: 0:08:55  lr: 0.003833  min_lr: 0.003833  loss: 3.8082 (3.6783)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5434 (nan)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [56]  [1800/2502]  eta: 0:06:56  lr: 0.003833  min_lr: 0.003833  loss: 3.9074 (3.6784)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5387 (nan)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [56]  [2000/2502]  eta: 0:04:57  lr: 0.003832  min_lr: 0.003832  loss: 3.6689 (3.6794)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5536 (nan)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [56]  [2200/2502]  eta: 0:02:59  lr: 0.003831  min_lr: 0.003831  loss: 3.8914 (3.6898)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5319 (nan)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [56]  [2400/2502]  eta: 0:01:00  lr: 0.003831  min_lr: 0.003831  loss: 3.7485 (3.6905)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4970 (nan)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [56]  [2501/2502]  eta: 0:00:00  lr: 0.003830  min_lr: 0.003830  loss: 3.8893 (3.6931)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5273 (nan)  time: 0.5318  data: 0.0007  max mem: 53905
Epoch: [56] Total time: 0:24:42 (0.5927 s / it)
Averaged stats: lr: 0.003830  min_lr: 0.003830  loss: 3.8893 (3.6947)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5273 (nan)
Test:  [ 0/50]  eta: 0:02:54  loss: 0.7725 (0.7725)  acc1: 89.2000 (89.2000)  acc5: 98.4000 (98.4000)  time: 3.4953  data: 3.1653  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 1.0320 (1.0737)  acc1: 81.6000 (81.0909)  acc5: 96.4000 (95.9273)  time: 0.6003  data: 0.3135  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0811 (1.0942)  acc1: 78.4000 (79.7905)  acc5: 96.4000 (96.0952)  time: 0.2971  data: 0.0144  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2694 (1.2254)  acc1: 73.6000 (76.6839)  acc5: 93.6000 (94.2581)  time: 0.2835  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5274 (1.2994)  acc1: 68.8000 (74.6634)  acc5: 89.2000 (93.1317)  time: 0.2832  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4873 (1.3131)  acc1: 69.2000 (74.3440)  acc5: 90.0000 (92.9600)  time: 0.2830  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3552 s / it)
* Acc@1 74.344 Acc@5 92.916 loss 1.320
Accuracy of the model on the 50000 test images: 74.3%
Max accuracy: 74.43%
Epoch: [57]  [   0/2502]  eta: 1:54:18  lr: 0.003830  min_lr: 0.003830  loss: 3.7555 (3.7555)  weight_decay: 0.0500 (0.0500)  time: 2.7413  data: 2.1380  max mem: 53905
Epoch: [57]  [ 200/2502]  eta: 0:23:12  lr: 0.003829  min_lr: 0.003829  loss: 3.3151 (3.6337)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5405 (0.5511)  time: 0.6044  data: 0.0006  max mem: 53905
Epoch: [57]  [ 400/2502]  eta: 0:20:58  lr: 0.003829  min_lr: 0.003829  loss: 3.5679 (3.6530)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5514 (0.5440)  time: 0.5927  data: 0.0006  max mem: 53905
Epoch: [57]  [ 600/2502]  eta: 0:18:53  lr: 0.003828  min_lr: 0.003828  loss: 3.7313 (3.6426)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5153 (0.5464)  time: 0.5915  data: 0.0007  max mem: 53905
Epoch: [57]  [ 800/2502]  eta: 0:16:53  lr: 0.003827  min_lr: 0.003827  loss: 3.7502 (3.6585)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4881 (0.5414)  time: 0.5903  data: 0.0009  max mem: 53905
Epoch: [57]  [1000/2502]  eta: 0:14:52  lr: 0.003827  min_lr: 0.003827  loss: 3.8822 (3.6669)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5914 (0.5422)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [57]  [1200/2502]  eta: 0:12:53  lr: 0.003826  min_lr: 0.003826  loss: 3.6821 (3.6635)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5682 (0.5474)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [57]  [1400/2502]  eta: 0:10:54  lr: 0.003825  min_lr: 0.003825  loss: 3.9064 (3.6755)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5490 (0.5473)  time: 0.5948  data: 0.0005  max mem: 53905
Epoch: [57]  [1600/2502]  eta: 0:08:55  lr: 0.003824  min_lr: 0.003824  loss: 3.8031 (3.6756)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5229 (0.5462)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [57]  [1800/2502]  eta: 0:06:56  lr: 0.003824  min_lr: 0.003824  loss: 3.7387 (3.6772)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5940 (0.5487)  time: 0.5912  data: 0.0006  max mem: 53905
Epoch: [57]  [2000/2502]  eta: 0:04:57  lr: 0.003823  min_lr: 0.003823  loss: 4.0515 (3.6808)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5315 (0.5495)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [57]  [2200/2502]  eta: 0:02:59  lr: 0.003822  min_lr: 0.003822  loss: 3.7316 (3.6809)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5181 (0.5494)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [57]  [2400/2502]  eta: 0:01:00  lr: 0.003821  min_lr: 0.003821  loss: 3.6972 (3.6871)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4759 (0.5480)  time: 0.6080  data: 0.0005  max mem: 53905
Epoch: [57]  [2501/2502]  eta: 0:00:00  lr: 0.003821  min_lr: 0.003821  loss: 3.9115 (3.6839)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5155 (0.5475)  time: 0.5328  data: 0.0008  max mem: 53905
Epoch: [57] Total time: 0:24:43 (0.5928 s / it)
Averaged stats: lr: 0.003821  min_lr: 0.003821  loss: 3.9115 (3.6853)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5155 (0.5475)
Test:  [ 0/50]  eta: 0:03:14  loss: 0.8498 (0.8498)  acc1: 91.6000 (91.6000)  acc5: 97.6000 (97.6000)  time: 3.8921  data: 3.5634  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 1.1086 (1.1724)  acc1: 78.8000 (80.5818)  acc5: 96.0000 (95.6000)  time: 0.6118  data: 0.3244  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.1217 (1.1623)  acc1: 77.6000 (79.5619)  acc5: 96.0000 (95.9238)  time: 0.2839  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.2601 (1.2802)  acc1: 74.0000 (76.9290)  acc5: 94.4000 (93.9742)  time: 0.2840  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5374 (1.3513)  acc1: 69.6000 (74.9073)  acc5: 89.2000 (92.9756)  time: 0.2838  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5374 (1.3647)  acc1: 70.0000 (74.4320)  acc5: 90.0000 (92.7040)  time: 0.2835  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3585 s / it)
* Acc@1 74.360 Acc@5 92.740 loss 1.367
Accuracy of the model on the 50000 test images: 74.4%
Max accuracy: 74.43%
Epoch: [58]  [   0/2502]  eta: 2:00:02  lr: 0.003821  min_lr: 0.003821  loss: 3.7669 (3.7669)  weight_decay: 0.0500 (0.0500)  time: 2.8785  data: 1.8034  max mem: 53905
Epoch: [58]  [ 200/2502]  eta: 0:23:06  lr: 0.003820  min_lr: 0.003820  loss: 3.6753 (3.6766)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5086 (0.5408)  time: 0.5911  data: 0.0007  max mem: 53905
Epoch: [58]  [ 400/2502]  eta: 0:20:55  lr: 0.003820  min_lr: 0.003820  loss: 3.9039 (3.6757)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5015 (0.5373)  time: 0.5915  data: 0.0007  max mem: 53905
Epoch: [58]  [ 600/2502]  eta: 0:18:53  lr: 0.003819  min_lr: 0.003819  loss: 3.7616 (3.6682)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5146 (0.5446)  time: 0.5910  data: 0.0008  max mem: 53905
Epoch: [58]  [ 800/2502]  eta: 0:16:52  lr: 0.003818  min_lr: 0.003818  loss: 3.6330 (3.6620)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5401 (0.5461)  time: 0.5911  data: 0.0008  max mem: 53905
Epoch: [58]  [1000/2502]  eta: 0:14:52  lr: 0.003817  min_lr: 0.003817  loss: 3.8084 (3.6692)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5373 (0.5461)  time: 0.6007  data: 0.0008  max mem: 53905
Epoch: [58]  [1200/2502]  eta: 0:12:53  lr: 0.003817  min_lr: 0.003817  loss: 3.6158 (3.6750)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5400 (0.5488)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [58]  [1400/2502]  eta: 0:10:54  lr: 0.003816  min_lr: 0.003816  loss: 3.7463 (3.6708)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5455 (0.5515)  time: 0.5911  data: 0.0008  max mem: 53905
Epoch: [58]  [1600/2502]  eta: 0:08:55  lr: 0.003815  min_lr: 0.003815  loss: 3.5340 (3.6713)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5325 (0.5514)  time: 0.5904  data: 0.0007  max mem: 53905
Epoch: [58]  [1800/2502]  eta: 0:06:56  lr: 0.003814  min_lr: 0.003814  loss: 3.5633 (3.6688)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5337 (0.5504)  time: 0.5903  data: 0.0005  max mem: 53905
Epoch: [58]  [2000/2502]  eta: 0:04:57  lr: 0.003814  min_lr: 0.003814  loss: 3.7163 (3.6701)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5176 (0.5506)  time: 0.5915  data: 0.0007  max mem: 53905
Epoch: [58]  [2200/2502]  eta: 0:02:59  lr: 0.003813  min_lr: 0.003813  loss: 3.9250 (3.6693)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4991 (0.5494)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [58]  [2400/2502]  eta: 0:01:00  lr: 0.003812  min_lr: 0.003812  loss: 3.5619 (3.6692)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5304 (0.5507)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [58]  [2501/2502]  eta: 0:00:00  lr: 0.003812  min_lr: 0.003812  loss: 3.8828 (3.6723)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5379 (0.5496)  time: 0.5323  data: 0.0009  max mem: 53905
Epoch: [58] Total time: 0:24:42 (0.5925 s / it)
Averaged stats: lr: 0.003812  min_lr: 0.003812  loss: 3.8828 (3.6794)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5379 (0.5496)
Test:  [ 0/50]  eta: 0:02:41  loss: 0.7840 (0.7840)  acc1: 89.2000 (89.2000)  acc5: 99.2000 (99.2000)  time: 3.2387  data: 2.9132  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.1654 (1.1254)  acc1: 80.4000 (80.5818)  acc5: 96.4000 (96.0727)  time: 0.5560  data: 0.2690  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.1247 (1.1296)  acc1: 79.2000 (80.0952)  acc5: 96.0000 (95.9810)  time: 0.2860  data: 0.0026  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2908 (1.2495)  acc1: 74.8000 (77.1355)  acc5: 92.4000 (94.0903)  time: 0.2841  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5691 (1.3338)  acc1: 69.2000 (75.0439)  acc5: 89.6000 (93.0634)  time: 0.2838  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5691 (1.3510)  acc1: 68.4000 (74.5440)  acc5: 89.6000 (92.9120)  time: 0.2836  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3459 s / it)
* Acc@1 74.632 Acc@5 92.942 loss 1.351
Accuracy of the model on the 50000 test images: 74.6%
Max accuracy: 74.63%
Epoch: [59]  [   0/2502]  eta: 1:36:01  lr: 0.003812  min_lr: 0.003812  loss: 3.2275 (3.2275)  weight_decay: 0.0500 (0.0500)  time: 2.3028  data: 1.7102  max mem: 53905
Epoch: [59]  [ 200/2502]  eta: 0:23:05  lr: 0.003811  min_lr: 0.003811  loss: 3.8271 (3.6835)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5072 (0.5446)  time: 0.5956  data: 0.0008  max mem: 53905
Epoch: [59]  [ 400/2502]  eta: 0:20:54  lr: 0.003810  min_lr: 0.003810  loss: 3.8591 (3.6356)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5484 (0.5498)  time: 0.5904  data: 0.0006  max mem: 53905
Epoch: [59]  [ 600/2502]  eta: 0:18:51  lr: 0.003809  min_lr: 0.003809  loss: 3.5870 (3.6663)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5242 (0.5466)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [59]  [ 800/2502]  eta: 0:16:51  lr: 0.003809  min_lr: 0.003809  loss: 3.8278 (3.6640)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5236 (0.5481)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [59]  [1000/2502]  eta: 0:14:51  lr: 0.003808  min_lr: 0.003808  loss: 3.7359 (3.6571)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5129 (0.5504)  time: 0.5901  data: 0.0007  max mem: 53905
Epoch: [59]  [1200/2502]  eta: 0:12:52  lr: 0.003807  min_lr: 0.003807  loss: 3.7487 (3.6608)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5304 (0.5481)  time: 0.5905  data: 0.0005  max mem: 53905
Epoch: [59]  [1400/2502]  eta: 0:10:53  lr: 0.003806  min_lr: 0.003806  loss: 3.9012 (3.6637)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5420 (0.5458)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [59]  [1600/2502]  eta: 0:08:54  lr: 0.003805  min_lr: 0.003805  loss: 3.9574 (3.6759)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5638 (0.5484)  time: 0.5956  data: 0.0004  max mem: 53905
Epoch: [59]  [1800/2502]  eta: 0:06:56  lr: 0.003805  min_lr: 0.003805  loss: 3.4789 (3.6733)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5504 (0.5484)  time: 0.5991  data: 0.0005  max mem: 53905
Epoch: [59]  [2000/2502]  eta: 0:04:57  lr: 0.003804  min_lr: 0.003804  loss: 3.7219 (3.6712)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5460 (0.5474)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [59]  [2200/2502]  eta: 0:02:58  lr: 0.003803  min_lr: 0.003803  loss: 3.9647 (3.6811)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5414 (0.5482)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [59]  [2400/2502]  eta: 0:01:00  lr: 0.003802  min_lr: 0.003802  loss: 3.7936 (3.6833)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5148 (0.5494)  time: 0.5912  data: 0.0009  max mem: 53905
Epoch: [59]  [2501/2502]  eta: 0:00:00  lr: 0.003802  min_lr: 0.003802  loss: 3.7601 (3.6853)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5331  data: 0.0009  max mem: 53905
Epoch: [59] Total time: 0:24:42 (0.5925 s / it)
Averaged stats: lr: 0.003802  min_lr: 0.003802  loss: 3.7601 (3.6691)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)
Test:  [ 0/50]  eta: 0:02:58  loss: 0.8093 (0.8093)  acc1: 89.6000 (89.6000)  acc5: 98.8000 (98.8000)  time: 3.5685  data: 3.2570  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.0319 (1.1218)  acc1: 81.2000 (80.0727)  acc5: 96.4000 (95.8182)  time: 0.5817  data: 0.2965  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.1327 (1.1375)  acc1: 77.6000 (78.6476)  acc5: 96.0000 (95.8095)  time: 0.2832  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2850 (1.2562)  acc1: 72.4000 (76.1806)  acc5: 92.8000 (93.9097)  time: 0.2834  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5669 (1.3266)  acc1: 69.6000 (74.5366)  acc5: 89.2000 (92.8878)  time: 0.2831  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5080 (1.3470)  acc1: 69.6000 (73.8640)  acc5: 89.6000 (92.7200)  time: 0.2828  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3516 s / it)
* Acc@1 73.986 Acc@5 92.636 loss 1.353
Accuracy of the model on the 50000 test images: 74.0%
Max accuracy: 74.63%
Epoch: [60]  [   0/2502]  eta: 1:49:25  lr: 0.003802  min_lr: 0.003802  loss: 4.1259 (4.1259)  weight_decay: 0.0500 (0.0500)  time: 2.6240  data: 1.6970  max mem: 53905
Epoch: [60]  [ 200/2502]  eta: 0:23:05  lr: 0.003801  min_lr: 0.003801  loss: 3.4918 (3.6615)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5275 (0.5414)  time: 0.5908  data: 0.0007  max mem: 53905
Epoch: [60]  [ 400/2502]  eta: 0:20:55  lr: 0.003800  min_lr: 0.003800  loss: 3.7621 (3.6705)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5040 (0.5461)  time: 0.6068  data: 0.0006  max mem: 53905
Epoch: [60]  [ 600/2502]  eta: 0:18:51  lr: 0.003800  min_lr: 0.003800  loss: 3.7577 (3.6513)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5490 (0.5418)  time: 0.5903  data: 0.0005  max mem: 53905
Epoch: [60]  [ 800/2502]  eta: 0:16:51  lr: 0.003799  min_lr: 0.003799  loss: 3.4665 (3.6420)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5590 (0.5483)  time: 0.5905  data: 0.0006  max mem: 53905
Epoch: [60]  [1000/2502]  eta: 0:14:51  lr: 0.003798  min_lr: 0.003798  loss: 3.8282 (3.6510)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4974 (0.5474)  time: 0.5950  data: 0.0006  max mem: 53905
Epoch: [60]  [1200/2502]  eta: 0:12:52  lr: 0.003797  min_lr: 0.003797  loss: 3.8851 (3.6702)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5353 (0.5475)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [60]  [1400/2502]  eta: 0:10:53  lr: 0.003797  min_lr: 0.003797  loss: 3.7109 (3.6652)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5277 (0.5479)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [60]  [1600/2502]  eta: 0:08:54  lr: 0.003796  min_lr: 0.003796  loss: 3.7025 (3.6673)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5578 (0.5485)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [60]  [1800/2502]  eta: 0:06:56  lr: 0.003795  min_lr: 0.003795  loss: 3.7768 (3.6725)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5213 (0.5498)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [60]  [2000/2502]  eta: 0:04:57  lr: 0.003794  min_lr: 0.003794  loss: 3.9637 (3.6679)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5407 (0.5499)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [60]  [2200/2502]  eta: 0:02:59  lr: 0.003793  min_lr: 0.003793  loss: 3.3560 (3.6660)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5368 (0.5481)  time: 0.5981  data: 0.0005  max mem: 53905
Epoch: [60]  [2400/2502]  eta: 0:01:00  lr: 0.003793  min_lr: 0.003793  loss: 3.8454 (3.6695)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5246 (0.5495)  time: 0.5928  data: 0.0006  max mem: 53905
Epoch: [60]  [2501/2502]  eta: 0:00:00  lr: 0.003792  min_lr: 0.003792  loss: 4.0429 (3.6717)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5195 (0.5487)  time: 0.5341  data: 0.0008  max mem: 53905
Epoch: [60] Total time: 0:24:42 (0.5926 s / it)
Averaged stats: lr: 0.003792  min_lr: 0.003792  loss: 4.0429 (3.6760)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5195 (0.5487)
Test:  [ 0/50]  eta: 0:02:36  loss: 0.7532 (0.7532)  acc1: 90.8000 (90.8000)  acc5: 98.8000 (98.8000)  time: 3.1302  data: 2.8147  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.1019 (1.1047)  acc1: 80.4000 (79.7818)  acc5: 96.8000 (95.7091)  time: 0.5996  data: 0.3054  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.1349 (1.1401)  acc1: 78.0000 (79.0857)  acc5: 96.8000 (95.6381)  time: 0.3149  data: 0.0275  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2889 (1.2572)  acc1: 76.0000 (76.8000)  acc5: 93.2000 (94.1032)  time: 0.2829  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5956 (1.3417)  acc1: 68.4000 (74.7707)  acc5: 90.0000 (93.0342)  time: 0.2823  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5898 (1.3520)  acc1: 69.6000 (74.2400)  acc5: 90.0000 (92.8880)  time: 0.2820  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3542 s / it)
* Acc@1 74.386 Acc@5 92.648 loss 1.359
Accuracy of the model on the 50000 test images: 74.4%
Max accuracy: 74.63%
Epoch: [61]  [   0/2502]  eta: 1:50:22  lr: 0.003792  min_lr: 0.003792  loss: 2.3842 (2.3842)  weight_decay: 0.0500 (0.0500)  time: 2.6468  data: 2.0552  max mem: 53905
Epoch: [61]  [ 200/2502]  eta: 0:23:07  lr: 0.003791  min_lr: 0.003791  loss: 3.8047 (3.6081)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5729 (0.5813)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [61]  [ 400/2502]  eta: 0:20:55  lr: 0.003791  min_lr: 0.003791  loss: 3.4749 (3.6176)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4972 (0.5648)  time: 0.5902  data: 0.0004  max mem: 53905
Epoch: [61]  [ 600/2502]  eta: 0:18:52  lr: 0.003790  min_lr: 0.003790  loss: 3.6026 (3.6541)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5191 (0.5574)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [61]  [ 800/2502]  eta: 0:16:52  lr: 0.003789  min_lr: 0.003789  loss: 3.7244 (3.6638)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5045 (0.5521)  time: 0.5975  data: 0.0005  max mem: 53905
Epoch: [61]  [1000/2502]  eta: 0:14:52  lr: 0.003788  min_lr: 0.003788  loss: 3.8803 (3.6614)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5221 (0.5494)  time: 0.5902  data: 0.0004  max mem: 53905
Epoch: [61]  [1200/2502]  eta: 0:12:52  lr: 0.003787  min_lr: 0.003787  loss: 3.7077 (3.6758)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5436 (0.5502)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [61]  [1400/2502]  eta: 0:10:53  lr: 0.003787  min_lr: 0.003787  loss: 3.6276 (3.6684)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5565 (0.5490)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [61]  [1600/2502]  eta: 0:08:54  lr: 0.003786  min_lr: 0.003786  loss: 3.6958 (3.6699)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5283 (0.5485)  time: 0.5909  data: 0.0006  max mem: 53905
Epoch: [61]  [1800/2502]  eta: 0:06:56  lr: 0.003785  min_lr: 0.003785  loss: 3.6282 (3.6652)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5233 (0.5495)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [61]  [2000/2502]  eta: 0:04:57  lr: 0.003784  min_lr: 0.003784  loss: 3.7729 (3.6629)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5101 (0.5506)  time: 0.5905  data: 0.0005  max mem: 53905
Epoch: [61]  [2200/2502]  eta: 0:02:59  lr: 0.003783  min_lr: 0.003783  loss: 3.5945 (3.6632)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5431 (0.5508)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [61]  [2400/2502]  eta: 0:01:00  lr: 0.003782  min_lr: 0.003782  loss: 3.3192 (3.6583)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5425 (0.5504)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [61]  [2501/2502]  eta: 0:00:00  lr: 0.003782  min_lr: 0.003782  loss: 3.9316 (3.6610)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5044 (0.5486)  time: 0.5323  data: 0.0007  max mem: 53905
Epoch: [61] Total time: 0:24:42 (0.5925 s / it)
Averaged stats: lr: 0.003782  min_lr: 0.003782  loss: 3.9316 (3.6599)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5044 (0.5486)
Test:  [ 0/50]  eta: 0:02:31  loss: 0.7419 (0.7419)  acc1: 89.6000 (89.6000)  acc5: 98.4000 (98.4000)  time: 3.0264  data: 2.7019  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.0113 (1.0806)  acc1: 81.2000 (79.6727)  acc5: 96.4000 (95.9636)  time: 0.5645  data: 0.2676  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.1218 (1.1091)  acc1: 80.0000 (79.4667)  acc5: 96.4000 (96.1905)  time: 0.3071  data: 0.0123  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3113 (1.2242)  acc1: 74.8000 (76.9677)  acc5: 92.8000 (94.3871)  time: 0.2968  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5170 (1.2967)  acc1: 71.2000 (75.3756)  acc5: 89.6000 (93.2488)  time: 0.2973  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5131 (1.3118)  acc1: 71.2000 (74.7520)  acc5: 90.0000 (93.0560)  time: 0.2957  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3578 s / it)
* Acc@1 74.634 Acc@5 92.940 loss 1.320
Accuracy of the model on the 50000 test images: 74.6%
Max accuracy: 74.63%
Epoch: [62]  [   0/2502]  eta: 1:33:30  lr: 0.003782  min_lr: 0.003782  loss: 4.2197 (4.2197)  weight_decay: 0.0500 (0.0500)  time: 2.2423  data: 1.6508  max mem: 53905
Epoch: [62]  [ 200/2502]  eta: 0:22:58  lr: 0.003781  min_lr: 0.003781  loss: 3.7527 (3.6475)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5314 (0.5628)  time: 0.5904  data: 0.0005  max mem: 53905
Epoch: [62]  [ 400/2502]  eta: 0:20:52  lr: 0.003780  min_lr: 0.003780  loss: 3.4893 (3.6411)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5732 (0.5643)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [62]  [ 600/2502]  eta: 0:18:51  lr: 0.003780  min_lr: 0.003780  loss: 3.9120 (3.6484)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5110 (0.5541)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [62]  [ 800/2502]  eta: 0:16:50  lr: 0.003779  min_lr: 0.003779  loss: 3.6854 (3.6567)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5345 (0.5563)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [62]  [1000/2502]  eta: 0:14:51  lr: 0.003778  min_lr: 0.003778  loss: 3.7484 (3.6697)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5132 (0.5516)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [62]  [1200/2502]  eta: 0:12:52  lr: 0.003777  min_lr: 0.003777  loss: 3.5737 (3.6582)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5360 (0.5485)  time: 0.5905  data: 0.0005  max mem: 53905
Epoch: [62]  [1400/2502]  eta: 0:10:53  lr: 0.003776  min_lr: 0.003776  loss: 3.6145 (3.6563)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5395 (0.5484)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [62]  [1600/2502]  eta: 0:08:54  lr: 0.003776  min_lr: 0.003776  loss: 3.7327 (3.6580)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5212 (0.5484)  time: 0.6051  data: 0.0005  max mem: 53905
Epoch: [62]  [1800/2502]  eta: 0:06:56  lr: 0.003775  min_lr: 0.003775  loss: 3.4694 (3.6529)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5736 (0.5493)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [62]  [2000/2502]  eta: 0:04:57  lr: 0.003774  min_lr: 0.003774  loss: 3.9042 (3.6569)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5557 (0.5502)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [62]  [2200/2502]  eta: 0:02:59  lr: 0.003773  min_lr: 0.003773  loss: 4.0733 (3.6630)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5381 (0.5497)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [62]  [2400/2502]  eta: 0:01:00  lr: 0.003772  min_lr: 0.003772  loss: 3.8552 (3.6669)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5201 (0.5490)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [62]  [2501/2502]  eta: 0:00:00  lr: 0.003772  min_lr: 0.003772  loss: 3.5937 (3.6649)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5317 (0.5484)  time: 0.5324  data: 0.0007  max mem: 53905
Epoch: [62] Total time: 0:24:41 (0.5923 s / it)
Averaged stats: lr: 0.003772  min_lr: 0.003772  loss: 3.5937 (3.6639)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5317 (0.5484)
Test:  [ 0/50]  eta: 0:03:10  loss: 0.6665 (0.6665)  acc1: 90.8000 (90.8000)  acc5: 97.6000 (97.6000)  time: 3.8062  data: 3.4846  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.9819 (1.0176)  acc1: 81.6000 (80.5818)  acc5: 96.8000 (96.1091)  time: 0.6033  data: 0.3172  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0149 (1.0493)  acc1: 78.4000 (79.5238)  acc5: 96.8000 (96.1905)  time: 0.2832  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2620 (1.1852)  acc1: 74.4000 (77.1226)  acc5: 92.8000 (94.5806)  time: 0.2833  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5275 (1.2761)  acc1: 68.4000 (75.1707)  acc5: 89.6000 (93.2390)  time: 0.2831  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5275 (1.2983)  acc1: 68.8000 (74.7520)  acc5: 89.6000 (93.0480)  time: 0.2828  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3565 s / it)
* Acc@1 74.724 Acc@5 92.944 loss 1.305
Accuracy of the model on the 50000 test images: 74.7%
Max accuracy: 74.72%
Epoch: [63]  [   0/2502]  eta: 1:29:31  lr: 0.003772  min_lr: 0.003772  loss: 3.2991 (3.2991)  weight_decay: 0.0500 (0.0500)  time: 2.1470  data: 1.5571  max mem: 53905
Epoch: [63]  [ 200/2502]  eta: 0:23:10  lr: 0.003771  min_lr: 0.003771  loss: 3.4915 (3.5902)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5680 (0.5801)  time: 0.6000  data: 0.0004  max mem: 53905
Epoch: [63]  [ 400/2502]  eta: 0:20:56  lr: 0.003770  min_lr: 0.003770  loss: 3.5667 (3.6106)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6187 (0.5751)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [63]  [ 600/2502]  eta: 0:18:53  lr: 0.003769  min_lr: 0.003769  loss: 3.5805 (3.6197)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5562 (0.5631)  time: 0.6014  data: 0.0005  max mem: 53905
Epoch: [63]  [ 800/2502]  eta: 0:16:53  lr: 0.003768  min_lr: 0.003768  loss: 3.7933 (3.6314)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4952 (nan)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [63]  [1000/2502]  eta: 0:14:53  lr: 0.003768  min_lr: 0.003768  loss: 3.6575 (3.6339)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5450 (nan)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [63]  [1200/2502]  eta: 0:12:53  lr: 0.003767  min_lr: 0.003767  loss: 3.7264 (3.6370)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5182 (nan)  time: 0.6004  data: 0.0005  max mem: 53905
Epoch: [63]  [1400/2502]  eta: 0:10:54  lr: 0.003766  min_lr: 0.003766  loss: 3.7982 (3.6271)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6030 (nan)  time: 0.5902  data: 0.0005  max mem: 53905
Epoch: [63]  [1600/2502]  eta: 0:08:55  lr: 0.003765  min_lr: 0.003765  loss: 3.6505 (3.6314)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5222 (nan)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [63]  [1800/2502]  eta: 0:06:56  lr: 0.003764  min_lr: 0.003764  loss: 3.8611 (3.6310)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5121 (nan)  time: 0.5903  data: 0.0005  max mem: 53905
Epoch: [63]  [2000/2502]  eta: 0:04:57  lr: 0.003763  min_lr: 0.003763  loss: 3.8070 (3.6389)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5464 (nan)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [63]  [2200/2502]  eta: 0:02:59  lr: 0.003763  min_lr: 0.003763  loss: 3.5648 (3.6356)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5628 (nan)  time: 0.5912  data: 0.0006  max mem: 53905
Epoch: [63]  [2400/2502]  eta: 0:01:00  lr: 0.003762  min_lr: 0.003762  loss: 3.5315 (3.6370)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5343 (nan)  time: 0.6010  data: 0.0004  max mem: 53905
Epoch: [63]  [2501/2502]  eta: 0:00:00  lr: 0.003761  min_lr: 0.003761  loss: 3.7935 (3.6395)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5245 (nan)  time: 0.5325  data: 0.0009  max mem: 53905
Epoch: [63] Total time: 0:24:43 (0.5927 s / it)
Averaged stats: lr: 0.003761  min_lr: 0.003761  loss: 3.7935 (3.6469)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5245 (nan)
Test:  [ 0/50]  eta: 0:03:18  loss: 0.8419 (0.8419)  acc1: 88.0000 (88.0000)  acc5: 98.0000 (98.0000)  time: 3.9731  data: 3.6408  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 1.0183 (1.0567)  acc1: 80.8000 (81.3091)  acc5: 96.8000 (96.2909)  time: 0.6184  data: 0.3314  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0187 (1.0753)  acc1: 80.4000 (80.2286)  acc5: 96.4000 (96.3429)  time: 0.2832  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.2695 (1.2049)  acc1: 73.6000 (77.5484)  acc5: 93.2000 (94.4774)  time: 0.2835  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5252 (1.2826)  acc1: 70.4000 (75.5024)  acc5: 89.6000 (93.3463)  time: 0.2833  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4970 (1.2973)  acc1: 70.4000 (75.1280)  acc5: 90.4000 (93.2400)  time: 0.2832  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3606 s / it)
* Acc@1 74.918 Acc@5 93.152 loss 1.306
Accuracy of the model on the 50000 test images: 74.9%
Max accuracy: 74.92%
Epoch: [64]  [   0/2502]  eta: 1:40:36  lr: 0.003761  min_lr: 0.003761  loss: 2.8427 (2.8427)  weight_decay: 0.0500 (0.0500)  time: 2.4128  data: 1.8202  max mem: 53905
Epoch: [64]  [ 200/2502]  eta: 0:23:05  lr: 0.003760  min_lr: 0.003760  loss: 3.6423 (3.6188)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5279 (0.5478)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [64]  [ 400/2502]  eta: 0:20:56  lr: 0.003760  min_lr: 0.003760  loss: 3.8039 (3.6362)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5286 (0.5608)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [64]  [ 600/2502]  eta: 0:18:53  lr: 0.003759  min_lr: 0.003759  loss: 3.9469 (3.6631)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5324 (0.5580)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [64]  [ 800/2502]  eta: 0:16:52  lr: 0.003758  min_lr: 0.003758  loss: 3.9437 (3.6658)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5507 (0.5605)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [64]  [1000/2502]  eta: 0:14:53  lr: 0.003757  min_lr: 0.003757  loss: 3.9189 (3.6637)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5587 (0.5587)  time: 0.5989  data: 0.0004  max mem: 53905
Epoch: [64]  [1200/2502]  eta: 0:12:53  lr: 0.003756  min_lr: 0.003756  loss: 3.4560 (3.6621)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5516 (0.5587)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [64]  [1400/2502]  eta: 0:10:54  lr: 0.003755  min_lr: 0.003755  loss: 3.5225 (3.6597)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5516 (0.5590)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [64]  [1600/2502]  eta: 0:08:55  lr: 0.003754  min_lr: 0.003754  loss: 3.8252 (3.6599)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5668 (0.5604)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [64]  [1800/2502]  eta: 0:06:56  lr: 0.003754  min_lr: 0.003754  loss: 3.6581 (3.6541)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5745 (0.5648)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [64]  [2000/2502]  eta: 0:04:57  lr: 0.003753  min_lr: 0.003753  loss: 3.5441 (3.6571)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5250 (0.5625)  time: 0.5964  data: 0.0004  max mem: 53905
Epoch: [64]  [2200/2502]  eta: 0:02:59  lr: 0.003752  min_lr: 0.003752  loss: 3.5981 (3.6569)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6078 (0.5627)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [64]  [2400/2502]  eta: 0:01:00  lr: 0.003751  min_lr: 0.003751  loss: 3.7753 (3.6563)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5390 (0.5631)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [64]  [2501/2502]  eta: 0:00:00  lr: 0.003751  min_lr: 0.003751  loss: 3.4657 (3.6574)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5311 (0.5621)  time: 0.5328  data: 0.0007  max mem: 53905
Epoch: [64] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.003751  min_lr: 0.003751  loss: 3.4657 (3.6453)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5311 (0.5621)
Test:  [ 0/50]  eta: 0:03:15  loss: 0.7345 (0.7345)  acc1: 89.6000 (89.6000)  acc5: 98.8000 (98.8000)  time: 3.9161  data: 3.6126  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 1.0238 (1.0856)  acc1: 80.4000 (80.3273)  acc5: 95.6000 (96.0364)  time: 0.6133  data: 0.3288  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0586 (1.0868)  acc1: 78.0000 (79.3524)  acc5: 96.0000 (96.0191)  time: 0.2832  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1472 (1.1939)  acc1: 74.8000 (77.0581)  acc5: 93.2000 (94.2194)  time: 0.2833  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4978 (1.2676)  acc1: 69.6000 (75.1902)  acc5: 90.0000 (93.2000)  time: 0.2830  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4694 (1.2875)  acc1: 69.6000 (74.7200)  acc5: 90.4000 (93.0480)  time: 0.2828  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3586 s / it)
* Acc@1 74.938 Acc@5 93.010 loss 1.288
Accuracy of the model on the 50000 test images: 74.9%
Max accuracy: 74.94%
Epoch: [65]  [   0/2502]  eta: 1:45:52  lr: 0.003751  min_lr: 0.003751  loss: 3.7016 (3.7016)  weight_decay: 0.0500 (0.0500)  time: 2.5388  data: 1.9337  max mem: 53905
Epoch: [65]  [ 200/2502]  eta: 0:23:08  lr: 0.003750  min_lr: 0.003750  loss: 3.8061 (3.6577)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5285 (0.5223)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [65]  [ 400/2502]  eta: 0:20:55  lr: 0.003749  min_lr: 0.003749  loss: 3.3793 (3.6445)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5388 (0.5483)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [65]  [ 600/2502]  eta: 0:18:53  lr: 0.003748  min_lr: 0.003748  loss: 3.9140 (3.6268)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5375 (0.5508)  time: 0.6028  data: 0.0005  max mem: 53905
Epoch: [65]  [ 800/2502]  eta: 0:16:52  lr: 0.003747  min_lr: 0.003747  loss: 3.7206 (3.6296)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5452 (0.5560)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [65]  [1000/2502]  eta: 0:14:52  lr: 0.003746  min_lr: 0.003746  loss: 3.7319 (3.6335)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5458 (0.5571)  time: 0.5900  data: 0.0004  max mem: 53905
Epoch: [65]  [1200/2502]  eta: 0:12:52  lr: 0.003745  min_lr: 0.003745  loss: 3.3423 (3.6361)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5347 (0.5585)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [65]  [1400/2502]  eta: 0:10:53  lr: 0.003744  min_lr: 0.003744  loss: 3.8592 (3.6455)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5272 (0.5593)  time: 0.5901  data: 0.0004  max mem: 53905
Epoch: [65]  [1600/2502]  eta: 0:08:54  lr: 0.003744  min_lr: 0.003744  loss: 3.5691 (3.6396)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5094 (0.5575)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [65]  [1800/2502]  eta: 0:06:56  lr: 0.003743  min_lr: 0.003743  loss: 3.6642 (3.6467)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5393 (0.5575)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [65]  [2000/2502]  eta: 0:04:57  lr: 0.003742  min_lr: 0.003742  loss: 3.6069 (3.6501)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5506 (0.5576)  time: 0.5924  data: 0.0006  max mem: 53905
Epoch: [65]  [2200/2502]  eta: 0:02:59  lr: 0.003741  min_lr: 0.003741  loss: 3.9021 (3.6490)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5537 (0.5599)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [65]  [2400/2502]  eta: 0:01:00  lr: 0.003740  min_lr: 0.003740  loss: 3.8316 (3.6472)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5457 (0.5602)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [65]  [2501/2502]  eta: 0:00:00  lr: 0.003740  min_lr: 0.003740  loss: 3.4437 (3.6465)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5624 (0.5606)  time: 0.5400  data: 0.0007  max mem: 53905
Epoch: [65] Total time: 0:24:42 (0.5926 s / it)
Averaged stats: lr: 0.003740  min_lr: 0.003740  loss: 3.4437 (3.6392)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5624 (0.5606)
Test:  [ 0/50]  eta: 0:02:44  loss: 0.7199 (0.7199)  acc1: 90.4000 (90.4000)  acc5: 97.6000 (97.6000)  time: 3.2994  data: 2.9712  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.9621 (1.0103)  acc1: 81.6000 (81.5636)  acc5: 96.4000 (95.8909)  time: 0.5572  data: 0.2706  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.0151 (1.0297)  acc1: 78.4000 (80.3238)  acc5: 96.4000 (96.1524)  time: 0.2832  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1825 (1.1476)  acc1: 76.4000 (77.8452)  acc5: 93.6000 (94.3097)  time: 0.2834  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4542 (1.2203)  acc1: 71.6000 (75.9122)  acc5: 90.0000 (93.2976)  time: 0.2831  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4245 (1.2327)  acc1: 70.8000 (75.3920)  acc5: 90.8000 (93.2000)  time: 0.2829  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3473 s / it)
* Acc@1 75.264 Acc@5 93.218 loss 1.237
Accuracy of the model on the 50000 test images: 75.3%
Max accuracy: 75.26%
Epoch: [66]  [   0/2502]  eta: 1:41:32  lr: 0.003740  min_lr: 0.003740  loss: 3.3966 (3.3966)  weight_decay: 0.0500 (0.0500)  time: 2.4351  data: 1.8447  max mem: 53905
Epoch: [66]  [ 200/2502]  eta: 0:23:01  lr: 0.003739  min_lr: 0.003739  loss: 3.6584 (3.5520)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5369 (0.5399)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [66]  [ 400/2502]  eta: 0:20:54  lr: 0.003738  min_lr: 0.003738  loss: 3.8283 (3.6225)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5392 (0.5529)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [66]  [ 600/2502]  eta: 0:18:51  lr: 0.003737  min_lr: 0.003737  loss: 3.1575 (3.6317)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5144 (0.5492)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [66]  [ 800/2502]  eta: 0:16:51  lr: 0.003736  min_lr: 0.003736  loss: 3.8170 (3.6333)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5500 (0.5460)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [66]  [1000/2502]  eta: 0:14:52  lr: 0.003735  min_lr: 0.003735  loss: 3.4888 (3.6254)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5228 (0.5502)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [66]  [1200/2502]  eta: 0:12:52  lr: 0.003734  min_lr: 0.003734  loss: 3.6137 (3.6231)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5387 (0.5517)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [66]  [1400/2502]  eta: 0:10:53  lr: 0.003733  min_lr: 0.003733  loss: 3.8838 (3.6234)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5939  data: 0.0005  max mem: 53905
Epoch: [66]  [1600/2502]  eta: 0:08:54  lr: 0.003732  min_lr: 0.003732  loss: 3.6618 (3.6259)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5543 (nan)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [66]  [1800/2502]  eta: 0:06:56  lr: 0.003732  min_lr: 0.003732  loss: 3.8973 (3.6284)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5726 (nan)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [66]  [2000/2502]  eta: 0:04:57  lr: 0.003731  min_lr: 0.003731  loss: 3.6613 (3.6317)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5650 (nan)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [66]  [2200/2502]  eta: 0:02:59  lr: 0.003730  min_lr: 0.003730  loss: 3.6122 (3.6312)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5290 (nan)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [66]  [2400/2502]  eta: 0:01:00  lr: 0.003729  min_lr: 0.003729  loss: 3.5451 (3.6331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5194 (nan)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [66]  [2501/2502]  eta: 0:00:00  lr: 0.003728  min_lr: 0.003728  loss: 3.8686 (3.6359)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5228 (nan)  time: 0.5321  data: 0.0006  max mem: 53905
Epoch: [66] Total time: 0:24:42 (0.5924 s / it)
Averaged stats: lr: 0.003728  min_lr: 0.003728  loss: 3.8686 (3.6311)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5228 (nan)
Test:  [ 0/50]  eta: 0:04:25  loss: 0.6414 (0.6414)  acc1: 92.0000 (92.0000)  acc5: 98.8000 (98.8000)  time: 5.3092  data: 4.9489  max mem: 53905
Test:  [10/50]  eta: 0:00:34  loss: 0.8976 (0.9937)  acc1: 80.8000 (81.4909)  acc5: 97.2000 (96.7636)  time: 0.8553  data: 0.5577  max mem: 53905
Test:  [20/50]  eta: 0:00:19  loss: 0.9679 (1.0161)  acc1: 79.2000 (80.8952)  acc5: 96.8000 (96.6095)  time: 0.4295  data: 0.1357  max mem: 53905
Test:  [30/50]  eta: 0:00:10  loss: 1.1924 (1.1360)  acc1: 76.0000 (78.4645)  acc5: 93.2000 (94.7742)  time: 0.3665  data: 0.0768  max mem: 53905
Test:  [40/50]  eta: 0:00:04  loss: 1.4638 (1.2190)  acc1: 72.0000 (76.4781)  acc5: 90.0000 (93.7073)  time: 0.2830  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4159 (1.2354)  acc1: 72.8000 (75.8640)  acc5: 90.0000 (93.4400)  time: 0.2820  data: 0.0002  max mem: 53905
Test: Total time: 0:00:22 (0.4449 s / it)
* Acc@1 75.496 Acc@5 93.312 loss 1.237
Accuracy of the model on the 50000 test images: 75.5%
Max accuracy: 75.50%
Epoch: [67]  [   0/2502]  eta: 1:29:03  lr: 0.003728  min_lr: 0.003728  loss: 3.5905 (3.5905)  weight_decay: 0.0500 (0.0500)  time: 2.1356  data: 1.5431  max mem: 53905
Epoch: [67]  [ 200/2502]  eta: 0:23:03  lr: 0.003727  min_lr: 0.003727  loss: 3.8236 (3.6108)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5744 (0.5571)  time: 0.5909  data: 0.0006  max mem: 53905
Epoch: [67]  [ 400/2502]  eta: 0:20:52  lr: 0.003727  min_lr: 0.003727  loss: 3.8628 (3.6268)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5156 (0.5448)  time: 0.5909  data: 0.0008  max mem: 53905
Epoch: [67]  [ 600/2502]  eta: 0:18:52  lr: 0.003726  min_lr: 0.003726  loss: 3.7523 (3.6400)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5066 (0.5443)  time: 0.5910  data: 0.0006  max mem: 53905
Epoch: [67]  [ 800/2502]  eta: 0:16:51  lr: 0.003725  min_lr: 0.003725  loss: 3.9187 (3.6368)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5722 (0.5514)  time: 0.5908  data: 0.0006  max mem: 53905
Epoch: [67]  [1000/2502]  eta: 0:14:51  lr: 0.003724  min_lr: 0.003724  loss: 3.6865 (3.6405)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5201 (0.5519)  time: 0.5910  data: 0.0006  max mem: 53905
Epoch: [67]  [1200/2502]  eta: 0:12:52  lr: 0.003723  min_lr: 0.003723  loss: 3.8662 (3.6383)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5679 (0.5536)  time: 0.5906  data: 0.0006  max mem: 53905
Epoch: [67]  [1400/2502]  eta: 0:10:53  lr: 0.003722  min_lr: 0.003722  loss: 3.7228 (3.6392)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5526 (0.5534)  time: 0.5904  data: 0.0005  max mem: 53905
Epoch: [67]  [1600/2502]  eta: 0:08:54  lr: 0.003721  min_lr: 0.003721  loss: 3.4136 (3.6403)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5389 (0.5541)  time: 0.5913  data: 0.0006  max mem: 53905
Epoch: [67]  [1800/2502]  eta: 0:06:56  lr: 0.003720  min_lr: 0.003720  loss: 3.8062 (3.6401)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5588 (0.5580)  time: 0.5980  data: 0.0006  max mem: 53905
Epoch: [67]  [2000/2502]  eta: 0:04:57  lr: 0.003719  min_lr: 0.003719  loss: 3.2587 (3.6405)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5130 (0.5559)  time: 0.5912  data: 0.0008  max mem: 53905
Epoch: [67]  [2200/2502]  eta: 0:02:59  lr: 0.003718  min_lr: 0.003718  loss: 3.8444 (3.6402)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5227 (0.5565)  time: 0.5921  data: 0.0007  max mem: 53905
Epoch: [67]  [2400/2502]  eta: 0:01:00  lr: 0.003717  min_lr: 0.003717  loss: 3.7833 (3.6385)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5179 (0.5567)  time: 0.5912  data: 0.0006  max mem: 53905
Epoch: [67]  [2501/2502]  eta: 0:00:00  lr: 0.003717  min_lr: 0.003717  loss: 3.8158 (3.6403)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5023 (0.5550)  time: 0.5324  data: 0.0007  max mem: 53905
Epoch: [67] Total time: 0:24:42 (0.5926 s / it)
Averaged stats: lr: 0.003717  min_lr: 0.003717  loss: 3.8158 (3.6322)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5023 (0.5550)
Test:  [ 0/50]  eta: 0:03:11  loss: 0.7418 (0.7418)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 3.8286  data: 3.5192  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 1.0311 (1.0853)  acc1: 79.6000 (81.2000)  acc5: 96.8000 (96.0364)  time: 0.6052  data: 0.3204  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0842 (1.1060)  acc1: 79.2000 (80.0952)  acc5: 96.8000 (96.0381)  time: 0.2831  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3406 (1.2474)  acc1: 75.6000 (77.4194)  acc5: 94.4000 (94.3355)  time: 0.2834  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5834 (1.3344)  acc1: 68.8000 (75.3073)  acc5: 89.6000 (93.1707)  time: 0.2831  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5773 (1.3487)  acc1: 68.8000 (74.8000)  acc5: 88.8000 (92.9920)  time: 0.2828  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3572 s / it)
* Acc@1 74.772 Acc@5 93.026 loss 1.349
Accuracy of the model on the 50000 test images: 74.8%
Max accuracy: 75.50%
Epoch: [68]  [   0/2502]  eta: 1:53:26  lr: 0.003717  min_lr: 0.003717  loss: 4.1096 (4.1096)  weight_decay: 0.0500 (0.0500)  time: 2.7203  data: 2.1134  max mem: 53905
Epoch: [68]  [ 200/2502]  eta: 0:23:06  lr: 0.003716  min_lr: 0.003716  loss: 3.5547 (3.6197)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5461 (0.5374)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [68]  [ 400/2502]  eta: 0:20:56  lr: 0.003715  min_lr: 0.003715  loss: 3.7533 (3.6225)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5622 (0.5388)  time: 0.5908  data: 0.0006  max mem: 53905
Epoch: [68]  [ 600/2502]  eta: 0:18:52  lr: 0.003714  min_lr: 0.003714  loss: 3.4988 (3.6258)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5736 (nan)  time: 0.5908  data: 0.0007  max mem: 53905
Epoch: [68]  [ 800/2502]  eta: 0:16:52  lr: 0.003713  min_lr: 0.003713  loss: 3.8456 (3.6278)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5894 (nan)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [68]  [1000/2502]  eta: 0:14:52  lr: 0.003712  min_lr: 0.003712  loss: 3.7708 (3.6245)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5243 (nan)  time: 0.5908  data: 0.0006  max mem: 53905
Epoch: [68]  [1200/2502]  eta: 0:12:52  lr: 0.003711  min_lr: 0.003711  loss: 3.6012 (3.6263)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5946 (nan)  time: 0.5907  data: 0.0007  max mem: 53905
Epoch: [68]  [1400/2502]  eta: 0:10:53  lr: 0.003710  min_lr: 0.003710  loss: 3.7214 (3.6355)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5536 (nan)  time: 0.5909  data: 0.0006  max mem: 53905
Epoch: [68]  [1600/2502]  eta: 0:08:55  lr: 0.003710  min_lr: 0.003710  loss: 3.6245 (3.6350)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6035 (nan)  time: 0.5901  data: 0.0006  max mem: 53905
Epoch: [68]  [1800/2502]  eta: 0:06:56  lr: 0.003709  min_lr: 0.003709  loss: 3.6961 (3.6314)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5458 (nan)  time: 0.5901  data: 0.0007  max mem: 53905
Epoch: [68]  [2000/2502]  eta: 0:04:57  lr: 0.003708  min_lr: 0.003708  loss: 3.6382 (3.6311)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5155 (nan)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [68]  [2200/2502]  eta: 0:02:58  lr: 0.003707  min_lr: 0.003707  loss: 3.8035 (3.6282)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5396 (nan)  time: 0.5907  data: 0.0006  max mem: 53905
Epoch: [68]  [2400/2502]  eta: 0:01:00  lr: 0.003706  min_lr: 0.003706  loss: 3.6516 (3.6299)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5834 (nan)  time: 0.5928  data: 0.0006  max mem: 53905
Epoch: [68]  [2501/2502]  eta: 0:00:00  lr: 0.003705  min_lr: 0.003705  loss: 3.6432 (3.6350)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5251 (nan)  time: 0.5316  data: 0.0009  max mem: 53905
Epoch: [68] Total time: 0:24:41 (0.5922 s / it)
Averaged stats: lr: 0.003705  min_lr: 0.003705  loss: 3.6432 (3.6310)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5251 (nan)
Test:  [ 0/50]  eta: 0:02:34  loss: 0.6783 (0.6783)  acc1: 90.4000 (90.4000)  acc5: 99.2000 (99.2000)  time: 3.0981  data: 2.7731  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 1.0779 (1.0389)  acc1: 80.0000 (80.4000)  acc5: 96.0000 (95.7818)  time: 0.5492  data: 0.2645  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.0779 (1.0627)  acc1: 78.8000 (80.0000)  acc5: 96.0000 (95.9429)  time: 0.2878  data: 0.0071  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2066 (1.1617)  acc1: 74.8000 (77.7161)  acc5: 92.4000 (94.4903)  time: 0.2813  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4184 (1.2308)  acc1: 72.0000 (76.2732)  acc5: 90.4000 (93.4244)  time: 0.2811  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3863 (1.2484)  acc1: 71.6000 (75.5840)  acc5: 90.4000 (93.2720)  time: 0.2808  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3424 s / it)
* Acc@1 75.274 Acc@5 93.264 loss 1.255
Accuracy of the model on the 50000 test images: 75.3%
Max accuracy: 75.50%
Epoch: [69]  [   0/2502]  eta: 1:50:19  lr: 0.003705  min_lr: 0.003705  loss: 4.1393 (4.1393)  weight_decay: 0.0500 (0.0500)  time: 2.6456  data: 1.5352  max mem: 53905
Epoch: [69]  [ 200/2502]  eta: 0:23:06  lr: 0.003704  min_lr: 0.003704  loss: 3.4171 (3.5727)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5640 (0.5828)  time: 0.5912  data: 0.0007  max mem: 53905
Epoch: [69]  [ 400/2502]  eta: 0:20:54  lr: 0.003703  min_lr: 0.003703  loss: 3.7737 (3.6110)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4931 (0.5632)  time: 0.5903  data: 0.0008  max mem: 53905
Epoch: [69]  [ 600/2502]  eta: 0:18:52  lr: 0.003703  min_lr: 0.003703  loss: 3.5977 (3.5896)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5619 (0.5652)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [69]  [ 800/2502]  eta: 0:16:51  lr: 0.003702  min_lr: 0.003702  loss: 3.7744 (3.6074)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5351 (0.5592)  time: 0.5908  data: 0.0007  max mem: 53905
Epoch: [69]  [1000/2502]  eta: 0:14:51  lr: 0.003701  min_lr: 0.003701  loss: 4.0038 (3.6144)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5201 (0.5586)  time: 0.5905  data: 0.0006  max mem: 53905
Epoch: [69]  [1200/2502]  eta: 0:12:52  lr: 0.003700  min_lr: 0.003700  loss: 3.7457 (3.6192)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5966 (0.5625)  time: 0.5973  data: 0.0006  max mem: 53905
Epoch: [69]  [1400/2502]  eta: 0:10:53  lr: 0.003699  min_lr: 0.003699  loss: 3.6872 (3.6240)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5188 (0.5600)  time: 0.5910  data: 0.0006  max mem: 53905
Epoch: [69]  [1600/2502]  eta: 0:08:55  lr: 0.003698  min_lr: 0.003698  loss: 3.8489 (3.6298)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5801 (0.5624)  time: 0.5954  data: 0.0006  max mem: 53905
Epoch: [69]  [1800/2502]  eta: 0:06:56  lr: 0.003697  min_lr: 0.003697  loss: 3.5893 (3.6299)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5242 (0.5613)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [69]  [2000/2502]  eta: 0:04:57  lr: 0.003696  min_lr: 0.003696  loss: 3.7056 (3.6321)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5125 (0.5600)  time: 0.5915  data: 0.0006  max mem: 53905
Epoch: [69]  [2200/2502]  eta: 0:02:59  lr: 0.003695  min_lr: 0.003695  loss: 3.3451 (3.6317)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5352 (0.5582)  time: 0.6011  data: 0.0007  max mem: 53905
Epoch: [69]  [2400/2502]  eta: 0:01:00  lr: 0.003694  min_lr: 0.003694  loss: 3.5829 (3.6299)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5416 (0.5588)  time: 0.5913  data: 0.0006  max mem: 53905
Epoch: [69]  [2501/2502]  eta: 0:00:00  lr: 0.003694  min_lr: 0.003694  loss: 3.3523 (3.6271)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5347 (0.5580)  time: 0.5336  data: 0.0008  max mem: 53905
Epoch: [69] Total time: 0:24:43 (0.5928 s / it)
Averaged stats: lr: 0.003694  min_lr: 0.003694  loss: 3.3523 (3.6200)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5347 (0.5580)
Test:  [ 0/50]  eta: 0:02:45  loss: 0.6416 (0.6416)  acc1: 90.4000 (90.4000)  acc5: 98.0000 (98.0000)  time: 3.3169  data: 2.9964  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.9564 (0.9642)  acc1: 81.2000 (80.6182)  acc5: 96.4000 (95.6727)  time: 0.5587  data: 0.2728  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9564 (0.9790)  acc1: 78.4000 (79.8667)  acc5: 96.0000 (95.8857)  time: 0.2831  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1707 (1.0901)  acc1: 75.6000 (77.7806)  acc5: 92.4000 (94.2968)  time: 0.2833  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3703 (1.1604)  acc1: 71.2000 (75.8829)  acc5: 90.8000 (93.4439)  time: 0.2831  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3703 (1.1800)  acc1: 70.0000 (75.4240)  acc5: 90.8000 (93.1520)  time: 0.2828  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3468 s / it)
* Acc@1 75.208 Acc@5 93.144 loss 1.187
Accuracy of the model on the 50000 test images: 75.2%
Max accuracy: 75.50%
Epoch: [70]  [   0/2502]  eta: 1:49:12  lr: 0.003694  min_lr: 0.003694  loss: 3.0093 (3.0093)  weight_decay: 0.0500 (0.0500)  time: 2.6189  data: 1.9538  max mem: 53905
Epoch: [70]  [ 200/2502]  eta: 0:23:04  lr: 0.003693  min_lr: 0.003693  loss: 3.7938 (3.5392)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5385 (0.5356)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [70]  [ 400/2502]  eta: 0:20:56  lr: 0.003692  min_lr: 0.003692  loss: 3.3639 (3.5608)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5178 (0.5433)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [70]  [ 600/2502]  eta: 0:18:53  lr: 0.003691  min_lr: 0.003691  loss: 3.8867 (3.5882)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5379 (0.5430)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [70]  [ 800/2502]  eta: 0:16:52  lr: 0.003690  min_lr: 0.003690  loss: 3.6764 (3.6024)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5373 (0.5459)  time: 0.5954  data: 0.0006  max mem: 53905
Epoch: [70]  [1000/2502]  eta: 0:14:52  lr: 0.003689  min_lr: 0.003689  loss: 3.7730 (3.6148)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5384 (0.5479)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [70]  [1200/2502]  eta: 0:12:53  lr: 0.003688  min_lr: 0.003688  loss: 3.6276 (3.6162)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5226 (0.5514)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [70]  [1400/2502]  eta: 0:10:54  lr: 0.003687  min_lr: 0.003687  loss: 3.8345 (3.6134)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5398 (0.5537)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [70]  [1600/2502]  eta: 0:08:55  lr: 0.003686  min_lr: 0.003686  loss: 3.7601 (3.6122)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5781 (0.5567)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [70]  [1800/2502]  eta: 0:06:56  lr: 0.003685  min_lr: 0.003685  loss: 3.3954 (3.6087)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5380 (0.5561)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [70]  [2000/2502]  eta: 0:04:57  lr: 0.003684  min_lr: 0.003684  loss: 3.6130 (3.6042)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5559 (0.5553)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [70]  [2200/2502]  eta: 0:02:59  lr: 0.003683  min_lr: 0.003683  loss: 3.7271 (3.6016)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5302 (0.5552)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [70]  [2400/2502]  eta: 0:01:00  lr: 0.003682  min_lr: 0.003682  loss: 3.7142 (3.5987)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5931 (0.5584)  time: 0.5962  data: 0.0006  max mem: 53905
Epoch: [70]  [2501/2502]  eta: 0:00:00  lr: 0.003682  min_lr: 0.003682  loss: 3.9003 (3.6017)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5502 (0.5586)  time: 0.5324  data: 0.0009  max mem: 53905
Epoch: [70] Total time: 0:24:42 (0.5926 s / it)
Averaged stats: lr: 0.003682  min_lr: 0.003682  loss: 3.9003 (3.6079)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5502 (0.5586)
Test:  [ 0/50]  eta: 0:02:25  loss: 0.8525 (0.8525)  acc1: 90.8000 (90.8000)  acc5: 99.6000 (99.6000)  time: 2.9146  data: 2.5962  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 1.1912 (1.1948)  acc1: 80.8000 (80.7273)  acc5: 95.6000 (95.6727)  time: 0.5410  data: 0.2569  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.1912 (1.1932)  acc1: 79.6000 (80.6667)  acc5: 96.0000 (96.2286)  time: 0.2925  data: 0.0117  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.3209 (1.2978)  acc1: 76.4000 (77.9226)  acc5: 94.4000 (94.7097)  time: 0.2815  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5671 (1.3693)  acc1: 71.2000 (76.1171)  acc5: 90.4000 (93.6195)  time: 0.2823  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5644 (1.3820)  acc1: 72.8000 (75.6720)  acc5: 90.4000 (93.3840)  time: 0.2837  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3415 s / it)
* Acc@1 75.184 Acc@5 93.356 loss 1.391
Accuracy of the model on the 50000 test images: 75.2%
Max accuracy: 75.50%
Epoch: [71]  [   0/2502]  eta: 1:56:41  lr: 0.003681  min_lr: 0.003681  loss: 2.9102 (2.9102)  weight_decay: 0.0500 (0.0500)  time: 2.7985  data: 1.7331  max mem: 53905
Epoch: [71]  [ 200/2502]  eta: 0:23:10  lr: 0.003681  min_lr: 0.003681  loss: 3.8000 (3.5737)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5428 (0.5576)  time: 0.6033  data: 0.0004  max mem: 53905
Epoch: [71]  [ 400/2502]  eta: 0:20:55  lr: 0.003680  min_lr: 0.003680  loss: 3.6331 (3.6002)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5116 (0.5550)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [71]  [ 600/2502]  eta: 0:18:53  lr: 0.003679  min_lr: 0.003679  loss: 3.7885 (3.6052)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5550 (0.5546)  time: 0.5987  data: 0.0006  max mem: 53905
Epoch: [71]  [ 800/2502]  eta: 0:16:52  lr: 0.003678  min_lr: 0.003678  loss: 3.7253 (3.5960)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5440 (0.5538)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [71]  [1000/2502]  eta: 0:14:52  lr: 0.003677  min_lr: 0.003677  loss: 3.6858 (3.5902)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5097 (0.5586)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [71]  [1200/2502]  eta: 0:12:53  lr: 0.003676  min_lr: 0.003676  loss: 3.2786 (3.5913)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5239 (0.5566)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [71]  [1400/2502]  eta: 0:10:54  lr: 0.003675  min_lr: 0.003675  loss: 3.7417 (3.5960)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5341 (0.5540)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [71]  [1600/2502]  eta: 0:08:55  lr: 0.003674  min_lr: 0.003674  loss: 3.6866 (3.5981)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5279 (0.5590)  time: 0.5956  data: 0.0005  max mem: 53905
Epoch: [71]  [1800/2502]  eta: 0:06:56  lr: 0.003673  min_lr: 0.003673  loss: 3.7905 (3.6016)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5792 (0.5594)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [71]  [2000/2502]  eta: 0:04:57  lr: 0.003672  min_lr: 0.003672  loss: 3.7719 (3.6040)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5625 (0.5614)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [71]  [2200/2502]  eta: 0:02:59  lr: 0.003671  min_lr: 0.003671  loss: 3.8052 (3.6047)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5346 (0.5603)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [71]  [2400/2502]  eta: 0:01:00  lr: 0.003670  min_lr: 0.003670  loss: 3.7896 (3.6067)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5438 (0.5593)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [71]  [2501/2502]  eta: 0:00:00  lr: 0.003669  min_lr: 0.003669  loss: 3.7157 (3.6072)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6116 (0.5611)  time: 0.5329  data: 0.0007  max mem: 53905
Epoch: [71] Total time: 0:24:42 (0.5927 s / it)
Averaged stats: lr: 0.003669  min_lr: 0.003669  loss: 3.7157 (3.6164)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6116 (0.5611)
Test:  [ 0/50]  eta: 0:02:40  loss: 0.6821 (0.6821)  acc1: 90.4000 (90.4000)  acc5: 98.4000 (98.4000)  time: 3.2098  data: 2.8831  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.9703 (0.9744)  acc1: 81.6000 (81.1636)  acc5: 96.4000 (96.0000)  time: 0.5581  data: 0.2713  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9516 (0.9704)  acc1: 79.6000 (80.9333)  acc5: 96.4000 (96.2095)  time: 0.2883  data: 0.0053  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0973 (1.1013)  acc1: 76.0000 (78.0774)  acc5: 94.0000 (94.4000)  time: 0.2837  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3722 (1.1744)  acc1: 71.2000 (76.1171)  acc5: 91.2000 (93.5610)  time: 0.2833  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3583 (1.1853)  acc1: 70.4000 (75.5280)  acc5: 91.6000 (93.3680)  time: 0.2830  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3462 s / it)
* Acc@1 75.678 Acc@5 93.360 loss 1.184
Accuracy of the model on the 50000 test images: 75.7%
Max accuracy: 75.68%
Epoch: [72]  [   0/2502]  eta: 1:34:18  lr: 0.003669  min_lr: 0.003669  loss: 3.6219 (3.6219)  weight_decay: 0.0500 (0.0500)  time: 2.2615  data: 1.6656  max mem: 53905
Epoch: [72]  [ 200/2502]  eta: 0:23:04  lr: 0.003668  min_lr: 0.003668  loss: 3.5977 (3.6202)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5430 (0.5609)  time: 0.6080  data: 0.0004  max mem: 53905
Epoch: [72]  [ 400/2502]  eta: 0:20:55  lr: 0.003667  min_lr: 0.003667  loss: 3.5147 (3.6330)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5453 (0.5604)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [72]  [ 600/2502]  eta: 0:18:52  lr: 0.003666  min_lr: 0.003666  loss: 3.5339 (3.6406)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5538 (0.5673)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [72]  [ 800/2502]  eta: 0:16:52  lr: 0.003665  min_lr: 0.003665  loss: 3.6263 (3.6461)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5826 (0.5666)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [72]  [1000/2502]  eta: 0:14:52  lr: 0.003664  min_lr: 0.003664  loss: 3.5716 (3.6326)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5670 (0.5654)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [72]  [1200/2502]  eta: 0:12:53  lr: 0.003663  min_lr: 0.003663  loss: 3.7506 (3.6246)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5481 (0.5625)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [72]  [1400/2502]  eta: 0:10:54  lr: 0.003662  min_lr: 0.003662  loss: 3.8641 (3.6232)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5629 (0.5597)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [72]  [1600/2502]  eta: 0:08:55  lr: 0.003661  min_lr: 0.003661  loss: 3.8454 (3.6200)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4948 (0.5585)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [72]  [1800/2502]  eta: 0:06:56  lr: 0.003660  min_lr: 0.003660  loss: 3.8553 (3.6228)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5306 (0.5573)  time: 0.5905  data: 0.0005  max mem: 53905
Epoch: [72]  [2000/2502]  eta: 0:04:57  lr: 0.003659  min_lr: 0.003659  loss: 3.4624 (3.6119)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5080 (0.5564)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [72]  [2200/2502]  eta: 0:02:59  lr: 0.003658  min_lr: 0.003658  loss: 3.6617 (3.6146)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5444 (0.5571)  time: 0.5901  data: 0.0004  max mem: 53905
Epoch: [72]  [2400/2502]  eta: 0:01:00  lr: 0.003657  min_lr: 0.003657  loss: 3.7922 (3.6127)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5404 (0.5567)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [72]  [2501/2502]  eta: 0:00:00  lr: 0.003657  min_lr: 0.003657  loss: 3.6068 (3.6097)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5623 (0.5573)  time: 0.5323  data: 0.0009  max mem: 53905
Epoch: [72] Total time: 0:24:42 (0.5924 s / it)
Averaged stats: lr: 0.003657  min_lr: 0.003657  loss: 3.6068 (3.6091)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5623 (0.5573)
Test:  [ 0/50]  eta: 0:03:19  loss: 0.6997 (0.6997)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 3.9968  data: 3.6988  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.9947 (1.0153)  acc1: 82.0000 (81.7455)  acc5: 96.8000 (96.5091)  time: 0.6200  data: 0.3367  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0136 (1.0271)  acc1: 78.8000 (80.6667)  acc5: 96.8000 (96.4000)  time: 0.2824  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1873 (1.1475)  acc1: 75.2000 (77.9484)  acc5: 94.0000 (94.7226)  time: 0.2824  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4455 (1.2253)  acc1: 70.4000 (76.0585)  acc5: 90.0000 (93.6683)  time: 0.2821  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4392 (1.2437)  acc1: 71.2000 (75.5440)  acc5: 90.4000 (93.3440)  time: 0.2818  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3587 s / it)
* Acc@1 75.570 Acc@5 93.496 loss 1.247
Accuracy of the model on the 50000 test images: 75.6%
Max accuracy: 75.68%
Epoch: [73]  [   0/2502]  eta: 1:47:25  lr: 0.003657  min_lr: 0.003657  loss: 4.2251 (4.2251)  weight_decay: 0.0500 (0.0500)  time: 2.5761  data: 1.8163  max mem: 53905
Epoch: [73]  [ 200/2502]  eta: 0:23:05  lr: 0.003656  min_lr: 0.003656  loss: 3.7638 (3.6534)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5512 (0.5469)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [73]  [ 400/2502]  eta: 0:20:53  lr: 0.003655  min_lr: 0.003655  loss: 3.4445 (3.5783)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5916 (0.5578)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [73]  [ 600/2502]  eta: 0:18:52  lr: 0.003654  min_lr: 0.003654  loss: 3.7840 (3.5850)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5562 (0.5570)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [73]  [ 800/2502]  eta: 0:16:51  lr: 0.003653  min_lr: 0.003653  loss: 3.6401 (3.6037)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4957 (0.5548)  time: 0.5905  data: 0.0003  max mem: 53905
Epoch: [73]  [1000/2502]  eta: 0:14:51  lr: 0.003652  min_lr: 0.003652  loss: 3.3745 (3.6022)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5527 (0.5580)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [73]  [1200/2502]  eta: 0:12:52  lr: 0.003651  min_lr: 0.003651  loss: 3.5819 (3.6012)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5508 (0.5585)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [73]  [1400/2502]  eta: 0:10:53  lr: 0.003650  min_lr: 0.003650  loss: 3.5896 (3.5900)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5661 (0.5604)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [73]  [1600/2502]  eta: 0:08:54  lr: 0.003649  min_lr: 0.003649  loss: 3.6261 (3.5966)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5402 (0.5572)  time: 0.5901  data: 0.0004  max mem: 53905
Epoch: [73]  [1800/2502]  eta: 0:06:56  lr: 0.003648  min_lr: 0.003648  loss: 3.7277 (3.5964)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5635 (nan)  time: 0.5903  data: 0.0005  max mem: 53905
Epoch: [73]  [2000/2502]  eta: 0:04:57  lr: 0.003647  min_lr: 0.003647  loss: 3.6668 (3.5934)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5302 (nan)  time: 0.5905  data: 0.0005  max mem: 53905
Epoch: [73]  [2200/2502]  eta: 0:02:58  lr: 0.003646  min_lr: 0.003646  loss: 3.5529 (3.5946)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5678 (nan)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [73]  [2400/2502]  eta: 0:01:00  lr: 0.003645  min_lr: 0.003645  loss: 3.7793 (3.5918)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5331 (nan)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [73]  [2501/2502]  eta: 0:00:00  lr: 0.003644  min_lr: 0.003644  loss: 3.8000 (3.5924)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5244 (nan)  time: 0.5331  data: 0.0009  max mem: 53905
Epoch: [73] Total time: 0:24:42 (0.5924 s / it)
Averaged stats: lr: 0.003644  min_lr: 0.003644  loss: 3.8000 (3.5922)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5244 (nan)
Test:  [ 0/50]  eta: 0:02:49  loss: 0.7364 (0.7364)  acc1: 90.4000 (90.4000)  acc5: 99.2000 (99.2000)  time: 3.3820  data: 3.0644  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 1.0365 (1.0239)  acc1: 80.0000 (81.0909)  acc5: 96.8000 (96.2909)  time: 0.6079  data: 0.3222  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0388 (1.0411)  acc1: 79.6000 (80.4762)  acc5: 96.8000 (96.4000)  time: 0.3068  data: 0.0242  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2117 (1.1782)  acc1: 76.4000 (78.0000)  acc5: 93.6000 (94.6194)  time: 0.2847  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5026 (1.2623)  acc1: 71.2000 (75.9024)  acc5: 90.0000 (93.5317)  time: 0.2913  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4871 (1.2697)  acc1: 71.2000 (75.4000)  acc5: 90.4000 (93.4080)  time: 0.2910  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3605 s / it)
* Acc@1 75.442 Acc@5 93.336 loss 1.270
Accuracy of the model on the 50000 test images: 75.4%
Max accuracy: 75.68%
Epoch: [74]  [   0/2502]  eta: 1:54:59  lr: 0.003644  min_lr: 0.003644  loss: 3.6048 (3.6048)  weight_decay: 0.0500 (0.0500)  time: 2.7576  data: 2.1564  max mem: 53905
Epoch: [74]  [ 200/2502]  eta: 0:23:09  lr: 0.003643  min_lr: 0.003643  loss: 3.6980 (3.6021)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5347 (0.5747)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [74]  [ 400/2502]  eta: 0:20:57  lr: 0.003642  min_lr: 0.003642  loss: 3.5444 (3.5904)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5994 (0.5800)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [74]  [ 600/2502]  eta: 0:18:53  lr: 0.003641  min_lr: 0.003641  loss: 3.6357 (3.5955)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5521 (0.5766)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [74]  [ 800/2502]  eta: 0:16:53  lr: 0.003640  min_lr: 0.003640  loss: 3.3636 (3.5934)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5308 (0.5704)  time: 0.5912  data: 0.0006  max mem: 53905
Epoch: [74]  [1000/2502]  eta: 0:14:53  lr: 0.003639  min_lr: 0.003639  loss: 3.3991 (3.6055)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5313 (0.5651)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [74]  [1200/2502]  eta: 0:12:53  lr: 0.003638  min_lr: 0.003638  loss: 3.7480 (3.6054)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5765 (0.5687)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [74]  [1400/2502]  eta: 0:10:54  lr: 0.003637  min_lr: 0.003637  loss: 3.7837 (3.5959)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5275 (0.5663)  time: 0.5970  data: 0.0004  max mem: 53905
Epoch: [74]  [1600/2502]  eta: 0:08:55  lr: 0.003636  min_lr: 0.003636  loss: 3.3824 (3.6039)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5678 (0.5681)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [74]  [1800/2502]  eta: 0:06:56  lr: 0.003635  min_lr: 0.003635  loss: 3.7253 (3.6063)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5795 (0.5679)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [74]  [2000/2502]  eta: 0:04:57  lr: 0.003634  min_lr: 0.003634  loss: 3.7069 (3.6042)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5739 (0.5673)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [74]  [2200/2502]  eta: 0:02:59  lr: 0.003633  min_lr: 0.003633  loss: 3.8532 (3.6063)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5822 (0.5660)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [74]  [2400/2502]  eta: 0:01:00  lr: 0.003632  min_lr: 0.003632  loss: 3.4443 (3.6018)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5915 (0.5661)  time: 0.5985  data: 0.0005  max mem: 53905
Epoch: [74]  [2501/2502]  eta: 0:00:00  lr: 0.003631  min_lr: 0.003631  loss: 3.9614 (3.6040)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5185 (0.5648)  time: 0.5404  data: 0.0009  max mem: 53905
Epoch: [74] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.003631  min_lr: 0.003631  loss: 3.9614 (3.6038)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5185 (0.5648)
Test:  [ 0/50]  eta: 0:02:49  loss: 0.7086 (0.7086)  acc1: 92.0000 (92.0000)  acc5: 98.0000 (98.0000)  time: 3.3866  data: 3.0757  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.0599 (1.0836)  acc1: 83.2000 (81.8546)  acc5: 96.8000 (96.3273)  time: 0.5970  data: 0.3118  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0845 (1.0940)  acc1: 78.8000 (80.5905)  acc5: 96.4000 (96.1524)  time: 0.3007  data: 0.0180  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2305 (1.1996)  acc1: 76.0000 (78.4387)  acc5: 93.6000 (94.5936)  time: 0.2834  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4920 (1.2735)  acc1: 71.2000 (76.3902)  acc5: 90.8000 (93.7756)  time: 0.2831  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4697 (1.2802)  acc1: 71.2000 (75.8240)  acc5: 91.6000 (93.6080)  time: 0.2829  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3544 s / it)
* Acc@1 75.584 Acc@5 93.516 loss 1.285
Accuracy of the model on the 50000 test images: 75.6%
Max accuracy: 75.68%
Epoch: [75]  [   0/2502]  eta: 1:55:09  lr: 0.003631  min_lr: 0.003631  loss: 2.8898 (2.8898)  weight_decay: 0.0500 (0.0500)  time: 2.7618  data: 1.7639  max mem: 53905
Epoch: [75]  [ 200/2502]  eta: 0:23:04  lr: 0.003630  min_lr: 0.003630  loss: 3.9571 (3.6256)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5363 (0.5651)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [75]  [ 400/2502]  eta: 0:20:52  lr: 0.003629  min_lr: 0.003629  loss: 3.3097 (3.5830)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5689 (0.5564)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [75]  [ 600/2502]  eta: 0:18:52  lr: 0.003628  min_lr: 0.003628  loss: 3.5786 (3.6000)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5408 (0.5507)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [75]  [ 800/2502]  eta: 0:16:51  lr: 0.003627  min_lr: 0.003627  loss: 3.5252 (3.6139)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5416 (0.5578)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [75]  [1000/2502]  eta: 0:14:51  lr: 0.003626  min_lr: 0.003626  loss: 3.6938 (3.6152)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5244 (0.5607)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [75]  [1200/2502]  eta: 0:12:53  lr: 0.003625  min_lr: 0.003625  loss: 3.2748 (3.6119)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5567 (0.5613)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [75]  [1400/2502]  eta: 0:10:53  lr: 0.003624  min_lr: 0.003624  loss: 3.4973 (3.6076)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6124 (0.5633)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [75]  [1600/2502]  eta: 0:08:55  lr: 0.003623  min_lr: 0.003623  loss: 3.7803 (3.6024)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5482 (0.5603)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [75]  [1800/2502]  eta: 0:06:56  lr: 0.003622  min_lr: 0.003622  loss: 3.6533 (3.5953)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5028 (0.5592)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [75]  [2000/2502]  eta: 0:04:57  lr: 0.003621  min_lr: 0.003621  loss: 3.6418 (3.6022)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5498 (0.5589)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [75]  [2200/2502]  eta: 0:02:59  lr: 0.003620  min_lr: 0.003620  loss: 3.3230 (3.6003)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5846 (0.5610)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [75]  [2400/2502]  eta: 0:01:00  lr: 0.003619  min_lr: 0.003619  loss: 3.4573 (3.6022)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5316 (0.5598)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [75]  [2501/2502]  eta: 0:00:00  lr: 0.003618  min_lr: 0.003618  loss: 3.6660 (3.6017)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5791 (0.5606)  time: 0.5319  data: 0.0009  max mem: 53905
Epoch: [75] Total time: 0:24:42 (0.5925 s / it)
Averaged stats: lr: 0.003618  min_lr: 0.003618  loss: 3.6660 (3.5923)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5791 (0.5606)
Test:  [ 0/50]  eta: 0:03:20  loss: 0.6086 (0.6086)  acc1: 92.8000 (92.8000)  acc5: 97.6000 (97.6000)  time: 4.0127  data: 3.6997  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 1.0194 (1.0187)  acc1: 82.4000 (81.7818)  acc5: 96.8000 (96.1091)  time: 0.6211  data: 0.3367  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9869 (1.0259)  acc1: 80.4000 (80.9714)  acc5: 96.8000 (96.3238)  time: 0.2822  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.2407 (1.1424)  acc1: 76.4000 (78.4903)  acc5: 93.6000 (94.8258)  time: 0.2825  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4414 (1.2225)  acc1: 71.6000 (76.5561)  acc5: 90.8000 (93.7756)  time: 0.2823  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4505 (1.2424)  acc1: 71.6000 (76.0400)  acc5: 90.8000 (93.5520)  time: 0.2821  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3592 s / it)
* Acc@1 75.750 Acc@5 93.462 loss 1.254
Accuracy of the model on the 50000 test images: 75.8%
Max accuracy: 75.75%
Epoch: [76]  [   0/2502]  eta: 1:41:48  lr: 0.003618  min_lr: 0.003618  loss: 4.3305 (4.3305)  weight_decay: 0.0500 (0.0500)  time: 2.4413  data: 1.8457  max mem: 53905
Epoch: [76]  [ 200/2502]  eta: 0:23:07  lr: 0.003617  min_lr: 0.003617  loss: 3.6848 (3.5134)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5423 (0.5526)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [76]  [ 400/2502]  eta: 0:20:55  lr: 0.003616  min_lr: 0.003616  loss: 3.5007 (3.5030)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5192 (0.5566)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [76]  [ 600/2502]  eta: 0:18:53  lr: 0.003615  min_lr: 0.003615  loss: 3.6770 (3.5275)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5066 (0.5553)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [76]  [ 800/2502]  eta: 0:16:52  lr: 0.003614  min_lr: 0.003614  loss: 3.8076 (3.5408)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5321 (0.5534)  time: 0.5969  data: 0.0004  max mem: 53905
Epoch: [76]  [1000/2502]  eta: 0:14:52  lr: 0.003613  min_lr: 0.003613  loss: 3.3174 (3.5336)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5179 (0.5541)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [76]  [1200/2502]  eta: 0:12:53  lr: 0.003612  min_lr: 0.003612  loss: 3.5387 (3.5501)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5471 (0.5570)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [76]  [1400/2502]  eta: 0:10:54  lr: 0.003611  min_lr: 0.003611  loss: 3.5249 (3.5506)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5276 (0.5570)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [76]  [1600/2502]  eta: 0:08:55  lr: 0.003610  min_lr: 0.003610  loss: 3.7871 (3.5609)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5452 (0.5565)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [76]  [1800/2502]  eta: 0:06:56  lr: 0.003609  min_lr: 0.003609  loss: 3.8286 (3.5673)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5793 (0.5612)  time: 0.5969  data: 0.0005  max mem: 53905
Epoch: [76]  [2000/2502]  eta: 0:04:58  lr: 0.003608  min_lr: 0.003608  loss: 3.7339 (3.5780)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5815 (0.5609)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [76]  [2200/2502]  eta: 0:02:59  lr: 0.003606  min_lr: 0.003606  loss: 3.5219 (3.5801)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5274 (0.5616)  time: 0.5902  data: 0.0004  max mem: 53905
Epoch: [76]  [2400/2502]  eta: 0:01:00  lr: 0.003605  min_lr: 0.003605  loss: 3.4885 (3.5838)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5120 (0.5598)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [76]  [2501/2502]  eta: 0:00:00  lr: 0.003605  min_lr: 0.003605  loss: 3.6867 (3.5860)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5491 (0.5585)  time: 0.5322  data: 0.0006  max mem: 53905
Epoch: [76] Total time: 0:24:44 (0.5931 s / it)
Averaged stats: lr: 0.003605  min_lr: 0.003605  loss: 3.6867 (3.5897)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5491 (0.5585)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.7190 (0.7190)  acc1: 90.8000 (90.8000)  acc5: 98.0000 (98.0000)  time: 3.0435  data: 2.6990  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.9488 (0.9610)  acc1: 83.2000 (82.0000)  acc5: 96.4000 (96.3273)  time: 0.5397  data: 0.2519  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9580 (0.9864)  acc1: 81.2000 (80.9524)  acc5: 96.4000 (96.1714)  time: 0.2861  data: 0.0039  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0996 (1.1128)  acc1: 75.6000 (78.4129)  acc5: 92.8000 (94.5032)  time: 0.2830  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4350 (1.1954)  acc1: 70.8000 (76.4878)  acc5: 90.0000 (93.4927)  time: 0.2827  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4391 (1.2085)  acc1: 70.8000 (76.0400)  acc5: 91.2000 (93.3920)  time: 0.2823  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3431 s / it)
* Acc@1 75.834 Acc@5 93.266 loss 1.213
Accuracy of the model on the 50000 test images: 75.8%
Max accuracy: 75.83%
Epoch: [77]  [   0/2502]  eta: 1:41:21  lr: 0.003605  min_lr: 0.003605  loss: 2.9489 (2.9489)  weight_decay: 0.0500 (0.0500)  time: 2.4307  data: 1.8370  max mem: 53905
Epoch: [77]  [ 200/2502]  eta: 0:23:03  lr: 0.003604  min_lr: 0.003604  loss: 3.7154 (3.5533)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5641 (0.5785)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [77]  [ 400/2502]  eta: 0:20:54  lr: 0.003603  min_lr: 0.003603  loss: 3.7718 (3.5682)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5905  data: 0.0005  max mem: 53905
Epoch: [77]  [ 600/2502]  eta: 0:18:52  lr: 0.003602  min_lr: 0.003602  loss: 3.5806 (3.5612)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5526 (nan)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [77]  [ 800/2502]  eta: 0:16:52  lr: 0.003601  min_lr: 0.003601  loss: 3.5548 (3.5708)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5744 (nan)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [77]  [1000/2502]  eta: 0:14:52  lr: 0.003599  min_lr: 0.003599  loss: 3.8373 (3.5802)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5391 (nan)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [77]  [1200/2502]  eta: 0:12:53  lr: 0.003598  min_lr: 0.003598  loss: 3.8201 (3.5783)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5341 (nan)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [77]  [1400/2502]  eta: 0:10:54  lr: 0.003597  min_lr: 0.003597  loss: 3.5284 (3.5706)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5602 (nan)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [77]  [1600/2502]  eta: 0:08:55  lr: 0.003596  min_lr: 0.003596  loss: 3.6479 (3.5710)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5963 (nan)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [77]  [1800/2502]  eta: 0:06:56  lr: 0.003595  min_lr: 0.003595  loss: 3.7792 (3.5707)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5243 (nan)  time: 0.6052  data: 0.0004  max mem: 53905
Epoch: [77]  [2000/2502]  eta: 0:04:58  lr: 0.003594  min_lr: 0.003594  loss: 3.6676 (3.5774)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5173 (nan)  time: 0.5944  data: 0.0005  max mem: 53905
Epoch: [77]  [2200/2502]  eta: 0:02:59  lr: 0.003593  min_lr: 0.003593  loss: 3.6279 (3.5807)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6057 (nan)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [77]  [2400/2502]  eta: 0:01:00  lr: 0.003592  min_lr: 0.003592  loss: 3.7994 (3.5789)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5136 (nan)  time: 0.5937  data: 0.0005  max mem: 53905
Epoch: [77]  [2501/2502]  eta: 0:00:00  lr: 0.003591  min_lr: 0.003591  loss: 3.7131 (3.5752)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5765 (nan)  time: 0.5331  data: 0.0009  max mem: 53905
Epoch: [77] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.003591  min_lr: 0.003591  loss: 3.7131 (3.5833)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5765 (nan)
Test:  [ 0/50]  eta: 0:03:02  loss: 0.6182 (0.6182)  acc1: 90.4000 (90.4000)  acc5: 98.8000 (98.8000)  time: 3.6534  data: 3.3284  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8749 (0.9275)  acc1: 81.6000 (81.8545)  acc5: 96.4000 (96.5091)  time: 0.5930  data: 0.3058  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8829 (0.9336)  acc1: 80.0000 (80.6286)  acc5: 96.8000 (96.5714)  time: 0.2857  data: 0.0020  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0354 (1.0633)  acc1: 73.6000 (77.9484)  acc5: 94.0000 (94.7484)  time: 0.2844  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3781 (1.1360)  acc1: 71.2000 (76.2244)  acc5: 90.4000 (93.7659)  time: 0.2841  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3315 (1.1504)  acc1: 70.4000 (75.6480)  acc5: 91.2000 (93.5920)  time: 0.2838  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3544 s / it)
* Acc@1 75.932 Acc@5 93.648 loss 1.152
Accuracy of the model on the 50000 test images: 75.9%
Max accuracy: 75.93%
Epoch: [78]  [   0/2502]  eta: 1:40:47  lr: 0.003591  min_lr: 0.003591  loss: 3.6919 (3.6919)  weight_decay: 0.0500 (0.0500)  time: 2.4172  data: 1.8101  max mem: 53905
Epoch: [78]  [ 200/2502]  eta: 0:23:07  lr: 0.003590  min_lr: 0.003590  loss: 3.6711 (3.6003)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5960 (0.5800)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [78]  [ 400/2502]  eta: 0:20:56  lr: 0.003589  min_lr: 0.003589  loss: 3.7967 (3.5450)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5356 (0.5624)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [78]  [ 600/2502]  eta: 0:18:52  lr: 0.003588  min_lr: 0.003588  loss: 3.4718 (3.5644)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6089 (0.5653)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [78]  [ 800/2502]  eta: 0:16:53  lr: 0.003587  min_lr: 0.003587  loss: 3.5784 (3.5715)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5142 (0.5625)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [78]  [1000/2502]  eta: 0:14:52  lr: 0.003586  min_lr: 0.003586  loss: 3.4060 (3.5658)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5064 (0.5618)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [78]  [1200/2502]  eta: 0:12:53  lr: 0.003585  min_lr: 0.003585  loss: 3.6124 (3.5770)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6255 (0.5681)  time: 0.5959  data: 0.0004  max mem: 53905
Epoch: [78]  [1400/2502]  eta: 0:10:54  lr: 0.003584  min_lr: 0.003584  loss: 3.7260 (3.5802)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5431 (0.5656)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [78]  [1600/2502]  eta: 0:08:55  lr: 0.003583  min_lr: 0.003583  loss: 3.4976 (3.5757)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5229 (0.5633)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [78]  [1800/2502]  eta: 0:06:56  lr: 0.003582  min_lr: 0.003582  loss: 3.7965 (3.5767)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5441 (0.5608)  time: 0.5978  data: 0.0004  max mem: 53905
Epoch: [78]  [2000/2502]  eta: 0:04:57  lr: 0.003580  min_lr: 0.003580  loss: 3.7931 (3.5754)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5520 (0.5609)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [78]  [2200/2502]  eta: 0:02:59  lr: 0.003579  min_lr: 0.003579  loss: 3.7583 (3.5736)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5346 (0.5602)  time: 0.5939  data: 0.0005  max mem: 53905
Epoch: [78]  [2400/2502]  eta: 0:01:00  lr: 0.003578  min_lr: 0.003578  loss: 3.7727 (3.5763)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5858 (0.5622)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [78]  [2501/2502]  eta: 0:00:00  lr: 0.003578  min_lr: 0.003578  loss: 3.7239 (3.5783)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5203 (0.5620)  time: 0.5328  data: 0.0007  max mem: 53905
Epoch: [78] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.003578  min_lr: 0.003578  loss: 3.7239 (3.5815)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5203 (0.5620)
Test:  [ 0/50]  eta: 0:03:09  loss: 0.7464 (0.7464)  acc1: 90.4000 (90.4000)  acc5: 98.4000 (98.4000)  time: 3.7956  data: 3.4691  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.9897 (1.0408)  acc1: 82.0000 (81.0545)  acc5: 96.0000 (96.1818)  time: 0.6030  data: 0.3158  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9897 (1.0357)  acc1: 79.6000 (80.6095)  acc5: 96.4000 (96.3429)  time: 0.2841  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1625 (1.1415)  acc1: 76.4000 (78.6065)  acc5: 93.2000 (94.9419)  time: 0.2844  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4410 (1.2149)  acc1: 72.4000 (76.8000)  acc5: 91.2000 (93.9707)  time: 0.2841  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4049 (1.2314)  acc1: 72.0000 (76.1920)  acc5: 91.6000 (93.6800)  time: 0.2838  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3572 s / it)
* Acc@1 75.860 Acc@5 93.626 loss 1.235
Accuracy of the model on the 50000 test images: 75.9%
Max accuracy: 75.93%
Epoch: [79]  [   0/2502]  eta: 1:58:43  lr: 0.003578  min_lr: 0.003578  loss: 3.1275 (3.1275)  weight_decay: 0.0500 (0.0500)  time: 2.8471  data: 1.5063  max mem: 53905
Epoch: [79]  [ 200/2502]  eta: 0:23:06  lr: 0.003577  min_lr: 0.003577  loss: 3.4611 (3.5933)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5938 (0.5761)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [79]  [ 400/2502]  eta: 0:20:57  lr: 0.003575  min_lr: 0.003575  loss: 3.4435 (3.5792)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5438 (0.5652)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [79]  [ 600/2502]  eta: 0:18:54  lr: 0.003574  min_lr: 0.003574  loss: 3.6044 (3.5805)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5332 (0.5633)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [79]  [ 800/2502]  eta: 0:16:53  lr: 0.003573  min_lr: 0.003573  loss: 3.6224 (3.5863)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4912 (0.5571)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [79]  [1000/2502]  eta: 0:14:53  lr: 0.003572  min_lr: 0.003572  loss: 3.7065 (3.5973)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5398 (0.5552)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [79]  [1200/2502]  eta: 0:12:54  lr: 0.003571  min_lr: 0.003571  loss: 3.4970 (3.5867)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5283 (0.5560)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [79]  [1400/2502]  eta: 0:10:54  lr: 0.003570  min_lr: 0.003570  loss: 3.7973 (3.5836)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5336 (0.5563)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [79]  [1600/2502]  eta: 0:08:55  lr: 0.003569  min_lr: 0.003569  loss: 3.4965 (3.5721)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5696 (0.5588)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [79]  [1800/2502]  eta: 0:06:56  lr: 0.003568  min_lr: 0.003568  loss: 3.6602 (3.5741)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5853 (0.5620)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [79]  [2000/2502]  eta: 0:04:57  lr: 0.003567  min_lr: 0.003567  loss: 3.7990 (3.5814)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5462 (0.5630)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [79]  [2200/2502]  eta: 0:02:59  lr: 0.003565  min_lr: 0.003565  loss: 3.3902 (3.5725)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5756 (0.5647)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [79]  [2400/2502]  eta: 0:01:00  lr: 0.003564  min_lr: 0.003564  loss: 3.6472 (3.5747)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5261 (0.5643)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [79]  [2501/2502]  eta: 0:00:00  lr: 0.003564  min_lr: 0.003564  loss: 3.7169 (3.5734)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5377 (0.5635)  time: 0.5328  data: 0.0007  max mem: 53905
Epoch: [79] Total time: 0:24:43 (0.5929 s / it)
Averaged stats: lr: 0.003564  min_lr: 0.003564  loss: 3.7169 (3.5756)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5377 (0.5635)
Test:  [ 0/50]  eta: 0:03:17  loss: 0.6209 (0.6209)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 3.9537  data: 3.6314  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.9800 (0.9837)  acc1: 82.0000 (81.8182)  acc5: 96.4000 (96.1818)  time: 0.6180  data: 0.3306  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0033 (1.0087)  acc1: 78.8000 (80.6095)  acc5: 96.4000 (96.1143)  time: 0.2846  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.2004 (1.1410)  acc1: 75.2000 (78.3484)  acc5: 93.2000 (94.5807)  time: 0.2848  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4582 (1.2208)  acc1: 71.6000 (76.5366)  acc5: 90.8000 (93.6976)  time: 0.2846  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4257 (1.2348)  acc1: 71.6000 (76.0480)  acc5: 92.0000 (93.6000)  time: 0.2844  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3601 s / it)
* Acc@1 76.046 Acc@5 93.472 loss 1.240
Accuracy of the model on the 50000 test images: 76.0%
Max accuracy: 76.05%
Epoch: [80]  [   0/2502]  eta: 1:30:02  lr: 0.003564  min_lr: 0.003564  loss: 3.4559 (3.4559)  weight_decay: 0.0500 (0.0500)  time: 2.1593  data: 1.5629  max mem: 53905
Epoch: [80]  [ 200/2502]  eta: 0:23:06  lr: 0.003563  min_lr: 0.003563  loss: 3.3950 (3.5083)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5844 (0.5648)  time: 0.6010  data: 0.0005  max mem: 53905
Epoch: [80]  [ 400/2502]  eta: 0:20:55  lr: 0.003562  min_lr: 0.003562  loss: 3.7992 (3.5303)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5296 (0.5613)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [80]  [ 600/2502]  eta: 0:18:52  lr: 0.003560  min_lr: 0.003560  loss: 3.8424 (3.5397)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5287 (0.5561)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [80]  [ 800/2502]  eta: 0:16:53  lr: 0.003559  min_lr: 0.003559  loss: 3.6680 (3.5455)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5188 (0.5538)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [80]  [1000/2502]  eta: 0:14:53  lr: 0.003558  min_lr: 0.003558  loss: 3.5466 (3.5665)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5243 (0.5528)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [80]  [1200/2502]  eta: 0:12:54  lr: 0.003557  min_lr: 0.003557  loss: 3.6157 (3.5721)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5570 (0.5561)  time: 0.5984  data: 0.0005  max mem: 53905
Epoch: [80]  [1400/2502]  eta: 0:10:54  lr: 0.003556  min_lr: 0.003556  loss: 3.8223 (3.5758)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5817 (0.5591)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [80]  [1600/2502]  eta: 0:08:55  lr: 0.003555  min_lr: 0.003555  loss: 3.8723 (3.5770)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5786 (0.5606)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [80]  [1800/2502]  eta: 0:06:56  lr: 0.003554  min_lr: 0.003554  loss: 3.7893 (3.5789)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5391 (0.5600)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [80]  [2000/2502]  eta: 0:04:58  lr: 0.003553  min_lr: 0.003553  loss: 3.8144 (3.5772)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5636 (0.5603)  time: 0.5936  data: 0.0004  max mem: 53905
Epoch: [80]  [2200/2502]  eta: 0:02:59  lr: 0.003551  min_lr: 0.003551  loss: 3.2007 (3.5730)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5860 (0.5616)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [80]  [2400/2502]  eta: 0:01:00  lr: 0.003550  min_lr: 0.003550  loss: 3.6793 (3.5713)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5071 (0.5595)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [80]  [2501/2502]  eta: 0:00:00  lr: 0.003550  min_lr: 0.003550  loss: 3.3754 (3.5704)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5296 (0.5596)  time: 0.5336  data: 0.0008  max mem: 53905
Epoch: [80] Total time: 0:24:44 (0.5935 s / it)
Averaged stats: lr: 0.003550  min_lr: 0.003550  loss: 3.3754 (3.5714)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5296 (0.5596)
Test:  [ 0/50]  eta: 0:03:13  loss: 0.6386 (0.6386)  acc1: 89.6000 (89.6000)  acc5: 98.0000 (98.0000)  time: 3.8745  data: 3.5360  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8860 (0.9392)  acc1: 82.0000 (82.4727)  acc5: 96.4000 (96.1091)  time: 0.6112  data: 0.3219  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9016 (0.9607)  acc1: 81.2000 (81.7524)  acc5: 96.4000 (96.2476)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1163 (1.0768)  acc1: 76.8000 (78.8645)  acc5: 93.6000 (94.7226)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3730 (1.1514)  acc1: 71.6000 (76.9756)  acc5: 90.4000 (93.7659)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3730 (1.1622)  acc1: 70.8000 (76.4960)  acc5: 91.2000 (93.6400)  time: 0.2845  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3591 s / it)
* Acc@1 76.176 Acc@5 93.662 loss 1.169
Accuracy of the model on the 50000 test images: 76.2%
Max accuracy: 76.18%
Epoch: [81]  [   0/2502]  eta: 1:37:04  lr: 0.003550  min_lr: 0.003550  loss: 4.1308 (4.1308)  weight_decay: 0.0500 (0.0500)  time: 2.3281  data: 1.7221  max mem: 53905
Epoch: [81]  [ 200/2502]  eta: 0:23:05  lr: 0.003549  min_lr: 0.003549  loss: 3.7586 (3.5363)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5307 (0.5434)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [81]  [ 400/2502]  eta: 0:20:57  lr: 0.003547  min_lr: 0.003547  loss: 3.7238 (3.5241)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5841 (0.5538)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [81]  [ 600/2502]  eta: 0:18:54  lr: 0.003546  min_lr: 0.003546  loss: 3.5173 (3.5280)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5784 (0.5654)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [81]  [ 800/2502]  eta: 0:16:53  lr: 0.003545  min_lr: 0.003545  loss: 3.8657 (3.5269)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5478 (0.5635)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [81]  [1000/2502]  eta: 0:14:53  lr: 0.003544  min_lr: 0.003544  loss: 3.5722 (3.5267)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5121 (0.5635)  time: 0.5975  data: 0.0004  max mem: 53905
Epoch: [81]  [1200/2502]  eta: 0:12:54  lr: 0.003543  min_lr: 0.003543  loss: 3.6206 (3.5344)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5709 (0.5650)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [81]  [1400/2502]  eta: 0:10:54  lr: 0.003542  min_lr: 0.003542  loss: 3.6506 (3.5438)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5593 (0.5672)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [81]  [1600/2502]  eta: 0:08:55  lr: 0.003541  min_lr: 0.003541  loss: 3.5735 (3.5509)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5747 (0.5659)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [81]  [1800/2502]  eta: 0:06:56  lr: 0.003539  min_lr: 0.003539  loss: 3.6139 (3.5475)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5254 (0.5682)  time: 0.5965  data: 0.0004  max mem: 53905
Epoch: [81]  [2000/2502]  eta: 0:04:57  lr: 0.003538  min_lr: 0.003538  loss: 3.8712 (3.5513)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5426 (0.5691)  time: 0.5952  data: 0.0005  max mem: 53905
Epoch: [81]  [2200/2502]  eta: 0:02:59  lr: 0.003537  min_lr: 0.003537  loss: 3.5912 (3.5542)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5380 (0.5689)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [81]  [2400/2502]  eta: 0:01:00  lr: 0.003536  min_lr: 0.003536  loss: 3.8044 (3.5561)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5756 (0.5687)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [81]  [2501/2502]  eta: 0:00:00  lr: 0.003535  min_lr: 0.003535  loss: 3.7947 (3.5554)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5579 (0.5695)  time: 0.5317  data: 0.0008  max mem: 53905
Epoch: [81] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.003535  min_lr: 0.003535  loss: 3.7947 (3.5640)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5579 (0.5695)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.7415 (0.7415)  acc1: 93.2000 (93.2000)  acc5: 98.0000 (98.0000)  time: 3.0514  data: 2.7190  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.0598 (1.0975)  acc1: 84.0000 (82.0000)  acc5: 96.8000 (96.6909)  time: 0.5859  data: 0.2937  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.1089 (1.1227)  acc1: 80.4000 (81.2191)  acc5: 96.4000 (96.1905)  time: 0.3154  data: 0.0258  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2896 (1.2475)  acc1: 75.2000 (78.4000)  acc5: 93.2000 (94.6710)  time: 0.2943  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5441 (1.3232)  acc1: 71.2000 (76.4781)  acc5: 90.4000 (93.6293)  time: 0.2960  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5397 (1.3397)  acc1: 70.8000 (75.8800)  acc5: 90.4000 (93.4000)  time: 0.2913  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3592 s / it)
* Acc@1 75.612 Acc@5 93.362 loss 1.351
Accuracy of the model on the 50000 test images: 75.6%
Max accuracy: 76.18%
Epoch: [82]  [   0/2502]  eta: 1:57:38  lr: 0.003535  min_lr: 0.003535  loss: 3.8695 (3.8695)  weight_decay: 0.0500 (0.0500)  time: 2.8211  data: 1.6725  max mem: 53905
Epoch: [82]  [ 200/2502]  eta: 0:23:11  lr: 0.003534  min_lr: 0.003534  loss: 3.5455 (3.4870)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5733 (0.5617)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [82]  [ 400/2502]  eta: 0:20:57  lr: 0.003533  min_lr: 0.003533  loss: 3.5534 (3.5088)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5643 (0.5700)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [82]  [ 600/2502]  eta: 0:18:53  lr: 0.003532  min_lr: 0.003532  loss: 3.4772 (3.5336)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5439 (0.5721)  time: 0.5998  data: 0.0004  max mem: 53905
Epoch: [82]  [ 800/2502]  eta: 0:16:52  lr: 0.003531  min_lr: 0.003531  loss: 3.2259 (3.5373)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4952 (0.5701)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [82]  [1000/2502]  eta: 0:14:53  lr: 0.003530  min_lr: 0.003530  loss: 3.3895 (3.5368)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5278 (0.5669)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [82]  [1200/2502]  eta: 0:12:54  lr: 0.003528  min_lr: 0.003528  loss: 3.4783 (3.5441)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5688 (0.5668)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [82]  [1400/2502]  eta: 0:10:54  lr: 0.003527  min_lr: 0.003527  loss: 3.0912 (3.5444)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5020 (0.5644)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [82]  [1600/2502]  eta: 0:08:55  lr: 0.003526  min_lr: 0.003526  loss: 3.6184 (3.5476)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6180 (0.5695)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [82]  [1800/2502]  eta: 0:06:56  lr: 0.003525  min_lr: 0.003525  loss: 3.3751 (3.5547)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5426 (0.5680)  time: 0.5917  data: 0.0003  max mem: 53905
Epoch: [82]  [2000/2502]  eta: 0:04:58  lr: 0.003524  min_lr: 0.003524  loss: 3.6246 (3.5544)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5407 (0.5706)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [82]  [2200/2502]  eta: 0:02:59  lr: 0.003523  min_lr: 0.003523  loss: 3.0462 (3.5538)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5381 (0.5688)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [82]  [2400/2502]  eta: 0:01:00  lr: 0.003522  min_lr: 0.003522  loss: 3.7310 (3.5499)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5187 (0.5672)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [82]  [2501/2502]  eta: 0:00:00  lr: 0.003521  min_lr: 0.003521  loss: 3.7424 (3.5537)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5116 (0.5664)  time: 0.5330  data: 0.0007  max mem: 53905
Epoch: [82] Total time: 0:24:44 (0.5935 s / it)
Averaged stats: lr: 0.003521  min_lr: 0.003521  loss: 3.7424 (3.5601)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5116 (0.5664)
Test:  [ 0/50]  eta: 0:02:48  loss: 0.6026 (0.6026)  acc1: 91.6000 (91.6000)  acc5: 98.8000 (98.8000)  time: 3.3738  data: 3.0477  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.9828 (0.9496)  acc1: 80.4000 (81.3455)  acc5: 97.2000 (96.4364)  time: 0.5655  data: 0.2775  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9828 (0.9691)  acc1: 79.6000 (81.1429)  acc5: 96.8000 (96.3810)  time: 0.2849  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1163 (1.0861)  acc1: 77.2000 (78.9936)  acc5: 92.8000 (94.8000)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3936 (1.1647)  acc1: 72.4000 (77.0732)  acc5: 90.4000 (93.7366)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3629 (1.1911)  acc1: 71.6000 (76.4240)  acc5: 90.4000 (93.4160)  time: 0.2847  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3487 s / it)
* Acc@1 76.202 Acc@5 93.606 loss 1.193
Accuracy of the model on the 50000 test images: 76.2%
Max accuracy: 76.20%
Epoch: [83]  [   0/2502]  eta: 1:28:55  lr: 0.003521  min_lr: 0.003521  loss: 3.2937 (3.2937)  weight_decay: 0.0500 (0.0500)  time: 2.1324  data: 1.5443  max mem: 53905
Epoch: [83]  [ 200/2502]  eta: 0:23:01  lr: 0.003520  min_lr: 0.003520  loss: 3.4377 (3.5176)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5462 (0.5770)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [83]  [ 400/2502]  eta: 0:20:55  lr: 0.003519  min_lr: 0.003519  loss: 3.6954 (3.5366)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5671 (0.5692)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [83]  [ 600/2502]  eta: 0:18:54  lr: 0.003517  min_lr: 0.003517  loss: 3.6106 (3.5297)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5602 (0.5695)  time: 0.5937  data: 0.0004  max mem: 53905
Epoch: [83]  [ 800/2502]  eta: 0:16:53  lr: 0.003516  min_lr: 0.003516  loss: 3.4861 (3.5437)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5591 (0.5687)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [83]  [1000/2502]  eta: 0:14:53  lr: 0.003515  min_lr: 0.003515  loss: 3.6653 (3.5419)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5547 (nan)  time: 0.5934  data: 0.0005  max mem: 53905
Epoch: [83]  [1200/2502]  eta: 0:12:54  lr: 0.003514  min_lr: 0.003514  loss: 3.5433 (3.5442)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5321 (nan)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [83]  [1400/2502]  eta: 0:10:55  lr: 0.003513  min_lr: 0.003513  loss: 3.5125 (3.5437)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5223 (nan)  time: 0.5981  data: 0.0005  max mem: 53905
Epoch: [83]  [1600/2502]  eta: 0:08:56  lr: 0.003512  min_lr: 0.003512  loss: 3.5299 (3.5450)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5560 (nan)  time: 0.5978  data: 0.0005  max mem: 53905
Epoch: [83]  [1800/2502]  eta: 0:06:57  lr: 0.003510  min_lr: 0.003510  loss: 3.7255 (3.5475)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5435 (nan)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [83]  [2000/2502]  eta: 0:04:58  lr: 0.003509  min_lr: 0.003509  loss: 3.3738 (3.5446)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5413 (nan)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [83]  [2200/2502]  eta: 0:02:59  lr: 0.003508  min_lr: 0.003508  loss: 3.6444 (3.5462)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5374 (nan)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [83]  [2400/2502]  eta: 0:01:00  lr: 0.003507  min_lr: 0.003507  loss: 3.7990 (3.5475)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5804 (nan)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [83]  [2501/2502]  eta: 0:00:00  lr: 0.003506  min_lr: 0.003506  loss: 3.8053 (3.5466)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5775 (nan)  time: 0.5418  data: 0.0008  max mem: 53905
Epoch: [83] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.003506  min_lr: 0.003506  loss: 3.8053 (3.5608)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5775 (nan)
Test:  [ 0/50]  eta: 0:02:28  loss: 0.6208 (0.6208)  acc1: 91.2000 (91.2000)  acc5: 99.6000 (99.6000)  time: 2.9744  data: 2.6555  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.9525 (0.9990)  acc1: 81.6000 (81.6364)  acc5: 96.8000 (96.5091)  time: 0.5598  data: 0.2722  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.0264 (1.0296)  acc1: 80.4000 (80.9524)  acc5: 96.8000 (96.6095)  time: 0.3019  data: 0.0172  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2238 (1.1454)  acc1: 76.0000 (78.5677)  acc5: 94.0000 (95.0065)  time: 0.2855  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4231 (1.2163)  acc1: 70.8000 (76.5561)  acc5: 90.8000 (93.9610)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4190 (1.2272)  acc1: 71.6000 (76.0480)  acc5: 90.8000 (93.8320)  time: 0.2850  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3481 s / it)
* Acc@1 76.126 Acc@5 93.690 loss 1.230
Accuracy of the model on the 50000 test images: 76.1%
Max accuracy: 76.20%
Epoch: [84]  [   0/2502]  eta: 1:56:44  lr: 0.003506  min_lr: 0.003506  loss: 3.7236 (3.7236)  weight_decay: 0.0500 (0.0500)  time: 2.7995  data: 1.7582  max mem: 53905
Epoch: [84]  [ 200/2502]  eta: 0:23:15  lr: 0.003505  min_lr: 0.003505  loss: 3.3710 (3.5197)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5673 (0.5801)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [84]  [ 400/2502]  eta: 0:21:00  lr: 0.003504  min_lr: 0.003504  loss: 3.5536 (3.5435)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5229 (0.5643)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [84]  [ 600/2502]  eta: 0:18:56  lr: 0.003503  min_lr: 0.003503  loss: 3.7938 (3.5532)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5720 (0.5657)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [84]  [ 800/2502]  eta: 0:16:54  lr: 0.003502  min_lr: 0.003502  loss: 3.6107 (3.5485)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5548 (0.5682)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [84]  [1000/2502]  eta: 0:14:54  lr: 0.003500  min_lr: 0.003500  loss: 3.6893 (3.5487)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6113 (0.5717)  time: 0.5935  data: 0.0004  max mem: 53905
Epoch: [84]  [1200/2502]  eta: 0:12:54  lr: 0.003499  min_lr: 0.003499  loss: 3.6191 (3.5476)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5875 (0.5723)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [84]  [1400/2502]  eta: 0:10:55  lr: 0.003498  min_lr: 0.003498  loss: 3.5676 (3.5473)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5461 (0.5728)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [84]  [1600/2502]  eta: 0:08:56  lr: 0.003497  min_lr: 0.003497  loss: 3.7458 (3.5459)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5364 (0.5715)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [84]  [1800/2502]  eta: 0:06:57  lr: 0.003496  min_lr: 0.003496  loss: 3.7124 (3.5525)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6144 (0.5746)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [84]  [2000/2502]  eta: 0:04:58  lr: 0.003494  min_lr: 0.003494  loss: 3.7420 (3.5545)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5202 (0.5748)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [84]  [2200/2502]  eta: 0:02:59  lr: 0.003493  min_lr: 0.003493  loss: 3.6280 (3.5526)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5354 (0.5728)  time: 0.5975  data: 0.0005  max mem: 53905
Epoch: [84]  [2400/2502]  eta: 0:01:00  lr: 0.003492  min_lr: 0.003492  loss: 3.4458 (3.5504)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5613 (0.5727)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [84]  [2501/2502]  eta: 0:00:00  lr: 0.003491  min_lr: 0.003491  loss: 3.4449 (3.5475)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5770 (0.5729)  time: 0.5329  data: 0.0008  max mem: 53905
Epoch: [84] Total time: 0:24:47 (0.5946 s / it)
Averaged stats: lr: 0.003491  min_lr: 0.003491  loss: 3.4449 (3.5485)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5770 (0.5729)
Test:  [ 0/50]  eta: 0:03:21  loss: 0.6624 (0.6624)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 4.0290  data: 3.7031  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.9623 (0.9741)  acc1: 81.6000 (82.2909)  acc5: 96.8000 (96.4364)  time: 0.6243  data: 0.3370  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9902 (0.9931)  acc1: 79.2000 (81.0095)  acc5: 96.8000 (96.5143)  time: 0.2842  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1570 (1.1180)  acc1: 77.2000 (78.6452)  acc5: 94.0000 (94.8774)  time: 0.2847  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3900 (1.1839)  acc1: 73.2000 (77.0244)  acc5: 90.4000 (93.8342)  time: 0.2844  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3483 (1.2001)  acc1: 73.2000 (76.5680)  acc5: 90.8000 (93.5840)  time: 0.2841  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3615 s / it)
* Acc@1 76.458 Acc@5 93.702 loss 1.203
Accuracy of the model on the 50000 test images: 76.5%
Max accuracy: 76.46%
Epoch: [85]  [   0/2502]  eta: 1:36:52  lr: 0.003491  min_lr: 0.003491  loss: 3.5626 (3.5626)  weight_decay: 0.0500 (0.0500)  time: 2.3230  data: 1.7285  max mem: 53905
Epoch: [85]  [ 200/2502]  eta: 0:23:02  lr: 0.003490  min_lr: 0.003490  loss: 3.3614 (3.4998)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5860 (0.5797)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [85]  [ 400/2502]  eta: 0:20:56  lr: 0.003489  min_lr: 0.003489  loss: 3.4615 (3.5215)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5740 (0.5836)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [85]  [ 600/2502]  eta: 0:18:53  lr: 0.003488  min_lr: 0.003488  loss: 3.6031 (3.5319)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5793 (0.5746)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [85]  [ 800/2502]  eta: 0:16:53  lr: 0.003487  min_lr: 0.003487  loss: 3.7238 (3.5367)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5421 (0.5720)  time: 0.6027  data: 0.0004  max mem: 53905
Epoch: [85]  [1000/2502]  eta: 0:14:53  lr: 0.003485  min_lr: 0.003485  loss: 3.5418 (3.5467)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5654 (0.5721)  time: 0.5935  data: 0.0004  max mem: 53905
Epoch: [85]  [1200/2502]  eta: 0:12:54  lr: 0.003484  min_lr: 0.003484  loss: 3.5453 (3.5459)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5417 (0.5706)  time: 0.5936  data: 0.0005  max mem: 53905
Epoch: [85]  [1400/2502]  eta: 0:10:55  lr: 0.003483  min_lr: 0.003483  loss: 3.5132 (3.5529)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5436 (0.5700)  time: 0.6046  data: 0.0004  max mem: 53905
Epoch: [85]  [1600/2502]  eta: 0:08:56  lr: 0.003482  min_lr: 0.003482  loss: 3.6262 (3.5488)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5665 (0.5689)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [85]  [1800/2502]  eta: 0:06:57  lr: 0.003481  min_lr: 0.003481  loss: 3.8099 (3.5570)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5434 (0.5702)  time: 0.5937  data: 0.0005  max mem: 53905
Epoch: [85]  [2000/2502]  eta: 0:04:58  lr: 0.003479  min_lr: 0.003479  loss: 3.6645 (3.5637)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6000 (0.5751)  time: 0.5936  data: 0.0004  max mem: 53905
Epoch: [85]  [2200/2502]  eta: 0:02:59  lr: 0.003478  min_lr: 0.003478  loss: 3.8473 (3.5619)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5189 (0.5713)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [85]  [2400/2502]  eta: 0:01:00  lr: 0.003477  min_lr: 0.003477  loss: 3.8366 (3.5606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5350 (0.5685)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [85]  [2501/2502]  eta: 0:00:00  lr: 0.003476  min_lr: 0.003476  loss: 3.8203 (3.5633)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5963 (0.5703)  time: 0.5467  data: 0.0008  max mem: 53905
Epoch: [85] Total time: 0:24:46 (0.5939 s / it)
Averaged stats: lr: 0.003476  min_lr: 0.003476  loss: 3.8203 (3.5570)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5963 (0.5703)
Test:  [ 0/50]  eta: 0:02:43  loss: 0.7598 (0.7598)  acc1: 91.6000 (91.6000)  acc5: 99.2000 (99.2000)  time: 3.2646  data: 2.9379  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.0116 (1.0237)  acc1: 80.8000 (81.7091)  acc5: 96.4000 (96.2545)  time: 0.5559  data: 0.2675  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.0233 (1.0363)  acc1: 80.0000 (80.9524)  acc5: 96.4000 (96.5333)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1845 (1.1477)  acc1: 76.4000 (78.6839)  acc5: 94.4000 (95.0452)  time: 0.2854  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4618 (1.2339)  acc1: 72.4000 (76.5756)  acc5: 90.4000 (93.8342)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4058 (1.2404)  acc1: 70.8000 (76.2000)  acc5: 90.4000 (93.6400)  time: 0.2851  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3475 s / it)
* Acc@1 76.038 Acc@5 93.716 loss 1.239
Accuracy of the model on the 50000 test images: 76.0%
Max accuracy: 76.46%
Epoch: [86]  [   0/2502]  eta: 1:45:07  lr: 0.003476  min_lr: 0.003476  loss: 4.3061 (4.3061)  weight_decay: 0.0500 (0.0500)  time: 2.5211  data: 1.9188  max mem: 53905
Epoch: [86]  [ 200/2502]  eta: 0:23:09  lr: 0.003475  min_lr: 0.003475  loss: 3.8539 (3.5860)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5523 (0.5733)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [86]  [ 400/2502]  eta: 0:20:57  lr: 0.003474  min_lr: 0.003474  loss: 3.4755 (3.5761)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5711 (0.5717)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [86]  [ 600/2502]  eta: 0:18:55  lr: 0.003473  min_lr: 0.003473  loss: 3.8808 (3.5691)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6034 (0.5791)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [86]  [ 800/2502]  eta: 0:16:54  lr: 0.003472  min_lr: 0.003472  loss: 3.7306 (3.5625)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5726 (0.5802)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [86]  [1000/2502]  eta: 0:14:54  lr: 0.003470  min_lr: 0.003470  loss: 3.7655 (3.5667)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5766 (0.5786)  time: 0.5988  data: 0.0004  max mem: 53905
Epoch: [86]  [1200/2502]  eta: 0:12:54  lr: 0.003469  min_lr: 0.003469  loss: 3.7401 (3.5669)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5328 (0.5745)  time: 0.5989  data: 0.0005  max mem: 53905
Epoch: [86]  [1400/2502]  eta: 0:10:55  lr: 0.003468  min_lr: 0.003468  loss: 3.5507 (3.5576)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5294 (0.5725)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [86]  [1600/2502]  eta: 0:08:56  lr: 0.003467  min_lr: 0.003467  loss: 3.6956 (3.5516)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5358 (0.5731)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [86]  [1800/2502]  eta: 0:06:57  lr: 0.003465  min_lr: 0.003465  loss: 3.7476 (3.5483)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5000 (0.5709)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [86]  [2000/2502]  eta: 0:04:58  lr: 0.003464  min_lr: 0.003464  loss: 3.3473 (3.5474)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5972 (0.5725)  time: 0.5986  data: 0.0005  max mem: 53905
Epoch: [86]  [2200/2502]  eta: 0:02:59  lr: 0.003463  min_lr: 0.003463  loss: 3.6033 (3.5529)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5548 (0.5709)  time: 0.5991  data: 0.0004  max mem: 53905
Epoch: [86]  [2400/2502]  eta: 0:01:00  lr: 0.003462  min_lr: 0.003462  loss: 3.7069 (3.5538)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6005 (0.5744)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [86]  [2501/2502]  eta: 0:00:00  lr: 0.003461  min_lr: 0.003461  loss: 3.6839 (3.5545)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5334 (0.5737)  time: 0.5333  data: 0.0008  max mem: 53905
Epoch: [86] Total time: 0:24:45 (0.5939 s / it)
Averaged stats: lr: 0.003461  min_lr: 0.003461  loss: 3.6839 (3.5533)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5334 (0.5737)
Test:  [ 0/50]  eta: 0:02:31  loss: 0.6393 (0.6393)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 3.0315  data: 2.6876  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.9611 (0.9819)  acc1: 82.0000 (81.9636)  acc5: 96.4000 (96.2909)  time: 0.5457  data: 0.2563  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.0042 (1.0056)  acc1: 80.8000 (81.1429)  acc5: 96.8000 (96.4191)  time: 0.2910  data: 0.0069  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2086 (1.1210)  acc1: 76.8000 (78.8258)  acc5: 93.2000 (94.7871)  time: 0.2851  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4132 (1.1975)  acc1: 72.4000 (77.0537)  acc5: 90.8000 (93.9024)  time: 0.2848  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3990 (1.2106)  acc1: 72.4000 (76.4960)  acc5: 92.0000 (93.7680)  time: 0.2845  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3444 s / it)
* Acc@1 76.192 Acc@5 93.650 loss 1.214
Accuracy of the model on the 50000 test images: 76.2%
Max accuracy: 76.46%
Epoch: [87]  [   0/2502]  eta: 1:43:25  lr: 0.003461  min_lr: 0.003461  loss: 3.9266 (3.9266)  weight_decay: 0.0500 (0.0500)  time: 2.4804  data: 1.5228  max mem: 53905
Epoch: [87]  [ 200/2502]  eta: 0:23:06  lr: 0.003460  min_lr: 0.003460  loss: 3.3199 (3.5060)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5483 (0.5788)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [87]  [ 400/2502]  eta: 0:20:58  lr: 0.003459  min_lr: 0.003459  loss: 3.7455 (3.5059)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5664 (0.5736)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [87]  [ 600/2502]  eta: 0:18:54  lr: 0.003457  min_lr: 0.003457  loss: 3.7711 (3.5081)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5367 (0.5683)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [87]  [ 800/2502]  eta: 0:16:54  lr: 0.003456  min_lr: 0.003456  loss: 3.4094 (3.5075)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5116 (0.5629)  time: 0.5973  data: 0.0005  max mem: 53905
Epoch: [87]  [1000/2502]  eta: 0:14:54  lr: 0.003455  min_lr: 0.003455  loss: 3.4632 (3.5073)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5725 (0.5631)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [87]  [1200/2502]  eta: 0:12:54  lr: 0.003454  min_lr: 0.003454  loss: 3.4940 (3.5171)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5720 (0.5661)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [87]  [1400/2502]  eta: 0:10:55  lr: 0.003453  min_lr: 0.003453  loss: 3.3826 (3.5187)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5486 (0.5652)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [87]  [1600/2502]  eta: 0:08:56  lr: 0.003451  min_lr: 0.003451  loss: 3.7821 (3.5229)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5875 (nan)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [87]  [1800/2502]  eta: 0:06:57  lr: 0.003450  min_lr: 0.003450  loss: 3.5186 (3.5268)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5835 (nan)  time: 0.5934  data: 0.0005  max mem: 53905
Epoch: [87]  [2000/2502]  eta: 0:04:58  lr: 0.003449  min_lr: 0.003449  loss: 3.7269 (3.5290)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5556 (nan)  time: 0.5933  data: 0.0004  max mem: 53905
Epoch: [87]  [2200/2502]  eta: 0:02:59  lr: 0.003448  min_lr: 0.003448  loss: 3.6089 (3.5300)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4857 (nan)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [87]  [2400/2502]  eta: 0:01:00  lr: 0.003446  min_lr: 0.003446  loss: 3.4481 (3.5297)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6172 (nan)  time: 0.5941  data: 0.0005  max mem: 53905
Epoch: [87]  [2501/2502]  eta: 0:00:00  lr: 0.003446  min_lr: 0.003446  loss: 3.8209 (3.5325)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5617 (nan)  time: 0.5349  data: 0.0009  max mem: 53905
Epoch: [87] Total time: 0:24:46 (0.5942 s / it)
Averaged stats: lr: 0.003446  min_lr: 0.003446  loss: 3.8209 (3.5451)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5617 (nan)
Test:  [ 0/50]  eta: 0:02:54  loss: 0.8081 (0.8081)  acc1: 92.0000 (92.0000)  acc5: 98.8000 (98.8000)  time: 3.4882  data: 3.1629  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 1.0037 (1.1034)  acc1: 79.6000 (81.4182)  acc5: 96.8000 (96.5455)  time: 0.5845  data: 0.2966  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.1158 (1.1362)  acc1: 79.2000 (80.5524)  acc5: 96.8000 (96.4191)  time: 0.2897  data: 0.0052  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2323 (1.2444)  acc1: 76.4000 (78.6710)  acc5: 94.0000 (94.9419)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.5763 (1.3318)  acc1: 72.0000 (76.7707)  acc5: 90.8000 (93.8732)  time: 0.2851  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.5695 (1.3459)  acc1: 70.8000 (76.1840)  acc5: 91.2000 (93.7200)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3533 s / it)
* Acc@1 76.078 Acc@5 93.682 loss 1.344
Accuracy of the model on the 50000 test images: 76.1%
Max accuracy: 76.46%
Epoch: [88]  [   0/2502]  eta: 1:47:18  lr: 0.003446  min_lr: 0.003446  loss: 3.0769 (3.0769)  weight_decay: 0.0500 (0.0500)  time: 2.5734  data: 1.9609  max mem: 53905
Epoch: [88]  [ 200/2502]  eta: 0:23:07  lr: 0.003444  min_lr: 0.003444  loss: 3.8583 (3.5371)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5727 (0.6083)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [88]  [ 400/2502]  eta: 0:20:58  lr: 0.003443  min_lr: 0.003443  loss: 3.6433 (3.5410)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5171 (0.5710)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [88]  [ 600/2502]  eta: 0:18:56  lr: 0.003442  min_lr: 0.003442  loss: 3.6131 (3.5441)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5502 (0.5744)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [88]  [ 800/2502]  eta: 0:16:54  lr: 0.003441  min_lr: 0.003441  loss: 3.8318 (3.5360)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5182 (0.5676)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [88]  [1000/2502]  eta: 0:14:54  lr: 0.003440  min_lr: 0.003440  loss: 3.8030 (3.5421)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5391 (0.5711)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [88]  [1200/2502]  eta: 0:12:55  lr: 0.003438  min_lr: 0.003438  loss: 3.6409 (3.5429)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5362 (0.5720)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [88]  [1400/2502]  eta: 0:10:55  lr: 0.003437  min_lr: 0.003437  loss: 3.1331 (3.5371)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5775 (0.5723)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [88]  [1600/2502]  eta: 0:08:56  lr: 0.003436  min_lr: 0.003436  loss: 3.6050 (3.5317)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5370 (0.5686)  time: 0.5954  data: 0.0006  max mem: 53905
Epoch: [88]  [1800/2502]  eta: 0:06:57  lr: 0.003435  min_lr: 0.003435  loss: 3.5811 (3.5323)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5384 (0.5679)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [88]  [2000/2502]  eta: 0:04:58  lr: 0.003433  min_lr: 0.003433  loss: 3.6021 (3.5341)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5848 (0.5702)  time: 0.5999  data: 0.0005  max mem: 53905
Epoch: [88]  [2200/2502]  eta: 0:02:59  lr: 0.003432  min_lr: 0.003432  loss: 3.5653 (3.5380)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5408 (0.5686)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [88]  [2400/2502]  eta: 0:01:00  lr: 0.003431  min_lr: 0.003431  loss: 3.5681 (3.5362)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5455 (0.5684)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [88]  [2501/2502]  eta: 0:00:00  lr: 0.003430  min_lr: 0.003430  loss: 3.5899 (3.5359)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6030 (0.5701)  time: 0.5352  data: 0.0008  max mem: 53905
Epoch: [88] Total time: 0:24:46 (0.5941 s / it)
Averaged stats: lr: 0.003430  min_lr: 0.003430  loss: 3.5899 (3.5350)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6030 (0.5701)
Test:  [ 0/50]  eta: 0:02:36  loss: 0.6216 (0.6216)  acc1: 90.0000 (90.0000)  acc5: 98.8000 (98.8000)  time: 3.1294  data: 2.7883  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.9109 (0.9452)  acc1: 82.4000 (82.7636)  acc5: 96.4000 (96.6545)  time: 0.6172  data: 0.3273  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9427 (0.9654)  acc1: 80.8000 (81.5619)  acc5: 96.4000 (96.6857)  time: 0.3254  data: 0.0408  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1436 (1.0829)  acc1: 76.4000 (79.2516)  acc5: 93.6000 (95.1742)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3757 (1.1617)  acc1: 72.8000 (77.2683)  acc5: 90.8000 (94.1366)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3731 (1.1761)  acc1: 71.2000 (76.6960)  acc5: 91.2000 (93.9840)  time: 0.2845  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3608 s / it)
* Acc@1 76.432 Acc@5 93.834 loss 1.188
Accuracy of the model on the 50000 test images: 76.4%
Max accuracy: 76.46%
Epoch: [89]  [   0/2502]  eta: 2:00:29  lr: 0.003430  min_lr: 0.003430  loss: 4.2154 (4.2154)  weight_decay: 0.0500 (0.0500)  time: 2.8896  data: 1.5851  max mem: 53905
Epoch: [89]  [ 200/2502]  eta: 0:23:10  lr: 0.003429  min_lr: 0.003429  loss: 3.6537 (3.5293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6039 (0.5923)  time: 0.5981  data: 0.0004  max mem: 53905
Epoch: [89]  [ 400/2502]  eta: 0:20:57  lr: 0.003428  min_lr: 0.003428  loss: 3.7146 (3.5315)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5218 (0.5763)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [89]  [ 600/2502]  eta: 0:18:54  lr: 0.003426  min_lr: 0.003426  loss: 3.6588 (3.5378)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5436 (0.5745)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [89]  [ 800/2502]  eta: 0:16:53  lr: 0.003425  min_lr: 0.003425  loss: 3.5679 (3.5322)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5266 (0.5654)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [89]  [1000/2502]  eta: 0:14:53  lr: 0.003424  min_lr: 0.003424  loss: 3.5641 (3.5294)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5419 (0.5665)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [89]  [1200/2502]  eta: 0:12:54  lr: 0.003423  min_lr: 0.003423  loss: 3.7315 (3.5341)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6182 (nan)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [89]  [1400/2502]  eta: 0:10:55  lr: 0.003421  min_lr: 0.003421  loss: 3.7088 (3.5318)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5797 (nan)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [89]  [1600/2502]  eta: 0:08:56  lr: 0.003420  min_lr: 0.003420  loss: 3.3930 (3.5276)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5288 (nan)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [89]  [1800/2502]  eta: 0:06:57  lr: 0.003419  min_lr: 0.003419  loss: 3.7360 (3.5266)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5537 (nan)  time: 0.5938  data: 0.0005  max mem: 53905
Epoch: [89]  [2000/2502]  eta: 0:04:58  lr: 0.003418  min_lr: 0.003418  loss: 3.5328 (3.5268)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5674 (nan)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [89]  [2200/2502]  eta: 0:02:59  lr: 0.003416  min_lr: 0.003416  loss: 3.5614 (3.5258)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5393 (nan)  time: 0.5930  data: 0.0006  max mem: 53905
Epoch: [89]  [2400/2502]  eta: 0:01:00  lr: 0.003415  min_lr: 0.003415  loss: 3.5795 (3.5263)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5784 (nan)  time: 0.5971  data: 0.0004  max mem: 53905
Epoch: [89]  [2501/2502]  eta: 0:00:00  lr: 0.003414  min_lr: 0.003414  loss: 3.6617 (3.5284)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5653 (nan)  time: 0.5336  data: 0.0011  max mem: 53905
Epoch: [89] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.003414  min_lr: 0.003414  loss: 3.6617 (3.5341)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5653 (nan)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.7089 (0.7089)  acc1: 91.6000 (91.6000)  acc5: 98.8000 (98.8000)  time: 3.0047  data: 2.6782  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 1.0118 (1.0436)  acc1: 82.8000 (81.9636)  acc5: 96.4000 (96.0727)  time: 0.5322  data: 0.2440  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.0513 (1.0653)  acc1: 81.2000 (81.0667)  acc5: 96.8000 (96.3429)  time: 0.2851  data: 0.0006  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2999 (1.1717)  acc1: 77.2000 (78.8516)  acc5: 93.2000 (94.9161)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4583 (1.2494)  acc1: 72.0000 (76.8098)  acc5: 91.2000 (93.9512)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4487 (1.2654)  acc1: 71.6000 (76.3120)  acc5: 91.2000 (93.8080)  time: 0.2847  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3421 s / it)
* Acc@1 76.270 Acc@5 93.798 loss 1.265
Accuracy of the model on the 50000 test images: 76.3%
Max accuracy: 76.46%
Epoch: [90]  [   0/2502]  eta: 1:45:15  lr: 0.003414  min_lr: 0.003414  loss: 4.0668 (4.0668)  weight_decay: 0.0500 (0.0500)  time: 2.5243  data: 1.9179  max mem: 53905
Epoch: [90]  [ 200/2502]  eta: 0:23:10  lr: 0.003413  min_lr: 0.003413  loss: 3.6320 (3.5406)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5432 (0.5693)  time: 0.6007  data: 0.0004  max mem: 53905
Epoch: [90]  [ 400/2502]  eta: 0:20:57  lr: 0.003412  min_lr: 0.003412  loss: 3.6379 (3.5404)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5402 (0.5652)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [90]  [ 600/2502]  eta: 0:18:55  lr: 0.003411  min_lr: 0.003411  loss: 3.6381 (3.5521)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5554 (0.5777)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [90]  [ 800/2502]  eta: 0:16:53  lr: 0.003409  min_lr: 0.003409  loss: 3.6214 (3.5409)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5118 (0.5689)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [90]  [1000/2502]  eta: 0:14:53  lr: 0.003408  min_lr: 0.003408  loss: 3.8624 (3.5464)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5906 (0.5769)  time: 0.5979  data: 0.0004  max mem: 53905
Epoch: [90]  [1200/2502]  eta: 0:12:54  lr: 0.003407  min_lr: 0.003407  loss: 3.7634 (3.5542)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5681 (0.5760)  time: 0.5962  data: 0.0005  max mem: 53905
Epoch: [90]  [1400/2502]  eta: 0:10:55  lr: 0.003405  min_lr: 0.003405  loss: 3.7254 (3.5528)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6145 (0.5800)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [90]  [1600/2502]  eta: 0:08:56  lr: 0.003404  min_lr: 0.003404  loss: 3.6579 (3.5523)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5480 (0.5770)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [90]  [1800/2502]  eta: 0:06:57  lr: 0.003403  min_lr: 0.003403  loss: 3.7138 (3.5446)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5324 (0.5754)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [90]  [2000/2502]  eta: 0:04:58  lr: 0.003402  min_lr: 0.003402  loss: 3.6718 (3.5475)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5567 (0.5763)  time: 0.5934  data: 0.0004  max mem: 53905
Epoch: [90]  [2200/2502]  eta: 0:02:59  lr: 0.003400  min_lr: 0.003400  loss: 3.7916 (3.5531)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5830 (0.5763)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [90]  [2400/2502]  eta: 0:01:00  lr: 0.003399  min_lr: 0.003399  loss: 3.5304 (3.5503)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6119 (0.5774)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [90]  [2501/2502]  eta: 0:00:00  lr: 0.003398  min_lr: 0.003398  loss: 3.3660 (3.5488)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5445 (0.5774)  time: 0.5328  data: 0.0009  max mem: 53905
Epoch: [90] Total time: 0:24:46 (0.5943 s / it)
Averaged stats: lr: 0.003398  min_lr: 0.003398  loss: 3.3660 (3.5383)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5445 (0.5774)
Test:  [ 0/50]  eta: 0:03:10  loss: 0.6274 (0.6274)  acc1: 92.4000 (92.4000)  acc5: 99.2000 (99.2000)  time: 3.8055  data: 3.4963  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 0.9432 (0.9560)  acc1: 81.6000 (82.1091)  acc5: 96.4000 (96.4727)  time: 0.6355  data: 0.3419  max mem: 53905
Test:  [20/50]  eta: 0:00:14  loss: 0.9380 (0.9480)  acc1: 80.4000 (81.6571)  acc5: 96.4000 (96.4000)  time: 0.3012  data: 0.0135  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1030 (1.0725)  acc1: 78.4000 (79.2774)  acc5: 93.2000 (94.7871)  time: 0.2837  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3979 (1.1499)  acc1: 72.0000 (77.2195)  acc5: 90.4000 (93.8049)  time: 0.2834  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3575 (1.1668)  acc1: 71.6000 (76.6240)  acc5: 90.8000 (93.7680)  time: 0.2833  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3639 s / it)
* Acc@1 76.458 Acc@5 93.806 loss 1.169
Accuracy of the model on the 50000 test images: 76.5%
Max accuracy: 76.46%
Epoch: [91]  [   0/2502]  eta: 1:51:30  lr: 0.003398  min_lr: 0.003398  loss: 3.6001 (3.6001)  weight_decay: 0.0500 (0.0500)  time: 2.6741  data: 2.0800  max mem: 53905
Epoch: [91]  [ 200/2502]  eta: 0:23:12  lr: 0.003397  min_lr: 0.003397  loss: 3.4209 (3.5284)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5928 (0.5757)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [91]  [ 400/2502]  eta: 0:21:01  lr: 0.003396  min_lr: 0.003396  loss: 3.6375 (3.5329)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5724 (0.5895)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [91]  [ 600/2502]  eta: 0:18:56  lr: 0.003395  min_lr: 0.003395  loss: 3.6200 (3.5333)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5984 (0.5841)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [91]  [ 800/2502]  eta: 0:16:55  lr: 0.003393  min_lr: 0.003393  loss: 3.6302 (3.5236)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5379 (0.5783)  time: 0.5935  data: 0.0005  max mem: 53905
Epoch: [91]  [1000/2502]  eta: 0:14:55  lr: 0.003392  min_lr: 0.003392  loss: 3.3815 (3.5219)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5843 (0.5835)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [91]  [1200/2502]  eta: 0:12:55  lr: 0.003391  min_lr: 0.003391  loss: 3.4700 (3.5214)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5797 (0.5839)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [91]  [1400/2502]  eta: 0:10:56  lr: 0.003389  min_lr: 0.003389  loss: 3.4596 (3.5194)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5227 (0.5793)  time: 0.5971  data: 0.0005  max mem: 53905
Epoch: [91]  [1600/2502]  eta: 0:08:56  lr: 0.003388  min_lr: 0.003388  loss: 3.4724 (3.5224)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6085 (0.5809)  time: 0.5931  data: 0.0006  max mem: 53905
Epoch: [91]  [1800/2502]  eta: 0:06:57  lr: 0.003387  min_lr: 0.003387  loss: 3.4918 (3.5180)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5818 (0.5801)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [91]  [2000/2502]  eta: 0:04:58  lr: 0.003386  min_lr: 0.003386  loss: 3.7858 (3.5254)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5821 (0.5820)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [91]  [2200/2502]  eta: 0:02:59  lr: 0.003384  min_lr: 0.003384  loss: 3.8114 (3.5324)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5313 (0.5809)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [91]  [2400/2502]  eta: 0:01:00  lr: 0.003383  min_lr: 0.003383  loss: 3.6594 (3.5283)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5718 (0.5794)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [91]  [2501/2502]  eta: 0:00:00  lr: 0.003382  min_lr: 0.003382  loss: 3.6229 (3.5275)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5402 (0.5795)  time: 0.5331  data: 0.0009  max mem: 53905
Epoch: [91] Total time: 0:24:46 (0.5943 s / it)
Averaged stats: lr: 0.003382  min_lr: 0.003382  loss: 3.6229 (3.5221)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5402 (0.5795)
Test:  [ 0/50]  eta: 0:02:50  loss: 0.7070 (0.7070)  acc1: 93.6000 (93.6000)  acc5: 98.0000 (98.0000)  time: 3.4096  data: 3.0864  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8872 (0.9734)  acc1: 82.4000 (82.6909)  acc5: 96.4000 (96.2909)  time: 0.5690  data: 0.2811  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0035 (0.9887)  acc1: 80.4000 (81.6000)  acc5: 96.4000 (96.5333)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1650 (1.1073)  acc1: 77.2000 (79.2516)  acc5: 94.4000 (94.9032)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4018 (1.1777)  acc1: 72.0000 (77.3171)  acc5: 91.2000 (93.9902)  time: 0.2851  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4082 (1.1976)  acc1: 70.8000 (76.7040)  acc5: 91.6000 (93.7600)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3500 s / it)
* Acc@1 76.708 Acc@5 93.828 loss 1.199
Accuracy of the model on the 50000 test images: 76.7%
Max accuracy: 76.71%
Epoch: [92]  [   0/2502]  eta: 1:48:21  lr: 0.003382  min_lr: 0.003382  loss: 4.1836 (4.1836)  weight_decay: 0.0500 (0.0500)  time: 2.5984  data: 2.0075  max mem: 53905
Epoch: [92]  [ 200/2502]  eta: 0:23:09  lr: 0.003381  min_lr: 0.003381  loss: 3.6765 (3.5886)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5690 (0.5777)  time: 0.5958  data: 0.0005  max mem: 53905
Epoch: [92]  [ 400/2502]  eta: 0:20:59  lr: 0.003380  min_lr: 0.003380  loss: 3.7276 (3.5469)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5274 (0.5754)  time: 0.5994  data: 0.0005  max mem: 53905
Epoch: [92]  [ 600/2502]  eta: 0:18:56  lr: 0.003378  min_lr: 0.003378  loss: 3.7294 (3.5347)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5192 (0.5702)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [92]  [ 800/2502]  eta: 0:16:54  lr: 0.003377  min_lr: 0.003377  loss: 3.6193 (3.5373)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5690 (0.5717)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [92]  [1000/2502]  eta: 0:14:54  lr: 0.003376  min_lr: 0.003376  loss: 3.5804 (3.5263)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6008 (0.5734)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [92]  [1200/2502]  eta: 0:12:54  lr: 0.003374  min_lr: 0.003374  loss: 3.5017 (3.5094)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5417 (0.5727)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [92]  [1400/2502]  eta: 0:10:55  lr: 0.003373  min_lr: 0.003373  loss: 3.5368 (3.5164)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5579 (0.5736)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [92]  [1600/2502]  eta: 0:08:56  lr: 0.003372  min_lr: 0.003372  loss: 3.6942 (3.5174)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5449 (0.5727)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [92]  [1800/2502]  eta: 0:06:57  lr: 0.003371  min_lr: 0.003371  loss: 3.5843 (3.5177)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5077 (0.5757)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [92]  [2000/2502]  eta: 0:04:58  lr: 0.003369  min_lr: 0.003369  loss: 3.8082 (3.5111)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5612 (0.5742)  time: 0.5932  data: 0.0006  max mem: 53905
Epoch: [92]  [2200/2502]  eta: 0:02:59  lr: 0.003368  min_lr: 0.003368  loss: 3.4593 (3.5146)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5712 (0.5768)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [92]  [2400/2502]  eta: 0:01:00  lr: 0.003367  min_lr: 0.003367  loss: 3.3266 (3.5144)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5554 (0.5756)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [92]  [2501/2502]  eta: 0:00:00  lr: 0.003366  min_lr: 0.003366  loss: 3.7510 (3.5158)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5915 (0.5759)  time: 0.5343  data: 0.0007  max mem: 53905
Epoch: [92] Total time: 0:24:45 (0.5939 s / it)
Averaged stats: lr: 0.003366  min_lr: 0.003366  loss: 3.7510 (3.5220)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5915 (0.5759)
Test:  [ 0/50]  eta: 0:02:33  loss: 0.6107 (0.6107)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 3.0693  data: 2.7455  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.8973 (0.9863)  acc1: 80.0000 (81.6727)  acc5: 96.8000 (96.4727)  time: 0.5384  data: 0.2501  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9590 (0.9949)  acc1: 80.0000 (81.0286)  acc5: 96.8000 (96.4952)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1006 (1.1129)  acc1: 76.8000 (78.9936)  acc5: 93.6000 (94.8000)  time: 0.2854  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4273 (1.1906)  acc1: 72.4000 (76.9561)  acc5: 90.4000 (93.7854)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3974 (1.2049)  acc1: 72.0000 (76.3680)  acc5: 90.8000 (93.6560)  time: 0.2849  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3436 s / it)
* Acc@1 76.430 Acc@5 93.732 loss 1.207
Accuracy of the model on the 50000 test images: 76.4%
Max accuracy: 76.71%
Epoch: [93]  [   0/2502]  eta: 1:57:41  lr: 0.003366  min_lr: 0.003366  loss: 4.2961 (4.2961)  weight_decay: 0.0500 (0.0500)  time: 2.8222  data: 1.9493  max mem: 53905
Epoch: [93]  [ 200/2502]  eta: 0:23:15  lr: 0.003365  min_lr: 0.003365  loss: 3.6107 (3.4776)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5410 (0.5653)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [93]  [ 400/2502]  eta: 0:21:00  lr: 0.003363  min_lr: 0.003363  loss: 3.3943 (3.4992)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5695 (0.5716)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [93]  [ 600/2502]  eta: 0:18:56  lr: 0.003362  min_lr: 0.003362  loss: 3.7571 (3.5097)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5454 (0.5745)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [93]  [ 800/2502]  eta: 0:16:55  lr: 0.003361  min_lr: 0.003361  loss: 3.4423 (3.5149)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5109 (0.5658)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [93]  [1000/2502]  eta: 0:14:55  lr: 0.003359  min_lr: 0.003359  loss: 3.5375 (3.5187)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5217 (0.5658)  time: 0.5961  data: 0.0005  max mem: 53905
Epoch: [93]  [1200/2502]  eta: 0:12:55  lr: 0.003358  min_lr: 0.003358  loss: 3.7794 (3.5266)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5505 (0.5680)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [93]  [1400/2502]  eta: 0:10:56  lr: 0.003357  min_lr: 0.003357  loss: 3.6158 (3.5189)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5176 (0.5687)  time: 0.5935  data: 0.0005  max mem: 53905
Epoch: [93]  [1600/2502]  eta: 0:08:57  lr: 0.003355  min_lr: 0.003355  loss: 3.3937 (3.5203)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5285 (0.5666)  time: 0.5938  data: 0.0005  max mem: 53905
Epoch: [93]  [1800/2502]  eta: 0:06:58  lr: 0.003354  min_lr: 0.003354  loss: 3.7419 (3.5262)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5941 (0.5690)  time: 0.6054  data: 0.0005  max mem: 53905
Epoch: [93]  [2000/2502]  eta: 0:04:58  lr: 0.003353  min_lr: 0.003353  loss: 3.7073 (3.5249)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5287 (0.5682)  time: 0.5942  data: 0.0005  max mem: 53905
Epoch: [93]  [2200/2502]  eta: 0:02:59  lr: 0.003351  min_lr: 0.003351  loss: 3.5155 (3.5239)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5651 (0.5687)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [93]  [2400/2502]  eta: 0:01:00  lr: 0.003350  min_lr: 0.003350  loss: 3.3593 (3.5229)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5719 (0.5689)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [93]  [2501/2502]  eta: 0:00:00  lr: 0.003350  min_lr: 0.003350  loss: 3.2568 (3.5212)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5966 (0.5713)  time: 0.5342  data: 0.0009  max mem: 53905
Epoch: [93] Total time: 0:24:48 (0.5948 s / it)
Averaged stats: lr: 0.003350  min_lr: 0.003350  loss: 3.2568 (3.5151)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5966 (0.5713)
Test:  [ 0/50]  eta: 0:03:25  loss: 0.6448 (0.6448)  acc1: 90.0000 (90.0000)  acc5: 98.4000 (98.4000)  time: 4.1138  data: 3.7942  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 0.8707 (0.9116)  acc1: 83.2000 (82.1818)  acc5: 96.8000 (96.2545)  time: 0.6330  data: 0.3454  max mem: 53905
Test:  [20/50]  eta: 0:00:14  loss: 0.9014 (0.9352)  acc1: 80.4000 (81.2762)  acc5: 96.0000 (96.3238)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1439 (1.0660)  acc1: 76.4000 (78.7871)  acc5: 93.6000 (94.7097)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3431 (1.1399)  acc1: 71.2000 (77.0244)  acc5: 90.4000 (93.9024)  time: 0.2851  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3157 (1.1576)  acc1: 71.2000 (76.5680)  acc5: 91.6000 (93.7520)  time: 0.2849  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3643 s / it)
* Acc@1 76.438 Acc@5 93.842 loss 1.159
Accuracy of the model on the 50000 test images: 76.4%
Max accuracy: 76.71%
Epoch: [94]  [   0/2502]  eta: 1:38:42  lr: 0.003350  min_lr: 0.003350  loss: 4.1868 (4.1868)  weight_decay: 0.0500 (0.0500)  time: 2.3670  data: 1.5901  max mem: 53905
Epoch: [94]  [ 200/2502]  eta: 0:23:04  lr: 0.003348  min_lr: 0.003348  loss: 3.5408 (3.5120)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5666 (0.5742)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [94]  [ 400/2502]  eta: 0:20:57  lr: 0.003347  min_lr: 0.003347  loss: 3.6513 (3.5272)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5471 (0.5698)  time: 0.5986  data: 0.0005  max mem: 53905
Epoch: [94]  [ 600/2502]  eta: 0:18:54  lr: 0.003346  min_lr: 0.003346  loss: 3.4861 (3.5041)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5285 (0.5660)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [94]  [ 800/2502]  eta: 0:16:53  lr: 0.003344  min_lr: 0.003344  loss: 3.3986 (3.4896)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5744 (0.5722)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [94]  [1000/2502]  eta: 0:14:53  lr: 0.003343  min_lr: 0.003343  loss: 3.4826 (3.5042)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5782 (0.5722)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [94]  [1200/2502]  eta: 0:12:54  lr: 0.003342  min_lr: 0.003342  loss: 3.6894 (3.5056)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5536 (0.5716)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [94]  [1400/2502]  eta: 0:10:54  lr: 0.003340  min_lr: 0.003340  loss: 3.5845 (3.5022)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5878 (0.5733)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [94]  [1600/2502]  eta: 0:08:56  lr: 0.003339  min_lr: 0.003339  loss: 3.4634 (3.5016)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5663 (0.5742)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [94]  [1800/2502]  eta: 0:06:57  lr: 0.003338  min_lr: 0.003338  loss: 3.3272 (3.4992)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5407 (0.5728)  time: 0.5972  data: 0.0005  max mem: 53905
Epoch: [94]  [2000/2502]  eta: 0:04:58  lr: 0.003336  min_lr: 0.003336  loss: 3.5244 (3.5032)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5451 (0.5693)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [94]  [2200/2502]  eta: 0:02:59  lr: 0.003335  min_lr: 0.003335  loss: 3.7504 (3.5069)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6088 (0.5717)  time: 0.5984  data: 0.0005  max mem: 53905
Epoch: [94]  [2400/2502]  eta: 0:01:00  lr: 0.003334  min_lr: 0.003334  loss: 3.5852 (3.5081)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6067 (0.5721)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [94]  [2501/2502]  eta: 0:00:00  lr: 0.003333  min_lr: 0.003333  loss: 3.8083 (3.5097)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5860 (0.5735)  time: 0.5330  data: 0.0009  max mem: 53905
Epoch: [94] Total time: 0:24:45 (0.5938 s / it)
Averaged stats: lr: 0.003333  min_lr: 0.003333  loss: 3.8083 (3.5127)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5860 (0.5735)
Test:  [ 0/50]  eta: 0:03:17  loss: 0.7358 (0.7358)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 3.9421  data: 3.6360  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 1.0167 (1.0404)  acc1: 81.2000 (82.4000)  acc5: 96.8000 (96.4727)  time: 0.6151  data: 0.3309  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0355 (1.0531)  acc1: 80.0000 (81.4857)  acc5: 96.8000 (96.5524)  time: 0.2826  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1789 (1.1683)  acc1: 77.2000 (79.0839)  acc5: 94.4000 (95.0839)  time: 0.2828  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4504 (1.2384)  acc1: 73.2000 (77.4146)  acc5: 90.8000 (94.1366)  time: 0.2825  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4021 (1.2519)  acc1: 73.2000 (76.9520)  acc5: 92.0000 (94.0080)  time: 0.2823  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3580 s / it)
* Acc@1 76.760 Acc@5 94.016 loss 1.255
Accuracy of the model on the 50000 test images: 76.8%
Max accuracy: 76.76%
Epoch: [95]  [   0/2502]  eta: 1:38:07  lr: 0.003333  min_lr: 0.003333  loss: 3.7710 (3.7710)  weight_decay: 0.0500 (0.0500)  time: 2.3530  data: 1.7567  max mem: 53905
Epoch: [95]  [ 200/2502]  eta: 0:23:05  lr: 0.003332  min_lr: 0.003332  loss: 3.5865 (3.4645)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5453 (0.5617)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [95]  [ 400/2502]  eta: 0:20:56  lr: 0.003330  min_lr: 0.003330  loss: 3.6694 (3.4848)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5562 (0.5726)  time: 0.5944  data: 0.0004  max mem: 53905
Epoch: [95]  [ 600/2502]  eta: 0:18:54  lr: 0.003329  min_lr: 0.003329  loss: 3.3298 (3.4890)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5563 (0.5735)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [95]  [ 800/2502]  eta: 0:16:54  lr: 0.003327  min_lr: 0.003327  loss: 3.6570 (3.5049)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5614 (0.5734)  time: 0.6008  data: 0.0004  max mem: 53905
Epoch: [95]  [1000/2502]  eta: 0:14:54  lr: 0.003326  min_lr: 0.003326  loss: 3.3242 (3.5076)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5547 (0.5705)  time: 0.5945  data: 0.0005  max mem: 53905
Epoch: [95]  [1200/2502]  eta: 0:12:54  lr: 0.003325  min_lr: 0.003325  loss: 3.6458 (3.5121)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5501 (0.5727)  time: 0.6018  data: 0.0004  max mem: 53905
Epoch: [95]  [1400/2502]  eta: 0:10:55  lr: 0.003323  min_lr: 0.003323  loss: 3.8455 (3.5124)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5632 (0.5784)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [95]  [1600/2502]  eta: 0:08:56  lr: 0.003322  min_lr: 0.003322  loss: 3.7435 (3.5130)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5787 (0.5777)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [95]  [1800/2502]  eta: 0:06:57  lr: 0.003321  min_lr: 0.003321  loss: 3.7016 (3.5176)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5498 (0.5769)  time: 0.5988  data: 0.0004  max mem: 53905
Epoch: [95]  [2000/2502]  eta: 0:04:58  lr: 0.003319  min_lr: 0.003319  loss: 3.7606 (3.5136)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5188 (0.5740)  time: 0.5944  data: 0.0004  max mem: 53905
Epoch: [95]  [2200/2502]  eta: 0:02:59  lr: 0.003318  min_lr: 0.003318  loss: 3.5027 (3.5124)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6020 (0.5749)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [95]  [2400/2502]  eta: 0:01:00  lr: 0.003317  min_lr: 0.003317  loss: 3.4114 (3.5118)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5619 (0.5778)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [95]  [2501/2502]  eta: 0:00:00  lr: 0.003316  min_lr: 0.003316  loss: 3.6358 (3.5135)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5975 (0.5805)  time: 0.5342  data: 0.0007  max mem: 53905
Epoch: [95] Total time: 0:24:46 (0.5941 s / it)
Averaged stats: lr: 0.003316  min_lr: 0.003316  loss: 3.6358 (3.5068)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5975 (0.5805)
Test:  [ 0/50]  eta: 0:02:27  loss: 0.6114 (0.6114)  acc1: 91.2000 (91.2000)  acc5: 98.8000 (98.8000)  time: 2.9500  data: 2.6428  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8560 (0.9134)  acc1: 84.0000 (83.5636)  acc5: 97.6000 (96.6546)  time: 0.5891  data: 0.3029  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9350 (0.9364)  acc1: 82.4000 (82.4381)  acc5: 96.8000 (96.6476)  time: 0.3191  data: 0.0347  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0630 (1.0499)  acc1: 77.6000 (79.9613)  acc5: 94.4000 (95.1484)  time: 0.2852  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3502 (1.1160)  acc1: 72.4000 (78.0390)  acc5: 91.6000 (94.3317)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3418 (1.1414)  acc1: 72.0000 (77.0800)  acc5: 91.2000 (94.0800)  time: 0.2848  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3546 s / it)
* Acc@1 77.002 Acc@5 94.072 loss 1.137
Accuracy of the model on the 50000 test images: 77.0%
Max accuracy: 77.00%
Epoch: [96]  [   0/2502]  eta: 1:31:36  lr: 0.003316  min_lr: 0.003316  loss: 3.7250 (3.7250)  weight_decay: 0.0500 (0.0500)  time: 2.1968  data: 1.5901  max mem: 53905
Epoch: [96]  [ 200/2502]  eta: 0:23:04  lr: 0.003315  min_lr: 0.003315  loss: 3.6111 (3.4504)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5280 (0.5493)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [96]  [ 400/2502]  eta: 0:20:56  lr: 0.003313  min_lr: 0.003313  loss: 3.7099 (3.4678)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5743 (0.5617)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [96]  [ 600/2502]  eta: 0:18:53  lr: 0.003312  min_lr: 0.003312  loss: 3.6405 (3.4623)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5097 (0.5636)  time: 0.5994  data: 0.0004  max mem: 53905
Epoch: [96]  [ 800/2502]  eta: 0:16:52  lr: 0.003311  min_lr: 0.003311  loss: 3.5208 (3.4670)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5528 (0.5665)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [96]  [1000/2502]  eta: 0:14:52  lr: 0.003309  min_lr: 0.003309  loss: 3.5211 (3.4721)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5706 (0.5719)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [96]  [1200/2502]  eta: 0:12:53  lr: 0.003308  min_lr: 0.003308  loss: 3.5608 (3.4827)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6277 (0.5788)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [96]  [1400/2502]  eta: 0:10:54  lr: 0.003307  min_lr: 0.003307  loss: 3.3512 (3.4831)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5732 (0.5782)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [96]  [1600/2502]  eta: 0:08:55  lr: 0.003305  min_lr: 0.003305  loss: 3.5752 (3.4922)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5490 (0.5789)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [96]  [1800/2502]  eta: 0:06:56  lr: 0.003304  min_lr: 0.003304  loss: 3.7018 (3.4936)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5343 (0.5771)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [96]  [2000/2502]  eta: 0:04:57  lr: 0.003302  min_lr: 0.003302  loss: 3.7891 (3.4960)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5414 (0.5783)  time: 0.5952  data: 0.0004  max mem: 53905
Epoch: [96]  [2200/2502]  eta: 0:02:59  lr: 0.003301  min_lr: 0.003301  loss: 3.7296 (3.4949)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5630 (0.5777)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [96]  [2400/2502]  eta: 0:01:00  lr: 0.003300  min_lr: 0.003300  loss: 3.6867 (3.4952)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.4960 (0.5785)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [96]  [2501/2502]  eta: 0:00:00  lr: 0.003299  min_lr: 0.003299  loss: 3.7380 (3.4987)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5859 (0.5795)  time: 0.5320  data: 0.0007  max mem: 53905
Epoch: [96] Total time: 0:24:43 (0.5928 s / it)
Averaged stats: lr: 0.003299  min_lr: 0.003299  loss: 3.7380 (3.5080)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5859 (0.5795)
Test:  [ 0/50]  eta: 0:03:09  loss: 0.7719 (0.7719)  acc1: 88.8000 (88.8000)  acc5: 98.0000 (98.0000)  time: 3.7885  data: 3.4803  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.9611 (1.0207)  acc1: 80.4000 (81.8545)  acc5: 97.2000 (96.6546)  time: 0.6009  data: 0.3168  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9983 (1.0412)  acc1: 80.4000 (81.0667)  acc5: 96.8000 (96.6476)  time: 0.2824  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1470 (1.1451)  acc1: 77.6000 (78.9290)  acc5: 93.6000 (95.0839)  time: 0.2827  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4126 (1.2141)  acc1: 72.0000 (77.1415)  acc5: 91.6000 (94.1951)  time: 0.2826  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4126 (1.2248)  acc1: 72.0000 (76.8480)  acc5: 91.6000 (94.0560)  time: 0.2824  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3549 s / it)
* Acc@1 77.040 Acc@5 94.200 loss 1.225
Accuracy of the model on the 50000 test images: 77.0%
Max accuracy: 77.04%
Epoch: [97]  [   0/2502]  eta: 1:48:43  lr: 0.003299  min_lr: 0.003299  loss: 3.3460 (3.3460)  weight_decay: 0.0500 (0.0500)  time: 2.6072  data: 2.0054  max mem: 53905
Epoch: [97]  [ 200/2502]  eta: 0:23:11  lr: 0.003298  min_lr: 0.003298  loss: 3.4878 (3.4732)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5662 (0.5816)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [97]  [ 400/2502]  eta: 0:20:58  lr: 0.003296  min_lr: 0.003296  loss: 3.5026 (3.4627)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5735 (0.5809)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [97]  [ 600/2502]  eta: 0:18:54  lr: 0.003295  min_lr: 0.003295  loss: 3.6462 (3.4676)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5470 (0.5773)  time: 0.5968  data: 0.0006  max mem: 53905
Epoch: [97]  [ 800/2502]  eta: 0:16:54  lr: 0.003294  min_lr: 0.003294  loss: 3.4890 (3.4723)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5987 (0.5815)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [97]  [1000/2502]  eta: 0:14:54  lr: 0.003292  min_lr: 0.003292  loss: 3.6563 (3.4883)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5528 (0.5802)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [97]  [1200/2502]  eta: 0:12:54  lr: 0.003291  min_lr: 0.003291  loss: 3.4274 (3.4921)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6297 (0.5835)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [97]  [1400/2502]  eta: 0:10:55  lr: 0.003289  min_lr: 0.003289  loss: 3.5991 (3.4941)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6195 (0.5856)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [97]  [1600/2502]  eta: 0:08:56  lr: 0.003288  min_lr: 0.003288  loss: 3.6563 (3.4979)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5367 (0.5806)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [97]  [1800/2502]  eta: 0:06:57  lr: 0.003287  min_lr: 0.003287  loss: 3.6701 (3.5046)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5643 (0.5806)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [97]  [2000/2502]  eta: 0:04:58  lr: 0.003285  min_lr: 0.003285  loss: 3.3770 (3.5030)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6521 (0.5826)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [97]  [2200/2502]  eta: 0:02:59  lr: 0.003284  min_lr: 0.003284  loss: 3.7345 (3.5067)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5730 (0.5850)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [97]  [2400/2502]  eta: 0:01:00  lr: 0.003283  min_lr: 0.003283  loss: 3.5672 (3.5067)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5914 (0.5839)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [97]  [2501/2502]  eta: 0:00:00  lr: 0.003282  min_lr: 0.003282  loss: 3.5761 (3.5051)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5576 (0.5836)  time: 0.5334  data: 0.0007  max mem: 53905
Epoch: [97] Total time: 0:24:45 (0.5939 s / it)
Averaged stats: lr: 0.003282  min_lr: 0.003282  loss: 3.5761 (3.5046)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5576 (0.5836)
Test:  [ 0/50]  eta: 0:02:42  loss: 0.6419 (0.6419)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 3.2550  data: 2.9196  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.9510 (0.9723)  acc1: 82.8000 (82.7273)  acc5: 96.4000 (96.5818)  time: 0.5788  data: 0.2880  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9631 (0.9901)  acc1: 81.2000 (82.0571)  acc5: 96.4000 (96.5905)  time: 0.2982  data: 0.0127  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1750 (1.1150)  acc1: 77.6000 (79.3419)  acc5: 94.4000 (95.2516)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4244 (1.1886)  acc1: 72.8000 (77.6976)  acc5: 91.6000 (94.4000)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4027 (1.2051)  acc1: 72.8000 (77.2320)  acc5: 92.0000 (94.1920)  time: 0.2846  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3518 s / it)
* Acc@1 76.836 Acc@5 93.968 loss 1.214
Accuracy of the model on the 50000 test images: 76.8%
Max accuracy: 77.04%
Epoch: [98]  [   0/2502]  eta: 2:00:56  lr: 0.003282  min_lr: 0.003282  loss: 3.5731 (3.5731)  weight_decay: 0.0500 (0.0500)  time: 2.9004  data: 1.8217  max mem: 53905
Epoch: [98]  [ 200/2502]  eta: 0:23:08  lr: 0.003281  min_lr: 0.003281  loss: 3.6867 (3.4945)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5102 (0.5823)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [98]  [ 400/2502]  eta: 0:20:58  lr: 0.003279  min_lr: 0.003279  loss: 3.5468 (3.4967)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5549 (0.5719)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [98]  [ 600/2502]  eta: 0:18:54  lr: 0.003278  min_lr: 0.003278  loss: 3.5135 (3.4860)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5537 (0.5764)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [98]  [ 800/2502]  eta: 0:16:53  lr: 0.003276  min_lr: 0.003276  loss: 3.5016 (3.4757)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5389 (0.5831)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [98]  [1000/2502]  eta: 0:14:53  lr: 0.003275  min_lr: 0.003275  loss: 3.4972 (3.4863)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5537 (0.5831)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [98]  [1200/2502]  eta: 0:12:53  lr: 0.003274  min_lr: 0.003274  loss: 3.6066 (3.4841)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5760 (0.5836)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [98]  [1400/2502]  eta: 0:10:54  lr: 0.003272  min_lr: 0.003272  loss: 3.5588 (3.4847)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5537 (0.5838)  time: 0.5950  data: 0.0005  max mem: 53905
Epoch: [98]  [1600/2502]  eta: 0:08:55  lr: 0.003271  min_lr: 0.003271  loss: 3.6225 (3.4874)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5383 (0.5817)  time: 0.5912  data: 0.0003  max mem: 53905
Epoch: [98]  [1800/2502]  eta: 0:06:56  lr: 0.003269  min_lr: 0.003269  loss: 3.6254 (3.4830)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5430 (0.5814)  time: 0.5925  data: 0.0006  max mem: 53905
Epoch: [98]  [2000/2502]  eta: 0:04:57  lr: 0.003268  min_lr: 0.003268  loss: 3.4813 (3.4809)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6012 (0.5853)  time: 0.5974  data: 0.0004  max mem: 53905
Epoch: [98]  [2200/2502]  eta: 0:02:59  lr: 0.003267  min_lr: 0.003267  loss: 3.1331 (3.4788)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5693 (0.5867)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [98]  [2400/2502]  eta: 0:01:00  lr: 0.003265  min_lr: 0.003265  loss: 3.7579 (3.4772)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5557 (0.5854)  time: 0.5934  data: 0.0005  max mem: 53905
Epoch: [98]  [2501/2502]  eta: 0:00:00  lr: 0.003265  min_lr: 0.003265  loss: 3.6220 (3.4804)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5452 (0.5845)  time: 0.5331  data: 0.0007  max mem: 53905
Epoch: [98] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.003265  min_lr: 0.003265  loss: 3.6220 (3.4955)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5452 (0.5845)
Test:  [ 0/50]  eta: 0:02:58  loss: 0.6944 (0.6944)  acc1: 90.8000 (90.8000)  acc5: 99.2000 (99.2000)  time: 3.5693  data: 3.2475  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.9371 (0.9744)  acc1: 80.4000 (82.5091)  acc5: 97.2000 (96.6546)  time: 0.5837  data: 0.2956  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9371 (0.9880)  acc1: 80.4000 (81.9619)  acc5: 97.2000 (96.7238)  time: 0.2855  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1445 (1.1153)  acc1: 76.8000 (79.2645)  acc5: 93.6000 (95.0452)  time: 0.2858  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4215 (1.1886)  acc1: 71.6000 (77.3756)  acc5: 91.6000 (94.2634)  time: 0.2855  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3773 (1.1991)  acc1: 71.6000 (77.0160)  acc5: 91.6000 (94.0880)  time: 0.2852  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3531 s / it)
* Acc@1 77.024 Acc@5 94.088 loss 1.202
Accuracy of the model on the 50000 test images: 77.0%
Max accuracy: 77.04%
Epoch: [99]  [   0/2502]  eta: 1:58:26  lr: 0.003265  min_lr: 0.003265  loss: 3.6076 (3.6076)  weight_decay: 0.0500 (0.0500)  time: 2.8403  data: 1.5956  max mem: 53905
Epoch: [99]  [ 200/2502]  eta: 0:23:12  lr: 0.003263  min_lr: 0.003263  loss: 3.4588 (3.4423)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [99]  [ 400/2502]  eta: 0:20:57  lr: 0.003262  min_lr: 0.003262  loss: 3.6397 (3.4939)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6134 (nan)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [99]  [ 600/2502]  eta: 0:18:55  lr: 0.003260  min_lr: 0.003260  loss: 3.4474 (3.4865)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5484 (nan)  time: 0.5950  data: 0.0005  max mem: 53905
Epoch: [99]  [ 800/2502]  eta: 0:16:54  lr: 0.003259  min_lr: 0.003259  loss: 3.6347 (3.4911)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5719 (nan)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [99]  [1000/2502]  eta: 0:14:54  lr: 0.003258  min_lr: 0.003258  loss: 3.3775 (3.4751)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5998 (nan)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [99]  [1200/2502]  eta: 0:12:54  lr: 0.003256  min_lr: 0.003256  loss: 3.8160 (3.4843)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5681 (nan)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [99]  [1400/2502]  eta: 0:10:55  lr: 0.003255  min_lr: 0.003255  loss: 3.5454 (3.4900)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5680 (nan)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [99]  [1600/2502]  eta: 0:08:56  lr: 0.003253  min_lr: 0.003253  loss: 3.6762 (3.4932)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5908 (nan)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [99]  [1800/2502]  eta: 0:06:57  lr: 0.003252  min_lr: 0.003252  loss: 3.5126 (3.4976)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5496 (nan)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [99]  [2000/2502]  eta: 0:04:58  lr: 0.003251  min_lr: 0.003251  loss: 3.5799 (3.4952)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5507 (nan)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [99]  [2200/2502]  eta: 0:02:59  lr: 0.003249  min_lr: 0.003249  loss: 3.7474 (3.4953)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5866 (nan)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [99]  [2400/2502]  eta: 0:01:00  lr: 0.003248  min_lr: 0.003248  loss: 3.2153 (3.4943)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5695 (nan)  time: 0.5966  data: 0.0005  max mem: 53905
Epoch: [99]  [2501/2502]  eta: 0:00:00  lr: 0.003247  min_lr: 0.003247  loss: 3.5590 (3.4942)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6084 (nan)  time: 0.5366  data: 0.0009  max mem: 53905
Epoch: [99] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.003247  min_lr: 0.003247  loss: 3.5590 (3.4952)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6084 (nan)
Test:  [ 0/50]  eta: 0:03:37  loss: 0.6059 (0.6059)  acc1: 92.4000 (92.4000)  acc5: 98.4000 (98.4000)  time: 4.3595  data: 4.0358  max mem: 53905
Test:  [10/50]  eta: 0:00:30  loss: 0.9155 (0.9727)  acc1: 83.2000 (82.6545)  acc5: 97.2000 (96.5455)  time: 0.7546  data: 0.4559  max mem: 53905
Test:  [20/50]  eta: 0:00:17  loss: 0.9633 (0.9989)  acc1: 81.6000 (81.8667)  acc5: 96.8000 (96.6095)  time: 0.3782  data: 0.0854  max mem: 53905
Test:  [30/50]  eta: 0:00:09  loss: 1.1672 (1.1117)  acc1: 77.2000 (79.3419)  acc5: 94.0000 (95.0710)  time: 0.3233  data: 0.0366  max mem: 53905
Test:  [40/50]  eta: 0:00:04  loss: 1.3984 (1.1913)  acc1: 71.2000 (77.4439)  acc5: 91.2000 (94.1366)  time: 0.2841  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3992 (1.2065)  acc1: 70.8000 (76.9680)  acc5: 92.0000 (94.0000)  time: 0.2838  data: 0.0002  max mem: 53905
Test: Total time: 0:00:20 (0.4054 s / it)
* Acc@1 76.876 Acc@5 94.010 loss 1.208
Accuracy of the model on the 50000 test images: 76.9%
Max accuracy: 77.04%
Epoch: [100]  [   0/2502]  eta: 1:58:35  lr: 0.003247  min_lr: 0.003247  loss: 3.7971 (3.7971)  weight_decay: 0.0500 (0.0500)  time: 2.8440  data: 2.2253  max mem: 53905
Epoch: [100]  [ 200/2502]  eta: 0:23:11  lr: 0.003246  min_lr: 0.003246  loss: 3.6675 (3.4982)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6360 (0.6213)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [100]  [ 400/2502]  eta: 0:20:59  lr: 0.003244  min_lr: 0.003244  loss: 3.4583 (3.4725)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5476 (nan)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [100]  [ 600/2502]  eta: 0:18:55  lr: 0.003243  min_lr: 0.003243  loss: 3.5525 (3.4668)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6166 (nan)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [100]  [ 800/2502]  eta: 0:16:54  lr: 0.003242  min_lr: 0.003242  loss: 3.4644 (3.4703)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5953 (nan)  time: 0.5968  data: 0.0004  max mem: 53905
Epoch: [100]  [1000/2502]  eta: 0:14:54  lr: 0.003240  min_lr: 0.003240  loss: 3.7777 (3.4781)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5557 (nan)  time: 0.5995  data: 0.0004  max mem: 53905
Epoch: [100]  [1200/2502]  eta: 0:12:54  lr: 0.003239  min_lr: 0.003239  loss: 3.4074 (3.4679)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5893 (nan)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [100]  [1400/2502]  eta: 0:10:55  lr: 0.003237  min_lr: 0.003237  loss: 3.6538 (3.4748)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6457 (nan)  time: 0.5990  data: 0.0004  max mem: 53905
Epoch: [100]  [1600/2502]  eta: 0:08:56  lr: 0.003236  min_lr: 0.003236  loss: 3.3483 (3.4790)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5617 (nan)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [100]  [1800/2502]  eta: 0:06:57  lr: 0.003234  min_lr: 0.003234  loss: 3.6804 (3.4826)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5728 (nan)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [100]  [2000/2502]  eta: 0:04:58  lr: 0.003233  min_lr: 0.003233  loss: 3.7215 (3.4818)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5991 (nan)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [100]  [2200/2502]  eta: 0:02:59  lr: 0.003232  min_lr: 0.003232  loss: 3.6158 (3.4837)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6228 (nan)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [100]  [2400/2502]  eta: 0:01:00  lr: 0.003230  min_lr: 0.003230  loss: 3.6940 (3.4839)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5624 (nan)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [100]  [2501/2502]  eta: 0:00:00  lr: 0.003230  min_lr: 0.003230  loss: 3.5247 (3.4851)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5667 (nan)  time: 0.5388  data: 0.0008  max mem: 53905
Epoch: [100] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.003230  min_lr: 0.003230  loss: 3.5247 (3.4907)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5667 (nan)
Test:  [ 0/50]  eta: 0:02:38  loss: 0.5614 (0.5614)  acc1: 92.0000 (92.0000)  acc5: 99.2000 (99.2000)  time: 3.1606  data: 2.8475  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.8704 (0.8933)  acc1: 82.4000 (82.8727)  acc5: 96.4000 (96.8000)  time: 0.5463  data: 0.2593  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8883 (0.9186)  acc1: 81.6000 (82.2095)  acc5: 96.8000 (96.8191)  time: 0.2851  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0524 (1.0425)  acc1: 78.0000 (79.8710)  acc5: 94.8000 (95.2258)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3611 (1.1185)  acc1: 73.2000 (77.9805)  acc5: 91.2000 (94.4098)  time: 0.2851  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3341 (1.1397)  acc1: 73.2000 (77.4400)  acc5: 92.4000 (94.2160)  time: 0.2849  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3452 s / it)
* Acc@1 77.226 Acc@5 94.246 loss 1.147
Accuracy of the model on the 50000 test images: 77.2%
Max accuracy: 77.23%
Epoch: [101]  [   0/2502]  eta: 1:40:31  lr: 0.003230  min_lr: 0.003230  loss: 3.9736 (3.9736)  weight_decay: 0.0500 (0.0500)  time: 2.4107  data: 1.8165  max mem: 53905
Epoch: [101]  [ 200/2502]  eta: 0:23:06  lr: 0.003228  min_lr: 0.003228  loss: 3.4994 (3.4662)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5795 (0.5615)  time: 0.5936  data: 0.0004  max mem: 53905
Epoch: [101]  [ 400/2502]  eta: 0:20:56  lr: 0.003227  min_lr: 0.003227  loss: 3.3144 (3.4780)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6156 (0.5836)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [101]  [ 600/2502]  eta: 0:18:54  lr: 0.003225  min_lr: 0.003225  loss: 3.3250 (3.4489)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6180 (0.5928)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [101]  [ 800/2502]  eta: 0:16:53  lr: 0.003224  min_lr: 0.003224  loss: 3.7213 (3.4449)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5855 (0.5967)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [101]  [1000/2502]  eta: 0:14:53  lr: 0.003222  min_lr: 0.003222  loss: 3.4656 (3.4360)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6166 (0.5910)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [101]  [1200/2502]  eta: 0:12:54  lr: 0.003221  min_lr: 0.003221  loss: 3.3985 (3.4320)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5717 (0.5900)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [101]  [1400/2502]  eta: 0:10:55  lr: 0.003220  min_lr: 0.003220  loss: 3.3631 (3.4416)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5797 (0.5914)  time: 0.6037  data: 0.0007  max mem: 53905
Epoch: [101]  [1600/2502]  eta: 0:08:56  lr: 0.003218  min_lr: 0.003218  loss: 3.4842 (3.4562)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5609 (0.5937)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [101]  [1800/2502]  eta: 0:06:57  lr: 0.003217  min_lr: 0.003217  loss: 3.6942 (3.4608)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6274 (0.5952)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [101]  [2000/2502]  eta: 0:04:58  lr: 0.003215  min_lr: 0.003215  loss: 3.3685 (3.4691)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5702 (0.5969)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [101]  [2200/2502]  eta: 0:02:59  lr: 0.003214  min_lr: 0.003214  loss: 3.4977 (3.4736)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5732 (0.5938)  time: 0.5991  data: 0.0004  max mem: 53905
Epoch: [101]  [2400/2502]  eta: 0:01:00  lr: 0.003212  min_lr: 0.003212  loss: 3.5089 (3.4782)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6014 (0.5938)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [101]  [2501/2502]  eta: 0:00:00  lr: 0.003212  min_lr: 0.003212  loss: 3.6023 (3.4796)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5486 (0.5929)  time: 0.5346  data: 0.0007  max mem: 53905
Epoch: [101] Total time: 0:24:44 (0.5935 s / it)
Averaged stats: lr: 0.003212  min_lr: 0.003212  loss: 3.6023 (3.4839)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5486 (0.5929)
Test:  [ 0/50]  eta: 0:03:17  loss: 0.6540 (0.6540)  acc1: 91.6000 (91.6000)  acc5: 98.8000 (98.8000)  time: 3.9476  data: 3.6133  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.9604 (1.0101)  acc1: 84.4000 (82.5818)  acc5: 96.4000 (96.7636)  time: 0.6177  data: 0.3289  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0332 (1.0242)  acc1: 80.8000 (82.0571)  acc5: 96.4000 (96.8762)  time: 0.2849  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1180 (1.1299)  acc1: 78.0000 (79.9226)  acc5: 94.4000 (95.2000)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4321 (1.2035)  acc1: 73.2000 (78.0098)  acc5: 91.2000 (94.2829)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4061 (1.2224)  acc1: 72.4000 (77.3760)  acc5: 92.0000 (94.0480)  time: 0.2847  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3604 s / it)
* Acc@1 77.074 Acc@5 94.172 loss 1.229
Accuracy of the model on the 50000 test images: 77.1%
Max accuracy: 77.23%
Epoch: [102]  [   0/2502]  eta: 1:54:00  lr: 0.003212  min_lr: 0.003212  loss: 2.2134 (2.2134)  weight_decay: 0.0500 (0.0500)  time: 2.7338  data: 1.5075  max mem: 53905
Epoch: [102]  [ 200/2502]  eta: 0:23:08  lr: 0.003210  min_lr: 0.003210  loss: 3.6478 (3.4756)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5480 (0.5694)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [102]  [ 400/2502]  eta: 0:20:59  lr: 0.003209  min_lr: 0.003209  loss: 3.6583 (3.4739)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5604 (0.5643)  time: 0.6015  data: 0.0005  max mem: 53905
Epoch: [102]  [ 600/2502]  eta: 0:18:56  lr: 0.003207  min_lr: 0.003207  loss: 3.3524 (3.4536)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5707 (0.5707)  time: 0.5935  data: 0.0004  max mem: 53905
Epoch: [102]  [ 800/2502]  eta: 0:16:55  lr: 0.003206  min_lr: 0.003206  loss: 3.6972 (3.4619)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5550 (0.5806)  time: 0.5970  data: 0.0006  max mem: 53905
Epoch: [102]  [1000/2502]  eta: 0:14:54  lr: 0.003205  min_lr: 0.003205  loss: 3.4695 (3.4556)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5829 (0.5818)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [102]  [1200/2502]  eta: 0:12:55  lr: 0.003203  min_lr: 0.003203  loss: 3.3297 (3.4565)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6254 (0.5811)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [102]  [1400/2502]  eta: 0:10:55  lr: 0.003202  min_lr: 0.003202  loss: 3.1556 (3.4611)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5808 (0.5833)  time: 0.5937  data: 0.0005  max mem: 53905
Epoch: [102]  [1600/2502]  eta: 0:08:56  lr: 0.003200  min_lr: 0.003200  loss: 3.4894 (3.4579)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6161 (0.5862)  time: 0.5998  data: 0.0007  max mem: 53905
Epoch: [102]  [1800/2502]  eta: 0:06:57  lr: 0.003199  min_lr: 0.003199  loss: 3.6090 (3.4602)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5645 (0.5850)  time: 0.5935  data: 0.0005  max mem: 53905
Epoch: [102]  [2000/2502]  eta: 0:04:58  lr: 0.003197  min_lr: 0.003197  loss: 3.6009 (3.4619)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5557 (0.5879)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [102]  [2200/2502]  eta: 0:02:59  lr: 0.003196  min_lr: 0.003196  loss: 3.4213 (3.4651)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5574 (0.5890)  time: 0.5980  data: 0.0007  max mem: 53905
Epoch: [102]  [2400/2502]  eta: 0:01:00  lr: 0.003195  min_lr: 0.003195  loss: 3.5775 (3.4643)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5927 (0.5883)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [102]  [2501/2502]  eta: 0:00:00  lr: 0.003194  min_lr: 0.003194  loss: 3.6043 (3.4648)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6026 (0.5889)  time: 0.5332  data: 0.0009  max mem: 53905
Epoch: [102] Total time: 0:24:46 (0.5942 s / it)
Averaged stats: lr: 0.003194  min_lr: 0.003194  loss: 3.6043 (3.4813)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6026 (0.5889)
Test:  [ 0/50]  eta: 0:03:14  loss: 0.6428 (0.6428)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 3.8928  data: 3.5756  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.9847 (0.9704)  acc1: 80.8000 (82.2545)  acc5: 96.4000 (96.2545)  time: 0.6129  data: 0.3255  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0178 (0.9952)  acc1: 80.8000 (81.4095)  acc5: 96.8000 (96.6095)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1454 (1.1086)  acc1: 78.8000 (79.0710)  acc5: 94.8000 (95.0452)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4210 (1.1781)  acc1: 73.6000 (77.3951)  acc5: 91.2000 (94.2244)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4136 (1.1953)  acc1: 73.2000 (76.9440)  acc5: 91.6000 (94.0320)  time: 0.2849  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3602 s / it)
* Acc@1 77.278 Acc@5 94.134 loss 1.193
Accuracy of the model on the 50000 test images: 77.3%
Max accuracy: 77.28%
Epoch: [103]  [   0/2502]  eta: 1:44:19  lr: 0.003194  min_lr: 0.003194  loss: 2.0060 (2.0060)  weight_decay: 0.0500 (0.0500)  time: 2.5019  data: 1.8998  max mem: 53905
Epoch: [103]  [ 200/2502]  eta: 0:23:10  lr: 0.003192  min_lr: 0.003192  loss: 3.5754 (3.4602)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5850 (0.5869)  time: 0.5919  data: 0.0008  max mem: 53905
Epoch: [103]  [ 400/2502]  eta: 0:20:57  lr: 0.003191  min_lr: 0.003191  loss: 3.4040 (3.4549)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5798 (0.5934)  time: 0.5927  data: 0.0006  max mem: 53905
Epoch: [103]  [ 600/2502]  eta: 0:18:55  lr: 0.003190  min_lr: 0.003190  loss: 3.6259 (3.4712)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5674 (0.5953)  time: 0.5918  data: 0.0007  max mem: 53905
Epoch: [103]  [ 800/2502]  eta: 0:16:54  lr: 0.003188  min_lr: 0.003188  loss: 3.3823 (3.4577)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5362 (0.5923)  time: 0.5920  data: 0.0008  max mem: 53905
Epoch: [103]  [1000/2502]  eta: 0:14:53  lr: 0.003187  min_lr: 0.003187  loss: 3.6505 (3.4682)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5891 (0.5913)  time: 0.5926  data: 0.0007  max mem: 53905
Epoch: [103]  [1200/2502]  eta: 0:12:54  lr: 0.003185  min_lr: 0.003185  loss: 3.4397 (3.4667)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5952 (0.5943)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [103]  [1400/2502]  eta: 0:10:55  lr: 0.003184  min_lr: 0.003184  loss: 3.4866 (3.4684)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6013 (0.5940)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [103]  [1600/2502]  eta: 0:08:56  lr: 0.003182  min_lr: 0.003182  loss: 3.7031 (3.4731)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5629 (0.5924)  time: 0.5978  data: 0.0006  max mem: 53905
Epoch: [103]  [1800/2502]  eta: 0:06:57  lr: 0.003181  min_lr: 0.003181  loss: 3.8266 (3.4783)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5572 (0.5897)  time: 0.5917  data: 0.0007  max mem: 53905
Epoch: [103]  [2000/2502]  eta: 0:04:58  lr: 0.003179  min_lr: 0.003179  loss: 3.4125 (3.4758)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5960 (0.5920)  time: 0.5918  data: 0.0007  max mem: 53905
Epoch: [103]  [2200/2502]  eta: 0:02:59  lr: 0.003178  min_lr: 0.003178  loss: 3.2624 (3.4724)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5787 (0.5929)  time: 0.5932  data: 0.0006  max mem: 53905
Epoch: [103]  [2400/2502]  eta: 0:01:00  lr: 0.003177  min_lr: 0.003177  loss: 3.3744 (3.4751)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5509 (0.5911)  time: 0.5949  data: 0.0006  max mem: 53905
Epoch: [103]  [2501/2502]  eta: 0:00:00  lr: 0.003176  min_lr: 0.003176  loss: 3.7040 (3.4783)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5923 (0.5923)  time: 0.5342  data: 0.0007  max mem: 53905
Epoch: [103] Total time: 0:24:45 (0.5936 s / it)
Averaged stats: lr: 0.003176  min_lr: 0.003176  loss: 3.7040 (3.4823)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5923 (0.5923)
Test:  [ 0/50]  eta: 0:03:15  loss: 0.6409 (0.6409)  acc1: 91.6000 (91.6000)  acc5: 99.2000 (99.2000)  time: 3.9162  data: 3.5950  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.9272 (0.9610)  acc1: 82.0000 (82.4364)  acc5: 97.6000 (96.8364)  time: 0.6146  data: 0.3272  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9617 (0.9852)  acc1: 81.2000 (81.5238)  acc5: 96.8000 (96.4571)  time: 0.2847  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.0656 (1.0877)  acc1: 75.6000 (79.1613)  acc5: 93.2000 (95.1097)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3964 (1.1582)  acc1: 73.2000 (77.4927)  acc5: 91.2000 (94.1659)  time: 0.2847  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3601 (1.1774)  acc1: 73.2000 (77.0160)  acc5: 91.6000 (93.9440)  time: 0.2845  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3597 s / it)
* Acc@1 76.958 Acc@5 94.026 loss 1.181
Accuracy of the model on the 50000 test images: 77.0%
Max accuracy: 77.28%
Epoch: [104]  [   0/2502]  eta: 1:54:23  lr: 0.003176  min_lr: 0.003176  loss: 3.9468 (3.9468)  weight_decay: 0.0500 (0.0500)  time: 2.7432  data: 1.8056  max mem: 53905
Epoch: [104]  [ 200/2502]  eta: 0:23:07  lr: 0.003174  min_lr: 0.003174  loss: 3.6061 (3.4804)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5982 (0.5752)  time: 0.5915  data: 0.0006  max mem: 53905
Epoch: [104]  [ 400/2502]  eta: 0:20:58  lr: 0.003173  min_lr: 0.003173  loss: 3.4284 (3.4750)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6124 (0.5854)  time: 0.5912  data: 0.0006  max mem: 53905
Epoch: [104]  [ 600/2502]  eta: 0:18:54  lr: 0.003171  min_lr: 0.003171  loss: 3.4942 (3.4694)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5753 (0.5930)  time: 0.5932  data: 0.0006  max mem: 53905
Epoch: [104]  [ 800/2502]  eta: 0:16:53  lr: 0.003170  min_lr: 0.003170  loss: 3.4467 (3.4619)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6025 (0.5894)  time: 0.5913  data: 0.0007  max mem: 53905
Epoch: [104]  [1000/2502]  eta: 0:14:53  lr: 0.003169  min_lr: 0.003169  loss: 3.7988 (3.4712)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5829 (0.5886)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [104]  [1200/2502]  eta: 0:12:54  lr: 0.003167  min_lr: 0.003167  loss: 3.4040 (3.4627)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6036 (0.5912)  time: 0.5925  data: 0.0006  max mem: 53905
Epoch: [104]  [1400/2502]  eta: 0:10:55  lr: 0.003166  min_lr: 0.003166  loss: 3.4283 (3.4740)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5951 (0.5918)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [104]  [1600/2502]  eta: 0:08:56  lr: 0.003164  min_lr: 0.003164  loss: 3.6743 (3.4762)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5441 (0.5895)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [104]  [1800/2502]  eta: 0:06:56  lr: 0.003163  min_lr: 0.003163  loss: 3.4440 (3.4849)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5711 (0.5899)  time: 0.5907  data: 0.0006  max mem: 53905
Epoch: [104]  [2000/2502]  eta: 0:04:58  lr: 0.003161  min_lr: 0.003161  loss: 3.6933 (3.4868)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5558 (0.5895)  time: 0.5978  data: 0.0005  max mem: 53905
Epoch: [104]  [2200/2502]  eta: 0:02:59  lr: 0.003160  min_lr: 0.003160  loss: 3.5871 (3.4850)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5518 (0.5888)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [104]  [2400/2502]  eta: 0:01:00  lr: 0.003158  min_lr: 0.003158  loss: 3.4548 (3.4873)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5422 (0.5906)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [104]  [2501/2502]  eta: 0:00:00  lr: 0.003158  min_lr: 0.003158  loss: 3.4843 (3.4853)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5301 (0.5888)  time: 0.5333  data: 0.0007  max mem: 53905
Epoch: [104] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.003158  min_lr: 0.003158  loss: 3.4843 (3.4745)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5301 (0.5888)
Test:  [ 0/50]  eta: 0:02:33  loss: 0.6639 (0.6639)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 3.0712  data: 2.7551  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8752 (0.9597)  acc1: 81.6000 (82.9455)  acc5: 97.2000 (96.7636)  time: 0.5736  data: 0.2872  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0023 (0.9947)  acc1: 80.8000 (82.0191)  acc5: 96.4000 (96.6857)  time: 0.3039  data: 0.0205  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1236 (1.0927)  acc1: 78.4000 (79.9871)  acc5: 94.0000 (95.2774)  time: 0.2840  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3938 (1.1613)  acc1: 73.6000 (78.1561)  acc5: 92.0000 (94.4098)  time: 0.2835  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3277 (1.1720)  acc1: 72.8000 (77.4080)  acc5: 92.0000 (94.2960)  time: 0.2831  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3505 s / it)
* Acc@1 77.214 Acc@5 94.200 loss 1.176
Accuracy of the model on the 50000 test images: 77.2%
Max accuracy: 77.28%
Epoch: [105]  [   0/2502]  eta: 1:57:36  lr: 0.003158  min_lr: 0.003158  loss: 3.7199 (3.7199)  weight_decay: 0.0500 (0.0500)  time: 2.8205  data: 1.9594  max mem: 53905
Epoch: [105]  [ 200/2502]  eta: 0:23:07  lr: 0.003156  min_lr: 0.003156  loss: 3.7444 (3.4576)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5726 (0.5822)  time: 0.5907  data: 0.0007  max mem: 53905
Epoch: [105]  [ 400/2502]  eta: 0:20:55  lr: 0.003155  min_lr: 0.003155  loss: 3.8724 (3.4483)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5405 (nan)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [105]  [ 600/2502]  eta: 0:18:54  lr: 0.003153  min_lr: 0.003153  loss: 3.5818 (3.4781)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5643 (nan)  time: 0.5923  data: 0.0006  max mem: 53905
Epoch: [105]  [ 800/2502]  eta: 0:16:53  lr: 0.003152  min_lr: 0.003152  loss: 3.1233 (3.4678)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5984 (nan)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [105]  [1000/2502]  eta: 0:14:53  lr: 0.003150  min_lr: 0.003150  loss: 3.5661 (3.4616)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5792 (nan)  time: 0.5929  data: 0.0007  max mem: 53905
Epoch: [105]  [1200/2502]  eta: 0:12:54  lr: 0.003149  min_lr: 0.003149  loss: 3.5723 (3.4661)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5541 (nan)  time: 0.5936  data: 0.0006  max mem: 53905
Epoch: [105]  [1400/2502]  eta: 0:10:55  lr: 0.003147  min_lr: 0.003147  loss: 3.7612 (3.4665)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6291 (nan)  time: 0.5935  data: 0.0007  max mem: 53905
Epoch: [105]  [1600/2502]  eta: 0:08:56  lr: 0.003146  min_lr: 0.003146  loss: 3.6058 (3.4707)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5444 (nan)  time: 0.5976  data: 0.0006  max mem: 53905
Epoch: [105]  [1800/2502]  eta: 0:06:57  lr: 0.003144  min_lr: 0.003144  loss: 3.5880 (3.4778)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5589 (nan)  time: 0.5922  data: 0.0007  max mem: 53905
Epoch: [105]  [2000/2502]  eta: 0:04:58  lr: 0.003143  min_lr: 0.003143  loss: 3.3138 (3.4765)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5939 (nan)  time: 0.5925  data: 0.0008  max mem: 53905
Epoch: [105]  [2200/2502]  eta: 0:02:59  lr: 0.003141  min_lr: 0.003141  loss: 3.6800 (3.4792)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5696 (nan)  time: 0.5924  data: 0.0007  max mem: 53905
Epoch: [105]  [2400/2502]  eta: 0:01:00  lr: 0.003140  min_lr: 0.003140  loss: 3.4911 (3.4779)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5979 (nan)  time: 0.5926  data: 0.0008  max mem: 53905
Epoch: [105]  [2501/2502]  eta: 0:00:00  lr: 0.003139  min_lr: 0.003139  loss: 3.6691 (3.4769)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6013 (nan)  time: 0.5335  data: 0.0008  max mem: 53905
Epoch: [105] Total time: 0:24:46 (0.5939 s / it)
Averaged stats: lr: 0.003139  min_lr: 0.003139  loss: 3.6691 (3.4682)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6013 (nan)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.5935 (0.5935)  acc1: 89.2000 (89.2000)  acc5: 99.2000 (99.2000)  time: 3.0440  data: 2.7063  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8902 (0.8841)  acc1: 82.4000 (82.5091)  acc5: 97.2000 (97.0182)  time: 0.5770  data: 0.2887  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9065 (0.9111)  acc1: 80.8000 (81.8667)  acc5: 97.2000 (97.0857)  time: 0.3081  data: 0.0237  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1275 (1.0230)  acc1: 77.6000 (79.8968)  acc5: 94.8000 (95.3290)  time: 0.2914  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3393 (1.1021)  acc1: 74.0000 (78.1756)  acc5: 91.6000 (94.5756)  time: 0.2908  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3393 (1.1258)  acc1: 73.6000 (77.6080)  acc5: 92.0000 (94.3680)  time: 0.2846  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3539 s / it)
* Acc@1 77.406 Acc@5 94.272 loss 1.135
Accuracy of the model on the 50000 test images: 77.4%
Max accuracy: 77.41%
Epoch: [106]  [   0/2502]  eta: 1:44:48  lr: 0.003139  min_lr: 0.003139  loss: 3.3724 (3.3724)  weight_decay: 0.0500 (0.0500)  time: 2.5135  data: 1.9193  max mem: 53905
Epoch: [106]  [ 200/2502]  eta: 0:23:11  lr: 0.003138  min_lr: 0.003138  loss: 3.3752 (3.4496)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5689 (0.5967)  time: 0.6110  data: 0.0005  max mem: 53905
Epoch: [106]  [ 400/2502]  eta: 0:20:58  lr: 0.003136  min_lr: 0.003136  loss: 3.5491 (3.4370)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5596 (0.5883)  time: 0.5929  data: 0.0006  max mem: 53905
Epoch: [106]  [ 600/2502]  eta: 0:18:55  lr: 0.003135  min_lr: 0.003135  loss: 3.6394 (3.4562)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6079 (0.5989)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [106]  [ 800/2502]  eta: 0:16:54  lr: 0.003133  min_lr: 0.003133  loss: 3.0650 (3.4616)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5849 (0.6046)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [106]  [1000/2502]  eta: 0:14:54  lr: 0.003132  min_lr: 0.003132  loss: 3.6023 (3.4550)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5562 (0.5993)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [106]  [1200/2502]  eta: 0:12:54  lr: 0.003130  min_lr: 0.003130  loss: 3.5258 (3.4529)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5673 (0.5978)  time: 0.5946  data: 0.0005  max mem: 53905
Epoch: [106]  [1400/2502]  eta: 0:10:55  lr: 0.003129  min_lr: 0.003129  loss: 3.3204 (3.4556)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5758 (0.5961)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [106]  [1600/2502]  eta: 0:08:56  lr: 0.003127  min_lr: 0.003127  loss: 3.6729 (3.4616)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6047 (0.5987)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [106]  [1800/2502]  eta: 0:06:57  lr: 0.003126  min_lr: 0.003126  loss: 3.6510 (3.4663)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5473 (0.5962)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [106]  [2000/2502]  eta: 0:04:58  lr: 0.003124  min_lr: 0.003124  loss: 3.3981 (3.4661)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5905 (0.5956)  time: 0.5956  data: 0.0006  max mem: 53905
Epoch: [106]  [2200/2502]  eta: 0:02:59  lr: 0.003123  min_lr: 0.003123  loss: 3.3067 (3.4595)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5691 (0.5962)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [106]  [2400/2502]  eta: 0:01:00  lr: 0.003121  min_lr: 0.003121  loss: 3.7606 (3.4611)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5815 (0.5975)  time: 0.6007  data: 0.0005  max mem: 53905
Epoch: [106]  [2501/2502]  eta: 0:00:00  lr: 0.003121  min_lr: 0.003121  loss: 3.4606 (3.4616)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5808 (0.5979)  time: 0.5337  data: 0.0009  max mem: 53905
Epoch: [106] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.003121  min_lr: 0.003121  loss: 3.4606 (3.4624)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5808 (0.5979)
Test:  [ 0/50]  eta: 0:03:03  loss: 0.6338 (0.6338)  acc1: 92.4000 (92.4000)  acc5: 98.0000 (98.0000)  time: 3.6788  data: 3.3777  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8846 (0.9077)  acc1: 82.8000 (83.7091)  acc5: 97.2000 (96.5455)  time: 0.5981  data: 0.3130  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9031 (0.9132)  acc1: 81.6000 (82.6857)  acc5: 97.2000 (96.8952)  time: 0.2872  data: 0.0035  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0381 (1.0331)  acc1: 77.6000 (80.1806)  acc5: 96.0000 (95.3677)  time: 0.2844  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3189 (1.1107)  acc1: 73.2000 (78.3220)  acc5: 91.6000 (94.4683)  time: 0.2840  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3179 (1.1262)  acc1: 72.8000 (77.5760)  acc5: 92.4000 (94.3920)  time: 0.2837  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3555 s / it)
* Acc@1 77.546 Acc@5 94.318 loss 1.133
Accuracy of the model on the 50000 test images: 77.5%
Max accuracy: 77.55%
Epoch: [107]  [   0/2502]  eta: 1:33:06  lr: 0.003121  min_lr: 0.003121  loss: 3.8423 (3.8423)  weight_decay: 0.0500 (0.0500)  time: 2.2327  data: 1.6346  max mem: 53905
Epoch: [107]  [ 200/2502]  eta: 0:23:05  lr: 0.003119  min_lr: 0.003119  loss: 3.3533 (3.5023)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5681 (0.6131)  time: 0.5993  data: 0.0005  max mem: 53905
Epoch: [107]  [ 400/2502]  eta: 0:20:54  lr: 0.003118  min_lr: 0.003118  loss: 3.5610 (3.4566)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5824 (0.6033)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [107]  [ 600/2502]  eta: 0:18:54  lr: 0.003116  min_lr: 0.003116  loss: 3.7047 (3.4626)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5615 (0.5958)  time: 0.5988  data: 0.0005  max mem: 53905
Epoch: [107]  [ 800/2502]  eta: 0:16:53  lr: 0.003115  min_lr: 0.003115  loss: 3.4172 (3.4676)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5837 (0.6028)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [107]  [1000/2502]  eta: 0:14:53  lr: 0.003113  min_lr: 0.003113  loss: 3.3951 (3.4600)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5605 (0.6000)  time: 0.5992  data: 0.0005  max mem: 53905
Epoch: [107]  [1200/2502]  eta: 0:12:54  lr: 0.003112  min_lr: 0.003112  loss: 3.5106 (3.4590)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5612 (0.5984)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [107]  [1400/2502]  eta: 0:10:55  lr: 0.003110  min_lr: 0.003110  loss: 3.6168 (3.4478)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5835 (0.5981)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [107]  [1600/2502]  eta: 0:08:56  lr: 0.003109  min_lr: 0.003109  loss: 3.3392 (3.4425)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5708 (0.5990)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [107]  [1800/2502]  eta: 0:06:57  lr: 0.003107  min_lr: 0.003107  loss: 3.7133 (3.4447)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6395 (0.6009)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [107]  [2000/2502]  eta: 0:04:58  lr: 0.003106  min_lr: 0.003106  loss: 3.3143 (3.4488)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5753 (0.5997)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [107]  [2200/2502]  eta: 0:02:59  lr: 0.003104  min_lr: 0.003104  loss: 3.6294 (3.4524)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5485 (0.5968)  time: 0.5934  data: 0.0005  max mem: 53905
Epoch: [107]  [2400/2502]  eta: 0:01:00  lr: 0.003103  min_lr: 0.003103  loss: 3.3435 (3.4534)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5855 (0.5986)  time: 0.5912  data: 0.0006  max mem: 53905
Epoch: [107]  [2501/2502]  eta: 0:00:00  lr: 0.003102  min_lr: 0.003102  loss: 3.8075 (3.4544)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6092 (0.6001)  time: 0.5331  data: 0.0009  max mem: 53905
Epoch: [107] Total time: 0:24:45 (0.5938 s / it)
Averaged stats: lr: 0.003102  min_lr: 0.003102  loss: 3.8075 (3.4584)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6092 (0.6001)
Test:  [ 0/50]  eta: 0:02:45  loss: 0.7960 (0.7960)  acc1: 92.0000 (92.0000)  acc5: 98.4000 (98.4000)  time: 3.3058  data: 2.9868  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 1.0513 (1.0909)  acc1: 82.0000 (81.9636)  acc5: 97.2000 (96.7273)  time: 0.5585  data: 0.2719  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.1046 (1.1167)  acc1: 80.0000 (81.3524)  acc5: 96.8000 (96.7810)  time: 0.2840  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2495 (1.2205)  acc1: 77.6000 (79.3936)  acc5: 94.0000 (95.1742)  time: 0.2843  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4758 (1.2847)  acc1: 74.4000 (77.7073)  acc5: 91.6000 (94.3415)  time: 0.2840  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4613 (1.2945)  acc1: 72.4000 (77.2480)  acc5: 92.0000 (94.1280)  time: 0.2838  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3481 s / it)
* Acc@1 77.300 Acc@5 94.250 loss 1.300
Accuracy of the model on the 50000 test images: 77.3%
Max accuracy: 77.55%
Epoch: [108]  [   0/2502]  eta: 1:50:46  lr: 0.003102  min_lr: 0.003102  loss: 3.5913 (3.5913)  weight_decay: 0.0500 (0.0500)  time: 2.6564  data: 2.0520  max mem: 53905
Epoch: [108]  [ 200/2502]  eta: 0:23:11  lr: 0.003101  min_lr: 0.003101  loss: 3.5287 (3.4282)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5853 (0.5869)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [108]  [ 400/2502]  eta: 0:20:59  lr: 0.003099  min_lr: 0.003099  loss: 3.7084 (3.4623)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5884 (0.5919)  time: 0.5988  data: 0.0005  max mem: 53905
Epoch: [108]  [ 600/2502]  eta: 0:18:55  lr: 0.003098  min_lr: 0.003098  loss: 3.6695 (3.4510)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5807 (0.5925)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [108]  [ 800/2502]  eta: 0:16:54  lr: 0.003096  min_lr: 0.003096  loss: 3.6556 (3.4717)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5831 (0.5992)  time: 0.5943  data: 0.0005  max mem: 53905
Epoch: [108]  [1000/2502]  eta: 0:14:55  lr: 0.003095  min_lr: 0.003095  loss: 3.2465 (3.4704)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6078 (0.5985)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [108]  [1200/2502]  eta: 0:12:55  lr: 0.003093  min_lr: 0.003093  loss: 3.4873 (3.4705)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5645 (0.5962)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [108]  [1400/2502]  eta: 0:10:55  lr: 0.003092  min_lr: 0.003092  loss: 3.5617 (3.4709)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6204 (0.6020)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [108]  [1600/2502]  eta: 0:08:56  lr: 0.003090  min_lr: 0.003090  loss: 3.7156 (3.4738)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6129 (0.6017)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [108]  [1800/2502]  eta: 0:06:57  lr: 0.003089  min_lr: 0.003089  loss: 3.4780 (3.4707)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5681 (0.6017)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [108]  [2000/2502]  eta: 0:04:58  lr: 0.003087  min_lr: 0.003087  loss: 3.6403 (3.4683)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5632 (0.6009)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [108]  [2200/2502]  eta: 0:02:59  lr: 0.003085  min_lr: 0.003085  loss: 3.6571 (3.4718)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5655 (0.6010)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [108]  [2400/2502]  eta: 0:01:00  lr: 0.003084  min_lr: 0.003084  loss: 3.6790 (3.4725)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5633 (0.5975)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [108]  [2501/2502]  eta: 0:00:00  lr: 0.003083  min_lr: 0.003083  loss: 3.3517 (3.4731)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5632 (0.5970)  time: 0.5338  data: 0.0009  max mem: 53905
Epoch: [108] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.003083  min_lr: 0.003083  loss: 3.3517 (3.4591)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5632 (0.5970)
Test:  [ 0/50]  eta: 0:02:58  loss: 0.6503 (0.6503)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 3.5782  data: 3.2623  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.9416 (0.9510)  acc1: 80.8000 (82.9818)  acc5: 96.4000 (96.5455)  time: 0.5845  data: 0.2970  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9896 (0.9829)  acc1: 78.4000 (81.1810)  acc5: 96.4000 (96.5143)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2208 (1.1059)  acc1: 76.4000 (78.7097)  acc5: 93.6000 (94.9032)  time: 0.2855  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3905 (1.1799)  acc1: 72.8000 (77.1317)  acc5: 90.8000 (93.8634)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3748 (1.1967)  acc1: 73.6000 (76.7520)  acc5: 90.8000 (93.6960)  time: 0.2849  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3534 s / it)
* Acc@1 77.072 Acc@5 93.990 loss 1.194
Accuracy of the model on the 50000 test images: 77.1%
Max accuracy: 77.55%
Epoch: [109]  [   0/2502]  eta: 2:00:39  lr: 0.003083  min_lr: 0.003083  loss: 3.9671 (3.9671)  weight_decay: 0.0500 (0.0500)  time: 2.8935  data: 1.7986  max mem: 53905
Epoch: [109]  [ 200/2502]  eta: 0:23:12  lr: 0.003082  min_lr: 0.003082  loss: 3.4937 (3.4915)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6158 (0.6049)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [109]  [ 400/2502]  eta: 0:20:57  lr: 0.003080  min_lr: 0.003080  loss: 3.6257 (3.4886)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6141 (0.6043)  time: 0.5987  data: 0.0005  max mem: 53905
Epoch: [109]  [ 600/2502]  eta: 0:18:54  lr: 0.003079  min_lr: 0.003079  loss: 3.7456 (3.4777)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5983 (0.5992)  time: 0.5935  data: 0.0005  max mem: 53905
Epoch: [109]  [ 800/2502]  eta: 0:16:53  lr: 0.003077  min_lr: 0.003077  loss: 3.5459 (3.4697)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5429 (0.5953)  time: 0.5937  data: 0.0005  max mem: 53905
Epoch: [109]  [1000/2502]  eta: 0:14:54  lr: 0.003076  min_lr: 0.003076  loss: 3.6837 (3.4728)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5565 (0.5966)  time: 0.5935  data: 0.0004  max mem: 53905
Epoch: [109]  [1200/2502]  eta: 0:12:55  lr: 0.003074  min_lr: 0.003074  loss: 3.6454 (3.4766)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5646 (0.5940)  time: 0.5934  data: 0.0004  max mem: 53905
Epoch: [109]  [1400/2502]  eta: 0:10:55  lr: 0.003073  min_lr: 0.003073  loss: 3.7330 (3.4737)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5791 (0.5920)  time: 0.5939  data: 0.0005  max mem: 53905
Epoch: [109]  [1600/2502]  eta: 0:08:56  lr: 0.003071  min_lr: 0.003071  loss: 3.3605 (3.4655)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6060 (0.5940)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [109]  [1800/2502]  eta: 0:06:57  lr: 0.003070  min_lr: 0.003070  loss: 3.3517 (3.4672)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5671 (0.5978)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [109]  [2000/2502]  eta: 0:04:58  lr: 0.003068  min_lr: 0.003068  loss: 3.3517 (3.4646)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5509 (0.5952)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [109]  [2200/2502]  eta: 0:02:59  lr: 0.003067  min_lr: 0.003067  loss: 3.5581 (3.4634)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5669 (0.5949)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [109]  [2400/2502]  eta: 0:01:00  lr: 0.003065  min_lr: 0.003065  loss: 3.2311 (3.4639)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [109]  [2501/2502]  eta: 0:00:00  lr: 0.003064  min_lr: 0.003064  loss: 3.5275 (3.4621)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5808 (nan)  time: 0.5338  data: 0.0007  max mem: 53905
Epoch: [109] Total time: 0:24:45 (0.5938 s / it)
Averaged stats: lr: 0.003064  min_lr: 0.003064  loss: 3.5275 (3.4679)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5808 (nan)
Test:  [ 0/50]  eta: 0:02:59  loss: 0.6969 (0.6969)  acc1: 90.8000 (90.8000)  acc5: 98.4000 (98.4000)  time: 3.5822  data: 3.2762  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.9027 (0.9448)  acc1: 84.0000 (83.6000)  acc5: 96.8000 (96.7273)  time: 0.5848  data: 0.2983  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9554 (0.9655)  acc1: 80.8000 (82.0381)  acc5: 96.8000 (96.7619)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1052 (1.0854)  acc1: 77.6000 (79.6258)  acc5: 94.0000 (95.1226)  time: 0.2855  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3816 (1.1551)  acc1: 71.2000 (77.7268)  acc5: 91.2000 (94.2244)  time: 0.2851  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3816 (1.1691)  acc1: 70.8000 (76.9520)  acc5: 92.4000 (94.1360)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3536 s / it)
* Acc@1 77.168 Acc@5 94.206 loss 1.169
Accuracy of the model on the 50000 test images: 77.2%
Max accuracy: 77.55%
Epoch: [110]  [   0/2502]  eta: 1:46:30  lr: 0.003064  min_lr: 0.003064  loss: 3.6510 (3.6510)  weight_decay: 0.0500 (0.0500)  time: 2.5541  data: 1.9285  max mem: 53905
Epoch: [110]  [ 200/2502]  eta: 0:23:10  lr: 0.003063  min_lr: 0.003063  loss: 3.6023 (3.4374)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6179 (0.6170)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [110]  [ 400/2502]  eta: 0:20:58  lr: 0.003061  min_lr: 0.003061  loss: 3.7482 (3.4578)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5474 (0.6073)  time: 0.5924  data: 0.0006  max mem: 53905
Epoch: [110]  [ 600/2502]  eta: 0:18:54  lr: 0.003060  min_lr: 0.003060  loss: 3.5443 (3.4592)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6247 (0.6082)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [110]  [ 800/2502]  eta: 0:16:54  lr: 0.003058  min_lr: 0.003058  loss: 3.5695 (3.4787)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5725 (0.5995)  time: 0.6039  data: 0.0005  max mem: 53905
Epoch: [110]  [1000/2502]  eta: 0:14:53  lr: 0.003057  min_lr: 0.003057  loss: 3.5750 (3.4717)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5971 (0.6045)  time: 0.5912  data: 0.0007  max mem: 53905
Epoch: [110]  [1200/2502]  eta: 0:12:53  lr: 0.003055  min_lr: 0.003055  loss: 3.6903 (3.4821)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6183 (0.6057)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [110]  [1400/2502]  eta: 0:10:54  lr: 0.003054  min_lr: 0.003054  loss: 3.4335 (3.4862)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5695 (0.6030)  time: 0.5947  data: 0.0008  max mem: 53905
Epoch: [110]  [1600/2502]  eta: 0:08:55  lr: 0.003052  min_lr: 0.003052  loss: 3.2461 (3.4822)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5419 (0.5980)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [110]  [1800/2502]  eta: 0:06:56  lr: 0.003051  min_lr: 0.003051  loss: 3.5190 (3.4751)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5792 (0.5970)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [110]  [2000/2502]  eta: 0:04:57  lr: 0.003049  min_lr: 0.003049  loss: 3.5169 (3.4714)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5653 (0.5962)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [110]  [2200/2502]  eta: 0:02:59  lr: 0.003048  min_lr: 0.003048  loss: 3.5730 (3.4700)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5996 (0.5961)  time: 0.5924  data: 0.0007  max mem: 53905
Epoch: [110]  [2400/2502]  eta: 0:01:00  lr: 0.003046  min_lr: 0.003046  loss: 3.3313 (3.4712)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6108 (0.5969)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [110]  [2501/2502]  eta: 0:00:00  lr: 0.003045  min_lr: 0.003045  loss: 3.4045 (3.4708)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6253 (0.5988)  time: 0.5353  data: 0.0009  max mem: 53905
Epoch: [110] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.003045  min_lr: 0.003045  loss: 3.4045 (3.4528)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6253 (0.5988)
Test:  [ 0/50]  eta: 0:03:33  loss: 0.5750 (0.5750)  acc1: 90.0000 (90.0000)  acc5: 98.4000 (98.4000)  time: 4.2704  data: 3.9595  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 0.8123 (0.8920)  acc1: 84.0000 (82.3636)  acc5: 97.6000 (96.9818)  time: 0.6468  data: 0.3604  max mem: 53905
Test:  [20/50]  eta: 0:00:14  loss: 0.9079 (0.9113)  acc1: 81.2000 (81.7524)  acc5: 97.2000 (97.0286)  time: 0.2847  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1258 (1.0307)  acc1: 77.2000 (79.4065)  acc5: 94.4000 (95.3548)  time: 0.2848  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3218 (1.1078)  acc1: 72.8000 (77.6878)  acc5: 90.8000 (94.3122)  time: 0.2844  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3090 (1.1214)  acc1: 73.2000 (77.1600)  acc5: 91.2000 (94.2240)  time: 0.2841  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3672 s / it)
* Acc@1 77.276 Acc@5 94.214 loss 1.125
Accuracy of the model on the 50000 test images: 77.3%
Max accuracy: 77.55%
Epoch: [111]  [   0/2502]  eta: 1:55:24  lr: 0.003045  min_lr: 0.003045  loss: 3.7719 (3.7719)  weight_decay: 0.0500 (0.0500)  time: 2.7677  data: 1.9930  max mem: 53905
Epoch: [111]  [ 200/2502]  eta: 0:23:07  lr: 0.003044  min_lr: 0.003044  loss: 3.7483 (3.4178)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5998 (0.5935)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [111]  [ 400/2502]  eta: 0:20:56  lr: 0.003042  min_lr: 0.003042  loss: 3.3406 (3.4346)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6106 (0.5936)  time: 0.5977  data: 0.0005  max mem: 53905
Epoch: [111]  [ 600/2502]  eta: 0:18:54  lr: 0.003041  min_lr: 0.003041  loss: 3.4110 (3.4460)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5870 (0.6114)  time: 0.5923  data: 0.0006  max mem: 53905
Epoch: [111]  [ 800/2502]  eta: 0:16:53  lr: 0.003039  min_lr: 0.003039  loss: 3.6691 (3.4490)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5496 (0.6026)  time: 0.5905  data: 0.0008  max mem: 53905
Epoch: [111]  [1000/2502]  eta: 0:14:52  lr: 0.003038  min_lr: 0.003038  loss: 3.4780 (3.4349)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5759 (0.6010)  time: 0.5920  data: 0.0007  max mem: 53905
Epoch: [111]  [1200/2502]  eta: 0:12:53  lr: 0.003036  min_lr: 0.003036  loss: 3.7045 (3.4438)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5611 (0.5987)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [111]  [1400/2502]  eta: 0:10:54  lr: 0.003035  min_lr: 0.003035  loss: 3.4774 (3.4532)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6085 (0.6017)  time: 0.5937  data: 0.0005  max mem: 53905
Epoch: [111]  [1600/2502]  eta: 0:08:55  lr: 0.003033  min_lr: 0.003033  loss: 3.3484 (3.4531)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6423 (0.6008)  time: 0.5946  data: 0.0007  max mem: 53905
Epoch: [111]  [1800/2502]  eta: 0:06:57  lr: 0.003031  min_lr: 0.003031  loss: 3.5869 (3.4572)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5446 (0.6008)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [111]  [2000/2502]  eta: 0:04:58  lr: 0.003030  min_lr: 0.003030  loss: 3.5998 (3.4584)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5901 (0.5965)  time: 0.5934  data: 0.0005  max mem: 53905
Epoch: [111]  [2200/2502]  eta: 0:02:59  lr: 0.003028  min_lr: 0.003028  loss: 3.5908 (3.4600)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5629 (0.5991)  time: 0.5959  data: 0.0007  max mem: 53905
Epoch: [111]  [2400/2502]  eta: 0:01:00  lr: 0.003027  min_lr: 0.003027  loss: 3.2095 (3.4590)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6150 (0.5994)  time: 0.5924  data: 0.0006  max mem: 53905
Epoch: [111]  [2501/2502]  eta: 0:00:00  lr: 0.003026  min_lr: 0.003026  loss: 3.5810 (3.4613)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5598 (0.5999)  time: 0.5322  data: 0.0008  max mem: 53905
Epoch: [111] Total time: 0:24:45 (0.5939 s / it)
Averaged stats: lr: 0.003026  min_lr: 0.003026  loss: 3.5810 (3.4510)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5598 (0.5999)
Test:  [ 0/50]  eta: 0:02:46  loss: 0.5955 (0.5955)  acc1: 91.6000 (91.6000)  acc5: 99.2000 (99.2000)  time: 3.3211  data: 2.9981  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8883 (0.9371)  acc1: 81.2000 (82.8727)  acc5: 96.8000 (96.6546)  time: 0.5581  data: 0.2730  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9244 (0.9379)  acc1: 80.4000 (81.9429)  acc5: 96.8000 (96.9714)  time: 0.2819  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0529 (1.0570)  acc1: 78.0000 (79.6129)  acc5: 94.8000 (95.2516)  time: 0.2821  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3007 (1.1150)  acc1: 72.8000 (77.9805)  acc5: 92.4000 (94.5073)  time: 0.2819  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2798 (1.1332)  acc1: 72.8000 (77.4080)  acc5: 92.4000 (94.3680)  time: 0.2817  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3447 s / it)
* Acc@1 77.604 Acc@5 94.406 loss 1.130
Accuracy of the model on the 50000 test images: 77.6%
Max accuracy: 77.60%
Epoch: [112]  [   0/2502]  eta: 1:59:09  lr: 0.003026  min_lr: 0.003026  loss: 3.3756 (3.3756)  weight_decay: 0.0500 (0.0500)  time: 2.8575  data: 2.2593  max mem: 53905
Epoch: [112]  [ 200/2502]  eta: 0:23:13  lr: 0.003025  min_lr: 0.003025  loss: 3.4332 (3.4679)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5992 (0.6228)  time: 0.5909  data: 0.0007  max mem: 53905
Epoch: [112]  [ 400/2502]  eta: 0:20:58  lr: 0.003023  min_lr: 0.003023  loss: 3.6249 (3.4689)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5865 (0.6260)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [112]  [ 600/2502]  eta: 0:18:55  lr: 0.003021  min_lr: 0.003021  loss: 3.5564 (3.4643)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6294 (0.6127)  time: 0.5919  data: 0.0007  max mem: 53905
Epoch: [112]  [ 800/2502]  eta: 0:16:54  lr: 0.003020  min_lr: 0.003020  loss: 3.5502 (3.4471)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5858 (0.6087)  time: 0.5958  data: 0.0008  max mem: 53905
Epoch: [112]  [1000/2502]  eta: 0:14:54  lr: 0.003018  min_lr: 0.003018  loss: 3.6999 (3.4569)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6229 (0.6108)  time: 0.5933  data: 0.0008  max mem: 53905
Epoch: [112]  [1200/2502]  eta: 0:12:55  lr: 0.003017  min_lr: 0.003017  loss: 3.6013 (3.4518)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5504 (0.6059)  time: 0.6108  data: 0.0005  max mem: 53905
Epoch: [112]  [1400/2502]  eta: 0:10:56  lr: 0.003015  min_lr: 0.003015  loss: 3.5983 (3.4516)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5959 (0.6040)  time: 0.5929  data: 0.0006  max mem: 53905
Epoch: [112]  [1600/2502]  eta: 0:08:56  lr: 0.003014  min_lr: 0.003014  loss: 3.5344 (3.4481)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5946 (0.6075)  time: 0.5925  data: 0.0007  max mem: 53905
Epoch: [112]  [1800/2502]  eta: 0:06:57  lr: 0.003012  min_lr: 0.003012  loss: 3.4518 (3.4460)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5890 (0.6067)  time: 0.5933  data: 0.0008  max mem: 53905
Epoch: [112]  [2000/2502]  eta: 0:04:58  lr: 0.003011  min_lr: 0.003011  loss: 3.7001 (3.4466)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5976 (0.6050)  time: 0.5935  data: 0.0006  max mem: 53905
Epoch: [112]  [2200/2502]  eta: 0:02:59  lr: 0.003009  min_lr: 0.003009  loss: 3.4362 (3.4457)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5792 (0.6049)  time: 0.5929  data: 0.0008  max mem: 53905
Epoch: [112]  [2400/2502]  eta: 0:01:00  lr: 0.003007  min_lr: 0.003007  loss: 3.4843 (3.4452)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6016 (0.6047)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [112]  [2501/2502]  eta: 0:00:00  lr: 0.003007  min_lr: 0.003007  loss: 3.8398 (3.4453)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5567 (0.6035)  time: 0.5331  data: 0.0008  max mem: 53905
Epoch: [112] Total time: 0:24:46 (0.5942 s / it)
Averaged stats: lr: 0.003007  min_lr: 0.003007  loss: 3.8398 (3.4445)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5567 (0.6035)
Test:  [ 0/50]  eta: 0:02:26  loss: 0.6712 (0.6712)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 2.9268  data: 2.6033  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.9434 (0.9706)  acc1: 81.2000 (83.7818)  acc5: 97.6000 (96.8000)  time: 0.6158  data: 0.3282  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9922 (1.0002)  acc1: 79.6000 (82.4381)  acc5: 97.2000 (97.0667)  time: 0.3349  data: 0.0506  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1099 (1.1230)  acc1: 78.0000 (79.7936)  acc5: 94.8000 (95.5226)  time: 0.2851  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4379 (1.2013)  acc1: 72.0000 (77.8146)  acc5: 91.6000 (94.5463)  time: 0.2902  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4375 (1.2185)  acc1: 72.8000 (77.3200)  acc5: 91.6000 (94.3200)  time: 0.2900  data: 0.0001  max mem: 53905
Test: Total time: 0:00:18 (0.3622 s / it)
* Acc@1 77.540 Acc@5 94.376 loss 1.218
Accuracy of the model on the 50000 test images: 77.5%
Max accuracy: 77.60%
Epoch: [113]  [   0/2502]  eta: 1:52:11  lr: 0.003007  min_lr: 0.003007  loss: 3.9142 (3.9142)  weight_decay: 0.0500 (0.0500)  time: 2.6903  data: 2.0803  max mem: 53905
Epoch: [113]  [ 200/2502]  eta: 0:23:08  lr: 0.003005  min_lr: 0.003005  loss: 3.5146 (3.4214)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6004 (0.5923)  time: 0.5926  data: 0.0006  max mem: 53905
Epoch: [113]  [ 400/2502]  eta: 0:20:58  lr: 0.003004  min_lr: 0.003004  loss: 3.5195 (3.4711)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6185 (0.6003)  time: 0.5930  data: 0.0007  max mem: 53905
Epoch: [113]  [ 600/2502]  eta: 0:18:55  lr: 0.003002  min_lr: 0.003002  loss: 3.6943 (3.4656)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5620 (0.5930)  time: 0.5924  data: 0.0007  max mem: 53905
Epoch: [113]  [ 800/2502]  eta: 0:16:54  lr: 0.003001  min_lr: 0.003001  loss: 3.2240 (3.4386)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.6026  data: 0.0007  max mem: 53905
Epoch: [113]  [1000/2502]  eta: 0:14:54  lr: 0.002999  min_lr: 0.002999  loss: 3.6026 (3.4458)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5645 (nan)  time: 0.5928  data: 0.0006  max mem: 53905
Epoch: [113]  [1200/2502]  eta: 0:12:54  lr: 0.002997  min_lr: 0.002997  loss: 3.6958 (3.4490)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6006 (nan)  time: 0.5928  data: 0.0006  max mem: 53905
Epoch: [113]  [1400/2502]  eta: 0:10:55  lr: 0.002996  min_lr: 0.002996  loss: 3.6617 (3.4573)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5866 (nan)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [113]  [1600/2502]  eta: 0:08:56  lr: 0.002994  min_lr: 0.002994  loss: 3.3957 (3.4586)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6018 (nan)  time: 0.5920  data: 0.0007  max mem: 53905
Epoch: [113]  [1800/2502]  eta: 0:06:57  lr: 0.002993  min_lr: 0.002993  loss: 3.3827 (3.4538)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6402 (nan)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [113]  [2000/2502]  eta: 0:04:58  lr: 0.002991  min_lr: 0.002991  loss: 3.5782 (3.4520)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5832 (nan)  time: 0.5990  data: 0.0007  max mem: 53905
Epoch: [113]  [2200/2502]  eta: 0:02:59  lr: 0.002990  min_lr: 0.002990  loss: 3.4562 (3.4476)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5319 (nan)  time: 0.5936  data: 0.0007  max mem: 53905
Epoch: [113]  [2400/2502]  eta: 0:01:00  lr: 0.002988  min_lr: 0.002988  loss: 3.5030 (3.4487)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6024 (nan)  time: 0.5918  data: 0.0007  max mem: 53905
Epoch: [113]  [2501/2502]  eta: 0:00:00  lr: 0.002987  min_lr: 0.002987  loss: 3.5536 (3.4472)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5903 (nan)  time: 0.5401  data: 0.0008  max mem: 53905
Epoch: [113] Total time: 0:24:45 (0.5939 s / it)
Averaged stats: lr: 0.002987  min_lr: 0.002987  loss: 3.5536 (3.4420)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5903 (nan)
Test:  [ 0/50]  eta: 0:03:31  loss: 0.5648 (0.5648)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 4.2283  data: 3.8961  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 0.9361 (0.9414)  acc1: 85.2000 (83.5636)  acc5: 97.6000 (97.0909)  time: 0.6432  data: 0.3546  max mem: 53905
Test:  [20/50]  eta: 0:00:14  loss: 0.9361 (0.9477)  acc1: 80.0000 (81.7905)  acc5: 97.6000 (97.0095)  time: 0.2849  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1304 (1.0692)  acc1: 77.6000 (79.5871)  acc5: 95.2000 (95.5355)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3224 (1.1395)  acc1: 72.8000 (78.0488)  acc5: 92.4000 (94.8098)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2943 (1.1531)  acc1: 72.8000 (77.7520)  acc5: 92.8000 (94.6560)  time: 0.2844  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3662 s / it)
* Acc@1 77.762 Acc@5 94.480 loss 1.153
Accuracy of the model on the 50000 test images: 77.8%
Max accuracy: 77.76%
Epoch: [114]  [   0/2502]  eta: 1:35:14  lr: 0.002987  min_lr: 0.002987  loss: 2.9736 (2.9736)  weight_decay: 0.0500 (0.0500)  time: 2.2839  data: 1.6815  max mem: 53905
Epoch: [114]  [ 200/2502]  eta: 0:23:03  lr: 0.002986  min_lr: 0.002986  loss: 3.2934 (3.3550)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5876 (0.6392)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [114]  [ 400/2502]  eta: 0:20:55  lr: 0.002984  min_lr: 0.002984  loss: 3.5975 (3.4013)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6498 (0.6200)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [114]  [ 600/2502]  eta: 0:18:53  lr: 0.002983  min_lr: 0.002983  loss: 3.5642 (3.4090)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6249 (nan)  time: 0.6040  data: 0.0005  max mem: 53905
Epoch: [114]  [ 800/2502]  eta: 0:16:53  lr: 0.002981  min_lr: 0.002981  loss: 3.7709 (3.4344)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5603 (nan)  time: 0.5941  data: 0.0004  max mem: 53905
Epoch: [114]  [1000/2502]  eta: 0:14:53  lr: 0.002979  min_lr: 0.002979  loss: 3.6755 (3.4458)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5990 (nan)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [114]  [1200/2502]  eta: 0:12:54  lr: 0.002978  min_lr: 0.002978  loss: 3.4535 (3.4497)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6347 (nan)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [114]  [1400/2502]  eta: 0:10:55  lr: 0.002976  min_lr: 0.002976  loss: 3.4714 (3.4510)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5764 (nan)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [114]  [1600/2502]  eta: 0:08:56  lr: 0.002975  min_lr: 0.002975  loss: 3.4323 (3.4543)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6457 (nan)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [114]  [1800/2502]  eta: 0:06:57  lr: 0.002973  min_lr: 0.002973  loss: 3.6054 (3.4408)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5521 (nan)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [114]  [2000/2502]  eta: 0:04:58  lr: 0.002972  min_lr: 0.002972  loss: 3.4420 (3.4470)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5906 (nan)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [114]  [2200/2502]  eta: 0:02:59  lr: 0.002970  min_lr: 0.002970  loss: 3.5492 (3.4449)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5948 (nan)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [114]  [2400/2502]  eta: 0:01:00  lr: 0.002968  min_lr: 0.002968  loss: 3.7914 (3.4452)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6400 (nan)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [114]  [2501/2502]  eta: 0:00:00  lr: 0.002968  min_lr: 0.002968  loss: 3.5223 (3.4452)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6302 (nan)  time: 0.5324  data: 0.0007  max mem: 53905
Epoch: [114] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.002968  min_lr: 0.002968  loss: 3.5223 (3.4384)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6302 (nan)
Test:  [ 0/50]  eta: 0:03:36  loss: 0.5797 (0.5797)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 4.3264  data: 4.0242  max mem: 53905
Test:  [10/50]  eta: 0:00:26  loss: 0.9177 (0.9155)  acc1: 83.6000 (84.0000)  acc5: 97.2000 (96.6182)  time: 0.6507  data: 0.3662  max mem: 53905
Test:  [20/50]  eta: 0:00:14  loss: 0.9347 (0.9395)  acc1: 81.6000 (82.5714)  acc5: 97.2000 (96.7048)  time: 0.2834  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.0712 (1.0442)  acc1: 77.6000 (80.3226)  acc5: 94.4000 (95.2903)  time: 0.2835  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2968 (1.1140)  acc1: 74.8000 (78.4976)  acc5: 92.0000 (94.4390)  time: 0.2832  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2686 (1.1354)  acc1: 74.0000 (77.8880)  acc5: 92.4000 (94.3120)  time: 0.2830  data: 0.0001  max mem: 53905
Test: Total time: 0:00:18 (0.3675 s / it)
* Acc@1 77.626 Acc@5 94.334 loss 1.141
Accuracy of the model on the 50000 test images: 77.6%
Max accuracy: 77.76%
Epoch: [115]  [   0/2502]  eta: 1:49:44  lr: 0.002968  min_lr: 0.002968  loss: 3.3232 (3.3232)  weight_decay: 0.0500 (0.0500)  time: 2.6316  data: 2.0176  max mem: 53905
Epoch: [115]  [ 200/2502]  eta: 0:23:02  lr: 0.002966  min_lr: 0.002966  loss: 3.5519 (3.3953)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5878 (0.5757)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [115]  [ 400/2502]  eta: 0:20:53  lr: 0.002965  min_lr: 0.002965  loss: 3.3783 (3.4317)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6028 (0.5864)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [115]  [ 600/2502]  eta: 0:18:52  lr: 0.002963  min_lr: 0.002963  loss: 3.4400 (3.4144)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5949 (0.5986)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [115]  [ 800/2502]  eta: 0:16:51  lr: 0.002961  min_lr: 0.002961  loss: 3.4600 (3.4230)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6467 (0.6130)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [115]  [1000/2502]  eta: 0:14:51  lr: 0.002960  min_lr: 0.002960  loss: 3.5558 (3.4188)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5459 (0.6068)  time: 0.5903  data: 0.0005  max mem: 53905
Epoch: [115]  [1200/2502]  eta: 0:12:52  lr: 0.002958  min_lr: 0.002958  loss: 3.2310 (3.4170)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6319 (0.6062)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [115]  [1400/2502]  eta: 0:10:53  lr: 0.002957  min_lr: 0.002957  loss: 3.4976 (3.4155)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5894 (0.6067)  time: 0.5950  data: 0.0005  max mem: 53905
Epoch: [115]  [1600/2502]  eta: 0:08:55  lr: 0.002955  min_lr: 0.002955  loss: 3.5397 (3.4324)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5619 (0.6025)  time: 0.5985  data: 0.0004  max mem: 53905
Epoch: [115]  [1800/2502]  eta: 0:06:56  lr: 0.002954  min_lr: 0.002954  loss: 3.6586 (3.4310)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5923 (0.6025)  time: 0.5904  data: 0.0005  max mem: 53905
Epoch: [115]  [2000/2502]  eta: 0:04:57  lr: 0.002952  min_lr: 0.002952  loss: 3.1377 (3.4307)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6225 (0.6037)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [115]  [2200/2502]  eta: 0:02:59  lr: 0.002950  min_lr: 0.002950  loss: 3.7045 (3.4336)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6672 (0.6061)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [115]  [2400/2502]  eta: 0:01:00  lr: 0.002949  min_lr: 0.002949  loss: 3.4559 (3.4362)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6133 (0.6048)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [115]  [2501/2502]  eta: 0:00:00  lr: 0.002948  min_lr: 0.002948  loss: 3.3674 (3.4340)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6304 (0.6050)  time: 0.5335  data: 0.0007  max mem: 53905
Epoch: [115] Total time: 0:24:42 (0.5925 s / it)
Averaged stats: lr: 0.002948  min_lr: 0.002948  loss: 3.3674 (3.4409)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6304 (0.6050)
Test:  [ 0/50]  eta: 0:02:28  loss: 0.6739 (0.6739)  acc1: 90.4000 (90.4000)  acc5: 98.4000 (98.4000)  time: 2.9698  data: 2.6587  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8461 (0.9188)  acc1: 80.8000 (82.9455)  acc5: 97.2000 (97.0182)  time: 0.6043  data: 0.3186  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9388 (0.9390)  acc1: 80.8000 (82.2286)  acc5: 97.2000 (96.8762)  time: 0.3259  data: 0.0426  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0802 (1.0439)  acc1: 78.0000 (79.9742)  acc5: 94.8000 (95.3548)  time: 0.2841  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3000 (1.1121)  acc1: 73.6000 (78.2927)  acc5: 92.0000 (94.5463)  time: 0.2838  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3000 (1.1296)  acc1: 72.4000 (77.7760)  acc5: 92.8000 (94.3680)  time: 0.2835  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3562 s / it)
* Acc@1 77.862 Acc@5 94.484 loss 1.131
Accuracy of the model on the 50000 test images: 77.9%
Max accuracy: 77.86%
Epoch: [116]  [   0/2502]  eta: 1:31:22  lr: 0.002948  min_lr: 0.002948  loss: 3.9250 (3.9250)  weight_decay: 0.0500 (0.0500)  time: 2.1913  data: 1.5922  max mem: 53905
Epoch: [116]  [ 200/2502]  eta: 0:23:05  lr: 0.002946  min_lr: 0.002946  loss: 3.7527 (3.3782)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6476 (0.6346)  time: 0.5997  data: 0.0004  max mem: 53905
Epoch: [116]  [ 400/2502]  eta: 0:20:54  lr: 0.002945  min_lr: 0.002945  loss: 3.0603 (3.3925)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5587 (0.6239)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [116]  [ 600/2502]  eta: 0:18:53  lr: 0.002943  min_lr: 0.002943  loss: 3.5423 (3.4218)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6073 (0.6212)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [116]  [ 800/2502]  eta: 0:16:53  lr: 0.002942  min_lr: 0.002942  loss: 3.3583 (3.4112)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6681 (0.6253)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [116]  [1000/2502]  eta: 0:14:53  lr: 0.002940  min_lr: 0.002940  loss: 3.3294 (3.4058)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6025 (0.6254)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [116]  [1200/2502]  eta: 0:12:54  lr: 0.002939  min_lr: 0.002939  loss: 3.7198 (3.4127)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6040 (0.6241)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [116]  [1400/2502]  eta: 0:10:55  lr: 0.002937  min_lr: 0.002937  loss: 3.2762 (3.4093)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5820 (0.6209)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [116]  [1600/2502]  eta: 0:08:56  lr: 0.002935  min_lr: 0.002935  loss: 3.5486 (3.4079)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5652 (0.6177)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [116]  [1800/2502]  eta: 0:06:57  lr: 0.002934  min_lr: 0.002934  loss: 3.5356 (3.4124)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6164 (0.6163)  time: 0.5984  data: 0.0004  max mem: 53905
Epoch: [116]  [2000/2502]  eta: 0:04:58  lr: 0.002932  min_lr: 0.002932  loss: 3.4022 (3.4163)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5595 (0.6162)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [116]  [2200/2502]  eta: 0:02:59  lr: 0.002931  min_lr: 0.002931  loss: 3.7428 (3.4173)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6216 (0.6178)  time: 0.5965  data: 0.0004  max mem: 53905
Epoch: [116]  [2400/2502]  eta: 0:01:00  lr: 0.002929  min_lr: 0.002929  loss: 3.4106 (3.4136)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6439 (0.6180)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [116]  [2501/2502]  eta: 0:00:00  lr: 0.002928  min_lr: 0.002928  loss: 3.6492 (3.4179)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5990 (0.6177)  time: 0.5323  data: 0.0006  max mem: 53905
Epoch: [116] Total time: 0:24:45 (0.5936 s / it)
Averaged stats: lr: 0.002928  min_lr: 0.002928  loss: 3.6492 (3.4185)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5990 (0.6177)
Test:  [ 0/50]  eta: 0:02:29  loss: 0.6830 (0.6830)  acc1: 91.2000 (91.2000)  acc5: 98.8000 (98.8000)  time: 2.9902  data: 2.6620  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.8965 (0.9795)  acc1: 83.2000 (83.2727)  acc5: 97.6000 (97.0545)  time: 0.5428  data: 0.2557  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.0299 (1.0238)  acc1: 81.2000 (81.6571)  acc5: 96.8000 (96.7238)  time: 0.2910  data: 0.0078  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2291 (1.1311)  acc1: 76.4000 (79.6000)  acc5: 94.0000 (95.2516)  time: 0.2838  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3624 (1.1940)  acc1: 74.8000 (78.1561)  acc5: 91.6000 (94.4293)  time: 0.2835  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3597 (1.2091)  acc1: 74.0000 (77.6960)  acc5: 92.4000 (94.2160)  time: 0.2851  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3439 s / it)
* Acc@1 77.872 Acc@5 94.430 loss 1.204
Accuracy of the model on the 50000 test images: 77.9%
Max accuracy: 77.87%
Epoch: [117]  [   0/2502]  eta: 1:34:48  lr: 0.002928  min_lr: 0.002928  loss: 3.7567 (3.7567)  weight_decay: 0.0500 (0.0500)  time: 2.2734  data: 1.6662  max mem: 53905
Epoch: [117]  [ 200/2502]  eta: 0:22:59  lr: 0.002927  min_lr: 0.002927  loss: 3.4267 (3.3834)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5764 (0.5981)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [117]  [ 400/2502]  eta: 0:20:54  lr: 0.002925  min_lr: 0.002925  loss: 3.5813 (3.3957)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5503 (0.6021)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [117]  [ 600/2502]  eta: 0:18:52  lr: 0.002923  min_lr: 0.002923  loss: 3.4637 (3.3971)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5737 (0.6026)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [117]  [ 800/2502]  eta: 0:16:52  lr: 0.002922  min_lr: 0.002922  loss: 3.5198 (3.4040)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5992 (0.5985)  time: 0.6153  data: 0.0004  max mem: 53905
Epoch: [117]  [1000/2502]  eta: 0:14:52  lr: 0.002920  min_lr: 0.002920  loss: 3.6781 (3.4062)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5903 (0.5989)  time: 0.5958  data: 0.0004  max mem: 53905
Epoch: [117]  [1200/2502]  eta: 0:12:53  lr: 0.002919  min_lr: 0.002919  loss: 3.3663 (3.4145)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6097 (0.5999)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [117]  [1400/2502]  eta: 0:10:54  lr: 0.002917  min_lr: 0.002917  loss: 3.5987 (3.4246)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5980 (0.6027)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [117]  [1600/2502]  eta: 0:08:55  lr: 0.002915  min_lr: 0.002915  loss: 3.4666 (3.4234)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5703 (0.6028)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [117]  [1800/2502]  eta: 0:06:56  lr: 0.002914  min_lr: 0.002914  loss: 3.5620 (3.4150)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5975 (0.6058)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [117]  [2000/2502]  eta: 0:04:58  lr: 0.002912  min_lr: 0.002912  loss: 3.2276 (3.4111)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5829 (0.6056)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [117]  [2200/2502]  eta: 0:02:59  lr: 0.002911  min_lr: 0.002911  loss: 3.3599 (3.4104)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5798 (0.6046)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [117]  [2400/2502]  eta: 0:01:00  lr: 0.002909  min_lr: 0.002909  loss: 3.6406 (3.4146)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5948 (0.6055)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [117]  [2501/2502]  eta: 0:00:00  lr: 0.002908  min_lr: 0.002908  loss: 3.5339 (3.4120)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6163 (0.6062)  time: 0.5328  data: 0.0009  max mem: 53905
Epoch: [117] Total time: 0:24:44 (0.5935 s / it)
Averaged stats: lr: 0.002908  min_lr: 0.002908  loss: 3.5339 (3.4244)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6163 (0.6062)
Test:  [ 0/50]  eta: 0:03:02  loss: 0.5750 (0.5750)  acc1: 90.8000 (90.8000)  acc5: 98.4000 (98.4000)  time: 3.6436  data: 3.3182  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8224 (0.9190)  acc1: 84.4000 (82.8727)  acc5: 97.6000 (96.7636)  time: 0.5896  data: 0.3021  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9433 (0.9446)  acc1: 80.8000 (82.0571)  acc5: 97.2000 (96.7810)  time: 0.2845  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1046 (1.0558)  acc1: 76.4000 (79.6129)  acc5: 94.4000 (95.4839)  time: 0.2848  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3110 (1.1189)  acc1: 74.8000 (78.0488)  acc5: 92.0000 (94.6146)  time: 0.2845  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3013 (1.1368)  acc1: 73.6000 (77.3520)  acc5: 92.4000 (94.4720)  time: 0.2842  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3544 s / it)
* Acc@1 77.426 Acc@5 94.416 loss 1.137
Accuracy of the model on the 50000 test images: 77.4%
Max accuracy: 77.87%
Epoch: [118]  [   0/2502]  eta: 1:54:36  lr: 0.002908  min_lr: 0.002908  loss: 4.1300 (4.1300)  weight_decay: 0.0500 (0.0500)  time: 2.7483  data: 2.1362  max mem: 53905
Epoch: [118]  [ 200/2502]  eta: 0:23:08  lr: 0.002907  min_lr: 0.002907  loss: 3.4921 (3.4388)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5851 (0.6149)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [118]  [ 400/2502]  eta: 0:20:55  lr: 0.002905  min_lr: 0.002905  loss: 3.6258 (3.4443)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6131 (0.6106)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [118]  [ 600/2502]  eta: 0:18:54  lr: 0.002903  min_lr: 0.002903  loss: 3.3108 (3.4473)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5722 (0.6167)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [118]  [ 800/2502]  eta: 0:16:53  lr: 0.002902  min_lr: 0.002902  loss: 3.6937 (3.4493)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6382 (0.6178)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [118]  [1000/2502]  eta: 0:14:53  lr: 0.002900  min_lr: 0.002900  loss: 3.3784 (3.4388)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5744 (0.6171)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [118]  [1200/2502]  eta: 0:12:54  lr: 0.002899  min_lr: 0.002899  loss: 3.3980 (3.4281)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5942 (0.6162)  time: 0.6011  data: 0.0005  max mem: 53905
Epoch: [118]  [1400/2502]  eta: 0:10:55  lr: 0.002897  min_lr: 0.002897  loss: 3.3666 (3.4329)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6132 (0.6161)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [118]  [1600/2502]  eta: 0:08:56  lr: 0.002895  min_lr: 0.002895  loss: 3.4394 (3.4269)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6152 (0.6139)  time: 0.5951  data: 0.0004  max mem: 53905
Epoch: [118]  [1800/2502]  eta: 0:06:57  lr: 0.002894  min_lr: 0.002894  loss: 3.5937 (3.4324)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5980 (0.6145)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [118]  [2000/2502]  eta: 0:04:58  lr: 0.002892  min_lr: 0.002892  loss: 3.3724 (3.4318)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5949 (0.6134)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [118]  [2200/2502]  eta: 0:02:59  lr: 0.002891  min_lr: 0.002891  loss: 3.2018 (3.4300)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6023 (0.6129)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [118]  [2400/2502]  eta: 0:01:00  lr: 0.002889  min_lr: 0.002889  loss: 3.4360 (3.4332)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5898 (0.6120)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [118]  [2501/2502]  eta: 0:00:00  lr: 0.002888  min_lr: 0.002888  loss: 3.3918 (3.4328)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5952 (0.6120)  time: 0.5338  data: 0.0009  max mem: 53905
Epoch: [118] Total time: 0:24:44 (0.5935 s / it)
Averaged stats: lr: 0.002888  min_lr: 0.002888  loss: 3.3918 (3.4243)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5952 (0.6120)
Test:  [ 0/50]  eta: 0:02:36  loss: 0.6833 (0.6833)  acc1: 91.6000 (91.6000)  acc5: 98.4000 (98.4000)  time: 3.1232  data: 2.8074  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.9403 (0.9645)  acc1: 82.8000 (83.5636)  acc5: 97.6000 (96.9818)  time: 0.5834  data: 0.2977  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0089 (0.9990)  acc1: 82.0000 (82.5905)  acc5: 96.8000 (96.8381)  time: 0.3065  data: 0.0236  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1255 (1.0937)  acc1: 78.8000 (80.4645)  acc5: 94.0000 (95.4710)  time: 0.2840  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3671 (1.1685)  acc1: 75.2000 (78.7610)  acc5: 91.6000 (94.6342)  time: 0.2900  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3712 (1.1902)  acc1: 72.0000 (78.2080)  acc5: 92.4000 (94.4320)  time: 0.2908  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3548 s / it)
* Acc@1 77.826 Acc@5 94.458 loss 1.197
Accuracy of the model on the 50000 test images: 77.8%
Max accuracy: 77.87%
Epoch: [119]  [   0/2502]  eta: 1:45:35  lr: 0.002888  min_lr: 0.002888  loss: 3.6643 (3.6643)  weight_decay: 0.0500 (0.0500)  time: 2.5323  data: 1.7256  max mem: 53905
Epoch: [119]  [ 200/2502]  eta: 0:23:08  lr: 0.002887  min_lr: 0.002887  loss: 3.4987 (3.3919)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6364 (0.6138)  time: 0.5988  data: 0.0004  max mem: 53905
Epoch: [119]  [ 400/2502]  eta: 0:20:57  lr: 0.002885  min_lr: 0.002885  loss: 3.4919 (3.4256)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6532 (0.6280)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [119]  [ 600/2502]  eta: 0:18:53  lr: 0.002883  min_lr: 0.002883  loss: 3.5189 (3.4213)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6736 (0.6355)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [119]  [ 800/2502]  eta: 0:16:54  lr: 0.002882  min_lr: 0.002882  loss: 3.2621 (3.4165)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6398 (0.6363)  time: 0.5944  data: 0.0005  max mem: 53905
Epoch: [119]  [1000/2502]  eta: 0:14:54  lr: 0.002880  min_lr: 0.002880  loss: 3.6284 (3.4199)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5854 (0.6267)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [119]  [1200/2502]  eta: 0:12:54  lr: 0.002879  min_lr: 0.002879  loss: 3.2175 (3.4211)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5506 (0.6213)  time: 0.5954  data: 0.0004  max mem: 53905
Epoch: [119]  [1400/2502]  eta: 0:10:55  lr: 0.002877  min_lr: 0.002877  loss: 3.5523 (3.4126)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6368 (0.6225)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [119]  [1600/2502]  eta: 0:08:55  lr: 0.002875  min_lr: 0.002875  loss: 3.3268 (3.4127)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5952 (0.6208)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [119]  [1800/2502]  eta: 0:06:56  lr: 0.002874  min_lr: 0.002874  loss: 3.6187 (3.4159)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6211 (0.6219)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [119]  [2000/2502]  eta: 0:04:58  lr: 0.002872  min_lr: 0.002872  loss: 3.5782 (3.4157)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5750 (0.6195)  time: 0.5973  data: 0.0005  max mem: 53905
Epoch: [119]  [2200/2502]  eta: 0:02:59  lr: 0.002870  min_lr: 0.002870  loss: 3.5244 (3.4144)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5743 (0.6177)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [119]  [2400/2502]  eta: 0:01:00  lr: 0.002869  min_lr: 0.002869  loss: 3.6457 (3.4114)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6347 (0.6178)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [119]  [2501/2502]  eta: 0:00:00  lr: 0.002868  min_lr: 0.002868  loss: 3.4734 (3.4147)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5920 (0.6176)  time: 0.5328  data: 0.0007  max mem: 53905
Epoch: [119] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.002868  min_lr: 0.002868  loss: 3.4734 (3.4158)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5920 (0.6176)
Test:  [ 0/50]  eta: 0:03:24  loss: 0.6093 (0.6093)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 4.0984  data: 3.7746  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 0.8906 (0.9255)  acc1: 84.0000 (83.0909)  acc5: 96.8000 (96.8364)  time: 0.6304  data: 0.3435  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9266 (0.9526)  acc1: 82.4000 (81.6191)  acc5: 96.8000 (96.8191)  time: 0.2840  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.0438 (1.0595)  acc1: 77.2000 (79.4968)  acc5: 94.4000 (95.3548)  time: 0.2842  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3387 (1.1270)  acc1: 73.6000 (77.8341)  acc5: 92.0000 (94.5171)  time: 0.2838  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2576 (1.1365)  acc1: 73.6000 (77.4640)  acc5: 92.4000 (94.3920)  time: 0.2835  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3633 s / it)
* Acc@1 77.632 Acc@5 94.484 loss 1.136
Accuracy of the model on the 50000 test images: 77.6%
Max accuracy: 77.87%
Epoch: [120]  [   0/2502]  eta: 1:54:51  lr: 0.002868  min_lr: 0.002868  loss: 3.8072 (3.8072)  weight_decay: 0.0500 (0.0500)  time: 2.7543  data: 1.9124  max mem: 53905
Epoch: [120]  [ 200/2502]  eta: 0:23:08  lr: 0.002866  min_lr: 0.002866  loss: 3.1488 (3.3852)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6190 (0.6173)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [120]  [ 400/2502]  eta: 0:20:56  lr: 0.002865  min_lr: 0.002865  loss: 3.3939 (3.3996)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6121 (0.6236)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [120]  [ 600/2502]  eta: 0:18:53  lr: 0.002863  min_lr: 0.002863  loss: 3.1048 (3.3972)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5747 (0.6165)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [120]  [ 800/2502]  eta: 0:16:52  lr: 0.002862  min_lr: 0.002862  loss: 3.3859 (3.3938)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5747 (0.6127)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [120]  [1000/2502]  eta: 0:14:52  lr: 0.002860  min_lr: 0.002860  loss: 3.4702 (3.4044)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5756 (0.6150)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [120]  [1200/2502]  eta: 0:12:53  lr: 0.002858  min_lr: 0.002858  loss: 3.6366 (3.4077)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6014 (0.6171)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [120]  [1400/2502]  eta: 0:10:54  lr: 0.002857  min_lr: 0.002857  loss: 3.6838 (3.4160)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5847 (0.6138)  time: 0.5974  data: 0.0005  max mem: 53905
Epoch: [120]  [1600/2502]  eta: 0:08:55  lr: 0.002855  min_lr: 0.002855  loss: 3.5368 (3.4221)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6095 (0.6151)  time: 0.5969  data: 0.0004  max mem: 53905
Epoch: [120]  [1800/2502]  eta: 0:06:56  lr: 0.002853  min_lr: 0.002853  loss: 3.5239 (3.4288)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5745 (0.6121)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [120]  [2000/2502]  eta: 0:04:57  lr: 0.002852  min_lr: 0.002852  loss: 3.4546 (3.4239)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6337 (0.6150)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [120]  [2200/2502]  eta: 0:02:59  lr: 0.002850  min_lr: 0.002850  loss: 3.4807 (3.4264)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6690 (0.6163)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [120]  [2400/2502]  eta: 0:01:00  lr: 0.002849  min_lr: 0.002849  loss: 3.5597 (3.4316)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5884 (0.6145)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [120]  [2501/2502]  eta: 0:00:00  lr: 0.002848  min_lr: 0.002848  loss: 3.4034 (3.4301)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5926 (0.6145)  time: 0.5327  data: 0.0007  max mem: 53905
Epoch: [120] Total time: 0:24:43 (0.5929 s / it)
Averaged stats: lr: 0.002848  min_lr: 0.002848  loss: 3.4034 (3.4158)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5926 (0.6145)
Test:  [ 0/50]  eta: 0:02:56  loss: 0.5507 (0.5507)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 3.5285  data: 3.2122  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8522 (0.9018)  acc1: 84.0000 (83.2727)  acc5: 97.2000 (96.8364)  time: 0.5784  data: 0.2924  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9548 (0.9204)  acc1: 80.4000 (82.3810)  acc5: 97.2000 (96.9905)  time: 0.2836  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0575 (1.0188)  acc1: 79.2000 (80.3226)  acc5: 94.8000 (95.4968)  time: 0.2839  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2597 (1.0849)  acc1: 75.2000 (78.8976)  acc5: 92.0000 (94.7317)  time: 0.2837  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2495 (1.1069)  acc1: 74.8000 (78.0880)  acc5: 93.2000 (94.6160)  time: 0.2834  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3516 s / it)
* Acc@1 78.202 Acc@5 94.640 loss 1.116
Accuracy of the model on the 50000 test images: 78.2%
Max accuracy: 78.20%
Epoch: [121]  [   0/2502]  eta: 1:57:49  lr: 0.002848  min_lr: 0.002848  loss: 2.5786 (2.5786)  weight_decay: 0.0500 (0.0500)  time: 2.8256  data: 2.2220  max mem: 53905
Epoch: [121]  [ 200/2502]  eta: 0:23:11  lr: 0.002846  min_lr: 0.002846  loss: 3.0972 (3.4052)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5718 (0.6140)  time: 0.6013  data: 0.0004  max mem: 53905
Epoch: [121]  [ 400/2502]  eta: 0:20:58  lr: 0.002845  min_lr: 0.002845  loss: 3.5772 (3.4191)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5896 (0.6134)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [121]  [ 600/2502]  eta: 0:18:54  lr: 0.002843  min_lr: 0.002843  loss: 3.6644 (3.4276)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6263 (0.6236)  time: 0.5899  data: 0.0007  max mem: 53905
Epoch: [121]  [ 800/2502]  eta: 0:16:53  lr: 0.002841  min_lr: 0.002841  loss: 3.6433 (3.4232)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5664 (0.6154)  time: 0.5915  data: 0.0008  max mem: 53905
Epoch: [121]  [1000/2502]  eta: 0:14:53  lr: 0.002840  min_lr: 0.002840  loss: 3.5502 (3.4099)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6314 (0.6199)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [121]  [1200/2502]  eta: 0:12:53  lr: 0.002838  min_lr: 0.002838  loss: 3.3899 (3.4046)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6210 (0.6212)  time: 0.5913  data: 0.0006  max mem: 53905
Epoch: [121]  [1400/2502]  eta: 0:10:54  lr: 0.002836  min_lr: 0.002836  loss: 3.3450 (3.4098)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6014 (0.6217)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [121]  [1600/2502]  eta: 0:08:55  lr: 0.002835  min_lr: 0.002835  loss: 3.4786 (3.4078)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6483 (0.6239)  time: 0.5922  data: 0.0008  max mem: 53905
Epoch: [121]  [1800/2502]  eta: 0:06:56  lr: 0.002833  min_lr: 0.002833  loss: 3.4795 (3.4078)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6176 (0.6210)  time: 0.5913  data: 0.0008  max mem: 53905
Epoch: [121]  [2000/2502]  eta: 0:04:58  lr: 0.002831  min_lr: 0.002831  loss: 3.4134 (3.4122)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5983 (0.6219)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [121]  [2200/2502]  eta: 0:02:59  lr: 0.002830  min_lr: 0.002830  loss: 3.5613 (3.4205)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5846 (0.6212)  time: 0.5910  data: 0.0008  max mem: 53905
Epoch: [121]  [2400/2502]  eta: 0:01:00  lr: 0.002828  min_lr: 0.002828  loss: 3.6388 (3.4204)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6240 (0.6237)  time: 0.5977  data: 0.0007  max mem: 53905
Epoch: [121]  [2501/2502]  eta: 0:00:00  lr: 0.002827  min_lr: 0.002827  loss: 3.2973 (3.4184)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5705 (0.6221)  time: 0.5319  data: 0.0008  max mem: 53905
Epoch: [121] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.002827  min_lr: 0.002827  loss: 3.2973 (3.4162)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5705 (0.6221)
Test:  [ 0/50]  eta: 0:02:31  loss: 0.5512 (0.5512)  acc1: 92.4000 (92.4000)  acc5: 98.4000 (98.4000)  time: 3.0379  data: 2.7065  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8253 (0.8757)  acc1: 82.8000 (83.0545)  acc5: 98.0000 (96.9818)  time: 0.6225  data: 0.3355  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8693 (0.8869)  acc1: 81.6000 (82.6286)  acc5: 96.8000 (97.1238)  time: 0.3323  data: 0.0495  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: nan (nan)  acc1: 79.6000 (80.6194)  acc5: 95.2000 (95.7161)  time: 0.2837  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: nan (nan)  acc1: 74.4000 (78.6049)  acc5: 92.8000 (94.9366)  time: 0.2834  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: nan (nan)  acc1: 73.2000 (78.1520)  acc5: 93.2000 (94.7120)  time: 0.2829  data: 0.0001  max mem: 53905
Test: Total time: 0:00:18 (0.3606 s / it)
* Acc@1 78.030 Acc@5 94.658 loss nan
Accuracy of the model on the 50000 test images: 78.0%
Max accuracy: 78.20%
Epoch: [122]  [   0/2502]  eta: 1:47:48  lr: 0.002827  min_lr: 0.002827  loss: 3.7771 (3.7771)  weight_decay: 0.0500 (0.0500)  time: 2.5855  data: 1.7806  max mem: 53905
Epoch: [122]  [ 200/2502]  eta: 0:23:07  lr: 0.002826  min_lr: 0.002826  loss: 3.5818 (3.4099)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5935 (0.6249)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [122]  [ 400/2502]  eta: 0:20:55  lr: 0.002824  min_lr: 0.002824  loss: 3.5072 (3.4254)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6510 (0.6249)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [122]  [ 600/2502]  eta: 0:18:53  lr: 0.002823  min_lr: 0.002823  loss: 3.5798 (3.4079)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5796 (0.6185)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [122]  [ 800/2502]  eta: 0:16:53  lr: 0.002821  min_lr: 0.002821  loss: 3.3467 (3.4224)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6712 (0.6244)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [122]  [1000/2502]  eta: 0:14:53  lr: 0.002819  min_lr: 0.002819  loss: 3.4698 (3.4063)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5854 (0.6181)  time: 0.5918  data: 0.0007  max mem: 53905
Epoch: [122]  [1200/2502]  eta: 0:12:54  lr: 0.002818  min_lr: 0.002818  loss: 3.5152 (3.4177)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5719 (0.6186)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [122]  [1400/2502]  eta: 0:10:55  lr: 0.002816  min_lr: 0.002816  loss: 3.5657 (3.4147)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5833 (0.6168)  time: 0.5914  data: 0.0010  max mem: 53905
Epoch: [122]  [1600/2502]  eta: 0:08:56  lr: 0.002814  min_lr: 0.002814  loss: 3.5500 (3.4156)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6080 (0.6153)  time: 0.5914  data: 0.0008  max mem: 53905
Epoch: [122]  [1800/2502]  eta: 0:06:57  lr: 0.002813  min_lr: 0.002813  loss: 3.6382 (3.4219)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6159 (0.6183)  time: 0.5916  data: 0.0007  max mem: 53905
Epoch: [122]  [2000/2502]  eta: 0:04:58  lr: 0.002811  min_lr: 0.002811  loss: 3.5028 (3.4255)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6082 (0.6197)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [122]  [2200/2502]  eta: 0:02:59  lr: 0.002809  min_lr: 0.002809  loss: 3.3387 (3.4224)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5939 (0.6195)  time: 0.5923  data: 0.0007  max mem: 53905
Epoch: [122]  [2400/2502]  eta: 0:01:00  lr: 0.002808  min_lr: 0.002808  loss: 3.5037 (3.4179)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5993 (0.6202)  time: 0.5913  data: 0.0006  max mem: 53905
Epoch: [122]  [2501/2502]  eta: 0:00:00  lr: 0.002807  min_lr: 0.002807  loss: 3.3368 (3.4173)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6012 (0.6209)  time: 0.5322  data: 0.0007  max mem: 53905
Epoch: [122] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.002807  min_lr: 0.002807  loss: 3.3368 (3.4100)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6012 (0.6209)
Test:  [ 0/50]  eta: 0:02:44  loss: 0.5590 (0.5590)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 3.2932  data: 2.9477  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8480 (0.8658)  acc1: 85.2000 (83.9636)  acc5: 97.2000 (96.7636)  time: 0.5553  data: 0.2684  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8626 (0.8830)  acc1: 80.0000 (82.4191)  acc5: 97.2000 (96.8571)  time: 0.2815  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0730 (0.9962)  acc1: 76.8000 (80.3226)  acc5: 94.8000 (95.4065)  time: 0.2817  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3077 (1.0714)  acc1: 73.2000 (78.2439)  acc5: 91.6000 (94.5268)  time: 0.2814  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2643 (1.0801)  acc1: 72.0000 (77.6800)  acc5: 93.2000 (94.5440)  time: 0.2811  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3441 s / it)
* Acc@1 77.768 Acc@5 94.546 loss 1.082
Accuracy of the model on the 50000 test images: 77.8%
Max accuracy: 78.20%
Epoch: [123]  [   0/2502]  eta: 1:53:04  lr: 0.002807  min_lr: 0.002807  loss: 3.4002 (3.4002)  weight_decay: 0.0500 (0.0500)  time: 2.7117  data: 1.8712  max mem: 53905
Epoch: [123]  [ 200/2502]  eta: 0:23:10  lr: 0.002805  min_lr: 0.002805  loss: 3.5948 (3.3492)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6642 (0.6384)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [123]  [ 400/2502]  eta: 0:20:58  lr: 0.002804  min_lr: 0.002804  loss: 3.6316 (3.3634)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5848 (0.6287)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [123]  [ 600/2502]  eta: 0:18:54  lr: 0.002802  min_lr: 0.002802  loss: 3.4395 (3.3795)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6099 (0.6278)  time: 0.5931  data: 0.0007  max mem: 53905
Epoch: [123]  [ 800/2502]  eta: 0:16:53  lr: 0.002800  min_lr: 0.002800  loss: 3.7282 (3.3841)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5781 (0.6325)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [123]  [1000/2502]  eta: 0:14:53  lr: 0.002799  min_lr: 0.002799  loss: 3.5700 (3.3890)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6448 (0.6385)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [123]  [1200/2502]  eta: 0:12:54  lr: 0.002797  min_lr: 0.002797  loss: 3.4829 (3.3928)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6111 (0.6370)  time: 0.5914  data: 0.0007  max mem: 53905
Epoch: [123]  [1400/2502]  eta: 0:10:54  lr: 0.002795  min_lr: 0.002795  loss: 3.0705 (3.3842)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6321 (0.6329)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [123]  [1600/2502]  eta: 0:08:55  lr: 0.002794  min_lr: 0.002794  loss: 3.6843 (3.3864)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5514 (0.6285)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [123]  [1800/2502]  eta: 0:06:56  lr: 0.002792  min_lr: 0.002792  loss: 3.2805 (3.3780)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6690 (0.6287)  time: 0.5964  data: 0.0006  max mem: 53905
Epoch: [123]  [2000/2502]  eta: 0:04:58  lr: 0.002790  min_lr: 0.002790  loss: 3.5982 (3.3756)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6619 (0.6281)  time: 0.5974  data: 0.0006  max mem: 53905
Epoch: [123]  [2200/2502]  eta: 0:02:59  lr: 0.002789  min_lr: 0.002789  loss: 3.4970 (3.3785)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6098 (0.6257)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [123]  [2400/2502]  eta: 0:01:00  lr: 0.002787  min_lr: 0.002787  loss: 3.6649 (3.3858)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6174 (0.6257)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [123]  [2501/2502]  eta: 0:00:00  lr: 0.002786  min_lr: 0.002786  loss: 3.4678 (3.3877)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6473 (0.6268)  time: 0.5317  data: 0.0006  max mem: 53905
Epoch: [123] Total time: 0:24:44 (0.5931 s / it)
Averaged stats: lr: 0.002786  min_lr: 0.002786  loss: 3.4678 (3.4010)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6473 (0.6268)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.6138 (0.6138)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 3.0175  data: 2.6903  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8626 (0.9230)  acc1: 84.8000 (83.5636)  acc5: 96.8000 (96.7636)  time: 0.5682  data: 0.2803  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9611 (0.9566)  acc1: 80.4000 (81.9619)  acc5: 96.8000 (97.0476)  time: 0.3036  data: 0.0199  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1352 (1.0698)  acc1: 77.2000 (79.6774)  acc5: 96.0000 (95.4839)  time: 0.2840  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3499 (1.1369)  acc1: 74.0000 (78.1073)  acc5: 92.0000 (94.6439)  time: 0.2838  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3345 (1.1469)  acc1: 72.8000 (77.6160)  acc5: 92.4000 (94.5040)  time: 0.2835  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3484 s / it)
* Acc@1 77.964 Acc@5 94.484 loss 1.147
Accuracy of the model on the 50000 test images: 78.0%
Max accuracy: 78.20%
Epoch: [124]  [   0/2502]  eta: 1:45:53  lr: 0.002786  min_lr: 0.002786  loss: 3.2235 (3.2235)  weight_decay: 0.0500 (0.0500)  time: 2.5393  data: 1.9303  max mem: 53905
Epoch: [124]  [ 200/2502]  eta: 0:23:05  lr: 0.002785  min_lr: 0.002785  loss: 3.3070 (3.3103)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6060 (0.6425)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [124]  [ 400/2502]  eta: 0:20:55  lr: 0.002783  min_lr: 0.002783  loss: 3.5629 (3.3772)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5979 (0.6369)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [124]  [ 600/2502]  eta: 0:18:53  lr: 0.002781  min_lr: 0.002781  loss: 3.5633 (3.3895)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6073 (0.6276)  time: 0.5979  data: 0.0005  max mem: 53905
Epoch: [124]  [ 800/2502]  eta: 0:16:52  lr: 0.002780  min_lr: 0.002780  loss: 3.5173 (3.3887)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5990 (0.6226)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [124]  [1000/2502]  eta: 0:14:52  lr: 0.002778  min_lr: 0.002778  loss: 3.5691 (3.3956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6539 (0.6223)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [124]  [1200/2502]  eta: 0:12:53  lr: 0.002776  min_lr: 0.002776  loss: 3.4685 (3.4004)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6044 (0.6202)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [124]  [1400/2502]  eta: 0:10:54  lr: 0.002775  min_lr: 0.002775  loss: 3.3670 (3.3976)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6508 (0.6203)  time: 0.5946  data: 0.0004  max mem: 53905
Epoch: [124]  [1600/2502]  eta: 0:08:55  lr: 0.002773  min_lr: 0.002773  loss: 3.4294 (3.4016)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5723 (0.6224)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [124]  [1800/2502]  eta: 0:06:56  lr: 0.002771  min_lr: 0.002771  loss: 3.4859 (3.4046)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5958 (0.6246)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [124]  [2000/2502]  eta: 0:04:58  lr: 0.002770  min_lr: 0.002770  loss: 3.7312 (3.4058)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6220 (0.6247)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [124]  [2200/2502]  eta: 0:02:59  lr: 0.002768  min_lr: 0.002768  loss: 3.4792 (3.3977)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5924 (0.6234)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [124]  [2400/2502]  eta: 0:01:00  lr: 0.002767  min_lr: 0.002767  loss: 3.4644 (3.3977)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6315 (0.6249)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [124]  [2501/2502]  eta: 0:00:00  lr: 0.002766  min_lr: 0.002766  loss: 3.6009 (3.3992)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5917 (0.6249)  time: 0.5333  data: 0.0008  max mem: 53905
Epoch: [124] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.002766  min_lr: 0.002766  loss: 3.6009 (3.3949)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5917 (0.6249)
Test:  [ 0/50]  eta: 0:03:08  loss: 0.6190 (0.6190)  acc1: 93.6000 (93.6000)  acc5: 99.2000 (99.2000)  time: 3.7706  data: 3.4478  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.9203 (0.9299)  acc1: 83.2000 (83.7091)  acc5: 97.2000 (97.0182)  time: 0.6005  data: 0.3139  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9222 (0.9452)  acc1: 82.0000 (82.8762)  acc5: 96.8000 (96.9905)  time: 0.2839  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1091 (1.0614)  acc1: 78.8000 (80.6581)  acc5: 95.2000 (95.5484)  time: 0.2842  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3203 (1.1355)  acc1: 75.2000 (78.9073)  acc5: 92.0000 (94.7902)  time: 0.2838  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3203 (1.1567)  acc1: 73.6000 (78.3120)  acc5: 92.8000 (94.5680)  time: 0.2835  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3566 s / it)
* Acc@1 78.184 Acc@5 94.660 loss 1.159
Accuracy of the model on the 50000 test images: 78.2%
Max accuracy: 78.20%
Epoch: [125]  [   0/2502]  eta: 1:51:54  lr: 0.002766  min_lr: 0.002766  loss: 3.0421 (3.0421)  weight_decay: 0.0500 (0.0500)  time: 2.6838  data: 2.0911  max mem: 53905
Epoch: [125]  [ 200/2502]  eta: 0:23:13  lr: 0.002764  min_lr: 0.002764  loss: 3.5106 (3.4104)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6122 (0.6171)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [125]  [ 400/2502]  eta: 0:20:58  lr: 0.002762  min_lr: 0.002762  loss: 3.3791 (3.4025)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6307 (0.6281)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [125]  [ 600/2502]  eta: 0:18:54  lr: 0.002761  min_lr: 0.002761  loss: 3.4752 (3.3915)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6068 (nan)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [125]  [ 800/2502]  eta: 0:16:53  lr: 0.002759  min_lr: 0.002759  loss: 3.6358 (3.3820)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6094 (nan)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [125]  [1000/2502]  eta: 0:14:53  lr: 0.002757  min_lr: 0.002757  loss: 3.7089 (3.3926)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6497 (nan)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [125]  [1200/2502]  eta: 0:12:53  lr: 0.002756  min_lr: 0.002756  loss: 3.2595 (3.3969)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5829 (nan)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [125]  [1400/2502]  eta: 0:10:54  lr: 0.002754  min_lr: 0.002754  loss: 3.5045 (3.3954)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6425 (nan)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [125]  [1600/2502]  eta: 0:08:55  lr: 0.002752  min_lr: 0.002752  loss: 3.6151 (3.3932)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6082 (nan)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [125]  [1800/2502]  eta: 0:06:56  lr: 0.002751  min_lr: 0.002751  loss: 3.5575 (3.3930)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6383 (nan)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [125]  [2000/2502]  eta: 0:04:58  lr: 0.002749  min_lr: 0.002749  loss: 2.9422 (3.3901)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6387 (nan)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [125]  [2200/2502]  eta: 0:02:59  lr: 0.002747  min_lr: 0.002747  loss: 3.5661 (3.3907)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6039 (nan)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [125]  [2400/2502]  eta: 0:01:00  lr: 0.002746  min_lr: 0.002746  loss: 3.5322 (3.3895)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6147 (nan)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [125]  [2501/2502]  eta: 0:00:00  lr: 0.002745  min_lr: 0.002745  loss: 3.4215 (3.3872)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5915 (nan)  time: 0.5328  data: 0.0007  max mem: 53905
Epoch: [125] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.002745  min_lr: 0.002745  loss: 3.4215 (3.3865)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5915 (nan)
Test:  [ 0/50]  eta: 0:02:42  loss: 0.4411 (0.4411)  acc1: 93.2000 (93.2000)  acc5: 99.6000 (99.6000)  time: 3.2424  data: 2.9084  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8338 (0.8279)  acc1: 83.2000 (83.5636)  acc5: 96.8000 (96.5818)  time: 0.5951  data: 0.3077  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8338 (0.8480)  acc1: 82.0000 (82.7810)  acc5: 96.8000 (96.7810)  time: 0.3073  data: 0.0240  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0004 (0.9511)  acc1: 76.8000 (80.5290)  acc5: 94.8000 (95.5742)  time: 0.2900  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2075 (1.0178)  acc1: 74.8000 (78.8000)  acc5: 92.8000 (94.8195)  time: 0.2920  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1882 (1.0340)  acc1: 74.8000 (78.2880)  acc5: 92.8000 (94.6000)  time: 0.2864  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3577 s / it)
* Acc@1 78.282 Acc@5 94.652 loss 1.042
Accuracy of the model on the 50000 test images: 78.3%
Max accuracy: 78.28%
Epoch: [126]  [   0/2502]  eta: 1:51:53  lr: 0.002745  min_lr: 0.002745  loss: 3.1771 (3.1771)  weight_decay: 0.0500 (0.0500)  time: 2.6832  data: 2.0829  max mem: 53905
Epoch: [126]  [ 200/2502]  eta: 0:23:07  lr: 0.002743  min_lr: 0.002743  loss: 3.4973 (3.3766)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6247 (0.6347)  time: 0.5937  data: 0.0007  max mem: 53905
Epoch: [126]  [ 400/2502]  eta: 0:20:56  lr: 0.002742  min_lr: 0.002742  loss: 3.1690 (3.3611)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6297 (0.6353)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [126]  [ 600/2502]  eta: 0:18:53  lr: 0.002740  min_lr: 0.002740  loss: 3.4220 (3.3670)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5834 (0.6317)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [126]  [ 800/2502]  eta: 0:16:52  lr: 0.002738  min_lr: 0.002738  loss: 3.3420 (3.3593)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6390 (0.6302)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [126]  [1000/2502]  eta: 0:14:52  lr: 0.002737  min_lr: 0.002737  loss: 3.3012 (3.3718)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5966 (0.6331)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [126]  [1200/2502]  eta: 0:12:53  lr: 0.002735  min_lr: 0.002735  loss: 3.6388 (3.3743)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6054 (0.6302)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [126]  [1400/2502]  eta: 0:10:54  lr: 0.002733  min_lr: 0.002733  loss: 3.4764 (3.3823)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6099 (0.6317)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [126]  [1600/2502]  eta: 0:08:55  lr: 0.002732  min_lr: 0.002732  loss: 3.4342 (3.3825)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5646 (0.6304)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [126]  [1800/2502]  eta: 0:06:56  lr: 0.002730  min_lr: 0.002730  loss: 3.4028 (3.3787)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5888 (0.6262)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [126]  [2000/2502]  eta: 0:04:57  lr: 0.002728  min_lr: 0.002728  loss: 3.6971 (3.3842)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6487 (0.6286)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [126]  [2200/2502]  eta: 0:02:59  lr: 0.002727  min_lr: 0.002727  loss: 3.3548 (3.3861)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5682 (0.6274)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [126]  [2400/2502]  eta: 0:01:00  lr: 0.002725  min_lr: 0.002725  loss: 3.4061 (3.3840)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6756 (0.6274)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [126]  [2501/2502]  eta: 0:00:00  lr: 0.002724  min_lr: 0.002724  loss: 3.3155 (3.3822)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5616 (0.6258)  time: 0.5326  data: 0.0009  max mem: 53905
Epoch: [126] Total time: 0:24:44 (0.5931 s / it)
Averaged stats: lr: 0.002724  min_lr: 0.002724  loss: 3.3155 (3.3872)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5616 (0.6258)
Test:  [ 0/50]  eta: 0:02:55  loss: 0.5551 (0.5551)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 3.5178  data: 3.2049  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8502 (0.8943)  acc1: 83.2000 (84.4727)  acc5: 96.4000 (96.5455)  time: 0.5778  data: 0.2917  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9124 (0.9162)  acc1: 82.8000 (83.6000)  acc5: 96.4000 (96.7810)  time: 0.2841  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1037 (1.0218)  acc1: 79.6000 (81.3806)  acc5: 94.8000 (95.3290)  time: 0.2843  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2121 (1.0827)  acc1: 74.0000 (79.4244)  acc5: 92.4000 (94.6537)  time: 0.2839  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1999 (1.0945)  acc1: 72.8000 (78.8800)  acc5: 93.2000 (94.5840)  time: 0.2837  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3518 s / it)
* Acc@1 78.316 Acc@5 94.634 loss 1.103
Accuracy of the model on the 50000 test images: 78.3%
Max accuracy: 78.32%
Epoch: [127]  [   0/2502]  eta: 1:40:46  lr: 0.002724  min_lr: 0.002724  loss: 3.1193 (3.1193)  weight_decay: 0.0500 (0.0500)  time: 2.4166  data: 1.8216  max mem: 53905
Epoch: [127]  [ 200/2502]  eta: 0:23:06  lr: 0.002722  min_lr: 0.002722  loss: 2.9132 (3.2901)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6907 (0.6575)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [127]  [ 400/2502]  eta: 0:20:57  lr: 0.002721  min_lr: 0.002721  loss: 3.5561 (3.3437)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6542 (0.6471)  time: 0.5939  data: 0.0005  max mem: 53905
Epoch: [127]  [ 600/2502]  eta: 0:18:53  lr: 0.002719  min_lr: 0.002719  loss: 3.3537 (3.3297)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5877 (0.6489)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [127]  [ 800/2502]  eta: 0:16:53  lr: 0.002717  min_lr: 0.002717  loss: 3.5809 (3.3453)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5920 (0.6412)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [127]  [1000/2502]  eta: 0:14:53  lr: 0.002716  min_lr: 0.002716  loss: 3.5160 (3.3427)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5846 (0.6365)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [127]  [1200/2502]  eta: 0:12:54  lr: 0.002714  min_lr: 0.002714  loss: 3.3650 (3.3530)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5893 (0.6320)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [127]  [1400/2502]  eta: 0:10:55  lr: 0.002712  min_lr: 0.002712  loss: 3.2653 (3.3494)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6078 (0.6334)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [127]  [1600/2502]  eta: 0:08:56  lr: 0.002711  min_lr: 0.002711  loss: 3.4811 (3.3509)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5826 (0.6327)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [127]  [1800/2502]  eta: 0:06:57  lr: 0.002709  min_lr: 0.002709  loss: 3.5092 (3.3548)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6310 (0.6346)  time: 0.6005  data: 0.0005  max mem: 53905
Epoch: [127]  [2000/2502]  eta: 0:04:58  lr: 0.002707  min_lr: 0.002707  loss: 3.4464 (3.3645)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6362 (0.6340)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [127]  [2200/2502]  eta: 0:02:59  lr: 0.002706  min_lr: 0.002706  loss: 3.5016 (3.3701)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5566 (0.6322)  time: 0.5922  data: 0.0007  max mem: 53905
Epoch: [127]  [2400/2502]  eta: 0:01:00  lr: 0.002704  min_lr: 0.002704  loss: 3.4143 (3.3758)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6320 (0.6322)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [127]  [2501/2502]  eta: 0:00:00  lr: 0.002703  min_lr: 0.002703  loss: 3.6591 (3.3783)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6528 (0.6330)  time: 0.5330  data: 0.0009  max mem: 53905
Epoch: [127] Total time: 0:24:45 (0.5936 s / it)
Averaged stats: lr: 0.002703  min_lr: 0.002703  loss: 3.6591 (3.3864)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6528 (0.6330)
Test:  [ 0/50]  eta: 0:02:38  loss: 0.7610 (0.7610)  acc1: 90.4000 (90.4000)  acc5: 99.2000 (99.2000)  time: 3.1694  data: 2.8429  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.9321 (0.9752)  acc1: 83.6000 (83.9273)  acc5: 96.8000 (96.7636)  time: 0.5440  data: 0.2589  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.0010 (1.0005)  acc1: 82.4000 (83.0286)  acc5: 96.8000 (96.8571)  time: 0.2815  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.2298 (1.1183)  acc1: 78.0000 (81.1097)  acc5: 94.4000 (95.2903)  time: 0.2816  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.4051 (1.1946)  acc1: 75.2000 (79.2000)  acc5: 91.2000 (94.4488)  time: 0.2813  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.4018 (1.2127)  acc1: 74.8000 (78.5120)  acc5: 92.4000 (94.3040)  time: 0.2811  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3417 s / it)
* Acc@1 78.324 Acc@5 94.566 loss 1.214
Accuracy of the model on the 50000 test images: 78.3%
Max accuracy: 78.32%
Epoch: [128]  [   0/2502]  eta: 1:36:41  lr: 0.002703  min_lr: 0.002703  loss: 3.8371 (3.8371)  weight_decay: 0.0500 (0.0500)  time: 2.3188  data: 1.7292  max mem: 53905
Epoch: [128]  [ 200/2502]  eta: 0:22:59  lr: 0.002701  min_lr: 0.002701  loss: 3.4034 (3.3730)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5851 (0.6156)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [128]  [ 400/2502]  eta: 0:20:52  lr: 0.002700  min_lr: 0.002700  loss: 3.4926 (3.3606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6011 (0.6302)  time: 0.5978  data: 0.0004  max mem: 53905
Epoch: [128]  [ 600/2502]  eta: 0:18:51  lr: 0.002698  min_lr: 0.002698  loss: 3.5638 (3.3587)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5611 (0.6196)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [128]  [ 800/2502]  eta: 0:16:50  lr: 0.002696  min_lr: 0.002696  loss: 3.1961 (3.3598)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6388 (0.6238)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [128]  [1000/2502]  eta: 0:14:51  lr: 0.002695  min_lr: 0.002695  loss: 3.4771 (3.3742)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6182 (0.6281)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [128]  [1200/2502]  eta: 0:12:52  lr: 0.002693  min_lr: 0.002693  loss: 3.3917 (3.3630)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6259 (0.6301)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [128]  [1400/2502]  eta: 0:10:53  lr: 0.002691  min_lr: 0.002691  loss: 3.2068 (3.3715)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6155 (0.6331)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [128]  [1600/2502]  eta: 0:08:54  lr: 0.002690  min_lr: 0.002690  loss: 3.4393 (3.3681)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5807 (0.6286)  time: 0.5935  data: 0.0006  max mem: 53905
Epoch: [128]  [1800/2502]  eta: 0:06:56  lr: 0.002688  min_lr: 0.002688  loss: 3.3977 (3.3728)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6313 (0.6295)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [128]  [2000/2502]  eta: 0:04:57  lr: 0.002686  min_lr: 0.002686  loss: 3.4800 (3.3727)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6078 (nan)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [128]  [2200/2502]  eta: 0:02:59  lr: 0.002685  min_lr: 0.002685  loss: 3.3335 (3.3699)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6351 (nan)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [128]  [2400/2502]  eta: 0:01:00  lr: 0.002683  min_lr: 0.002683  loss: 3.4150 (3.3708)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6061 (nan)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [128]  [2501/2502]  eta: 0:00:00  lr: 0.002682  min_lr: 0.002682  loss: 3.6324 (3.3718)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6452 (nan)  time: 0.5326  data: 0.0007  max mem: 53905
Epoch: [128] Total time: 0:24:42 (0.5925 s / it)
Averaged stats: lr: 0.002682  min_lr: 0.002682  loss: 3.6324 (3.3777)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6452 (nan)
Test:  [ 0/50]  eta: 0:03:24  loss: 0.6372 (0.6372)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 4.0841  data: 3.7778  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 0.9128 (0.9832)  acc1: 82.8000 (84.0364)  acc5: 96.8000 (97.0182)  time: 0.6290  data: 0.3439  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9424 (0.9938)  acc1: 82.4000 (83.4095)  acc5: 97.2000 (97.1619)  time: 0.2836  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1692 (1.1039)  acc1: 77.6000 (81.1226)  acc5: 94.4000 (95.6774)  time: 0.2838  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3618 (1.1775)  acc1: 76.0000 (79.2976)  acc5: 92.0000 (94.7610)  time: 0.2836  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3618 (1.1975)  acc1: 74.4000 (78.6240)  acc5: 92.4000 (94.6400)  time: 0.2834  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3628 s / it)
* Acc@1 78.408 Acc@5 94.676 loss 1.206
Accuracy of the model on the 50000 test images: 78.4%
Max accuracy: 78.41%
Epoch: [129]  [   0/2502]  eta: 1:44:02  lr: 0.002682  min_lr: 0.002682  loss: 3.1039 (3.1039)  weight_decay: 0.0500 (0.0500)  time: 2.4948  data: 1.8853  max mem: 53905
Epoch: [129]  [ 200/2502]  eta: 0:23:11  lr: 0.002680  min_lr: 0.002680  loss: 3.1333 (3.4042)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6000 (0.6239)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [129]  [ 400/2502]  eta: 0:20:58  lr: 0.002679  min_lr: 0.002679  loss: 3.6484 (3.3872)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6192 (0.6324)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [129]  [ 600/2502]  eta: 0:18:54  lr: 0.002677  min_lr: 0.002677  loss: 3.2490 (3.3805)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6339 (0.6326)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [129]  [ 800/2502]  eta: 0:16:54  lr: 0.002675  min_lr: 0.002675  loss: 3.6629 (3.3816)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6136 (0.6288)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [129]  [1000/2502]  eta: 0:14:53  lr: 0.002674  min_lr: 0.002674  loss: 3.3569 (3.3822)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6028 (0.6307)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [129]  [1200/2502]  eta: 0:12:54  lr: 0.002672  min_lr: 0.002672  loss: 3.5991 (3.3840)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6317 (0.6329)  time: 0.5961  data: 0.0005  max mem: 53905
Epoch: [129]  [1400/2502]  eta: 0:10:55  lr: 0.002670  min_lr: 0.002670  loss: 3.6989 (3.3897)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5948 (0.6290)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [129]  [1600/2502]  eta: 0:08:55  lr: 0.002669  min_lr: 0.002669  loss: 3.6186 (3.3988)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6265 (0.6299)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [129]  [1800/2502]  eta: 0:06:56  lr: 0.002667  min_lr: 0.002667  loss: 3.6669 (3.3923)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6498 (0.6338)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [129]  [2000/2502]  eta: 0:04:58  lr: 0.002665  min_lr: 0.002665  loss: 3.5339 (3.3950)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6036 (0.6332)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [129]  [2200/2502]  eta: 0:02:59  lr: 0.002663  min_lr: 0.002663  loss: 3.3750 (3.3959)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6556 (0.6352)  time: 0.5904  data: 0.0005  max mem: 53905
Epoch: [129]  [2400/2502]  eta: 0:01:00  lr: 0.002662  min_lr: 0.002662  loss: 3.4300 (3.3904)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6234 (0.6363)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [129]  [2501/2502]  eta: 0:00:00  lr: 0.002661  min_lr: 0.002661  loss: 3.5067 (3.3896)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6101 (0.6365)  time: 0.5330  data: 0.0008  max mem: 53905
Epoch: [129] Total time: 0:24:44 (0.5931 s / it)
Averaged stats: lr: 0.002661  min_lr: 0.002661  loss: 3.5067 (3.3860)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6101 (0.6365)
Test:  [ 0/50]  eta: 0:02:58  loss: 0.5130 (0.5130)  acc1: 92.4000 (92.4000)  acc5: 99.2000 (99.2000)  time: 3.5644  data: 3.2485  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7925 (0.8581)  acc1: 84.8000 (84.0364)  acc5: 97.6000 (96.8727)  time: 0.5814  data: 0.2957  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8263 (0.8613)  acc1: 81.2000 (82.8191)  acc5: 96.8000 (96.8952)  time: 0.2834  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0122 (0.9693)  acc1: 77.6000 (80.5936)  acc5: 94.4000 (95.4710)  time: 0.2837  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2368 (1.0301)  acc1: 75.2000 (79.0829)  acc5: 92.0000 (94.8000)  time: 0.2834  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2177 (1.0444)  acc1: 74.0000 (78.4880)  acc5: 93.2000 (94.8000)  time: 0.2832  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3515 s / it)
* Acc@1 78.636 Acc@5 94.748 loss 1.047
Accuracy of the model on the 50000 test images: 78.6%
Max accuracy: 78.64%
Epoch: [130]  [   0/2502]  eta: 1:43:01  lr: 0.002661  min_lr: 0.002661  loss: 2.9715 (2.9715)  weight_decay: 0.0500 (0.0500)  time: 2.4707  data: 1.8821  max mem: 53905
Epoch: [130]  [ 200/2502]  eta: 0:23:04  lr: 0.002659  min_lr: 0.002659  loss: 3.4593 (3.4049)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5835 (0.5996)  time: 0.5927  data: 0.0006  max mem: 53905
Epoch: [130]  [ 400/2502]  eta: 0:20:56  lr: 0.002658  min_lr: 0.002658  loss: 3.3849 (3.3804)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6321 (0.6161)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [130]  [ 600/2502]  eta: 0:18:53  lr: 0.002656  min_lr: 0.002656  loss: 3.3394 (3.3723)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6031 (0.6306)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [130]  [ 800/2502]  eta: 0:16:52  lr: 0.002654  min_lr: 0.002654  loss: 3.5238 (3.3621)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6415 (0.6321)  time: 0.5978  data: 0.0006  max mem: 53905
Epoch: [130]  [1000/2502]  eta: 0:14:53  lr: 0.002652  min_lr: 0.002652  loss: 3.5413 (3.3611)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6310 (0.6339)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [130]  [1200/2502]  eta: 0:12:53  lr: 0.002651  min_lr: 0.002651  loss: 3.5189 (3.3673)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6246 (0.6350)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [130]  [1400/2502]  eta: 0:10:54  lr: 0.002649  min_lr: 0.002649  loss: 3.5544 (3.3774)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6197 (0.6365)  time: 0.5919  data: 0.0007  max mem: 53905
Epoch: [130]  [1600/2502]  eta: 0:08:55  lr: 0.002647  min_lr: 0.002647  loss: 3.4811 (3.3668)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6696 (0.6408)  time: 0.5949  data: 0.0005  max mem: 53905
Epoch: [130]  [1800/2502]  eta: 0:06:56  lr: 0.002646  min_lr: 0.002646  loss: 3.2806 (3.3703)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6588 (0.6416)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [130]  [2000/2502]  eta: 0:04:57  lr: 0.002644  min_lr: 0.002644  loss: 3.4779 (3.3717)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6295 (0.6392)  time: 0.5988  data: 0.0006  max mem: 53905
Epoch: [130]  [2200/2502]  eta: 0:02:59  lr: 0.002642  min_lr: 0.002642  loss: 3.2442 (3.3664)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6368 (0.6400)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [130]  [2400/2502]  eta: 0:01:00  lr: 0.002641  min_lr: 0.002641  loss: 3.3560 (3.3668)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6116 (0.6396)  time: 0.5912  data: 0.0007  max mem: 53905
Epoch: [130]  [2501/2502]  eta: 0:00:00  lr: 0.002640  min_lr: 0.002640  loss: 3.6850 (3.3663)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5756 (0.6382)  time: 0.5321  data: 0.0006  max mem: 53905
Epoch: [130] Total time: 0:24:43 (0.5930 s / it)
Averaged stats: lr: 0.002640  min_lr: 0.002640  loss: 3.6850 (3.3744)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5756 (0.6382)
Test:  [ 0/50]  eta: 0:02:43  loss: 0.6296 (0.6296)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 3.2734  data: 2.9375  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8732 (0.9455)  acc1: 83.6000 (83.7818)  acc5: 97.2000 (96.7636)  time: 0.6215  data: 0.3347  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9288 (0.9530)  acc1: 81.6000 (83.3143)  acc5: 97.2000 (96.9333)  time: 0.3195  data: 0.0374  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.1501 (1.0723)  acc1: 77.6000 (80.9419)  acc5: 94.4000 (95.4065)  time: 0.2826  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3269 (1.1317)  acc1: 75.2000 (79.3073)  acc5: 92.0000 (94.6439)  time: 0.2825  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2958 (1.1505)  acc1: 74.4000 (78.7120)  acc5: 92.0000 (94.5520)  time: 0.2841  data: 0.0001  max mem: 53905
Test: Total time: 0:00:18 (0.3602 s / it)
* Acc@1 78.770 Acc@5 94.742 loss 1.154
Accuracy of the model on the 50000 test images: 78.8%
Max accuracy: 78.77%
Epoch: [131]  [   0/2502]  eta: 1:54:55  lr: 0.002640  min_lr: 0.002640  loss: 3.8922 (3.8922)  weight_decay: 0.0500 (0.0500)  time: 2.7561  data: 2.1516  max mem: 53905
Epoch: [131]  [ 200/2502]  eta: 0:23:10  lr: 0.002638  min_lr: 0.002638  loss: 3.5203 (3.3303)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6017 (0.6511)  time: 0.6009  data: 0.0008  max mem: 53905
Epoch: [131]  [ 400/2502]  eta: 0:20:57  lr: 0.002636  min_lr: 0.002636  loss: 3.0701 (3.3257)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6313 (nan)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [131]  [ 600/2502]  eta: 0:18:54  lr: 0.002635  min_lr: 0.002635  loss: 3.5090 (3.3362)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6085 (nan)  time: 0.5978  data: 0.0007  max mem: 53905
Epoch: [131]  [ 800/2502]  eta: 0:16:53  lr: 0.002633  min_lr: 0.002633  loss: 3.4118 (3.3300)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5644 (nan)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [131]  [1000/2502]  eta: 0:14:53  lr: 0.002631  min_lr: 0.002631  loss: 3.2284 (3.3287)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5948 (nan)  time: 0.5917  data: 0.0007  max mem: 53905
Epoch: [131]  [1200/2502]  eta: 0:12:53  lr: 0.002629  min_lr: 0.002629  loss: 3.0393 (3.3214)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6513 (nan)  time: 0.5924  data: 0.0006  max mem: 53905
Epoch: [131]  [1400/2502]  eta: 0:10:54  lr: 0.002628  min_lr: 0.002628  loss: 3.6266 (3.3237)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6793 (nan)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [131]  [1600/2502]  eta: 0:08:55  lr: 0.002626  min_lr: 0.002626  loss: 3.4898 (3.3346)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6320 (nan)  time: 0.5910  data: 0.0006  max mem: 53905
Epoch: [131]  [1800/2502]  eta: 0:06:56  lr: 0.002624  min_lr: 0.002624  loss: 3.3127 (3.3375)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5829 (nan)  time: 0.5913  data: 0.0008  max mem: 53905
Epoch: [131]  [2000/2502]  eta: 0:04:57  lr: 0.002623  min_lr: 0.002623  loss: 3.4067 (3.3411)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6284 (nan)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [131]  [2200/2502]  eta: 0:02:59  lr: 0.002621  min_lr: 0.002621  loss: 3.5628 (3.3518)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6378 (nan)  time: 0.5978  data: 0.0004  max mem: 53905
Epoch: [131]  [2400/2502]  eta: 0:01:00  lr: 0.002619  min_lr: 0.002619  loss: 3.2732 (3.3521)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6119 (nan)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [131]  [2501/2502]  eta: 0:00:00  lr: 0.002618  min_lr: 0.002618  loss: 3.3748 (3.3555)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5976 (nan)  time: 0.5325  data: 0.0007  max mem: 53905
Epoch: [131] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.002618  min_lr: 0.002618  loss: 3.3748 (3.3647)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5976 (nan)
Test:  [ 0/50]  eta: 0:02:29  loss: 0.5931 (0.5931)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 2.9857  data: 2.6673  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8597 (0.9120)  acc1: 83.6000 (84.1091)  acc5: 97.2000 (97.2000)  time: 0.5577  data: 0.2716  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9054 (0.9321)  acc1: 82.8000 (83.1619)  acc5: 97.2000 (97.2762)  time: 0.2995  data: 0.0162  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1054 (1.0460)  acc1: 78.0000 (80.4387)  acc5: 95.6000 (95.6903)  time: 0.2839  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3083 (1.1154)  acc1: 75.6000 (78.8585)  acc5: 92.0000 (94.7707)  time: 0.2835  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2940 (1.1311)  acc1: 76.0000 (78.3200)  acc5: 92.4000 (94.5680)  time: 0.2833  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3461 s / it)
* Acc@1 78.364 Acc@5 94.620 loss 1.134
Accuracy of the model on the 50000 test images: 78.4%
Max accuracy: 78.77%
Epoch: [132]  [   0/2502]  eta: 1:44:52  lr: 0.002618  min_lr: 0.002618  loss: 3.0893 (3.0893)  weight_decay: 0.0500 (0.0500)  time: 2.5151  data: 1.9142  max mem: 53905
Epoch: [132]  [ 200/2502]  eta: 0:23:05  lr: 0.002617  min_lr: 0.002617  loss: 3.3515 (3.3018)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6181 (0.6353)  time: 0.5938  data: 0.0008  max mem: 53905
Epoch: [132]  [ 400/2502]  eta: 0:20:56  lr: 0.002615  min_lr: 0.002615  loss: 3.4011 (3.3160)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6578 (0.6430)  time: 0.5915  data: 0.0007  max mem: 53905
Epoch: [132]  [ 600/2502]  eta: 0:18:53  lr: 0.002613  min_lr: 0.002613  loss: 3.5908 (3.3288)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6322 (0.6427)  time: 0.5909  data: 0.0007  max mem: 53905
Epoch: [132]  [ 800/2502]  eta: 0:16:52  lr: 0.002612  min_lr: 0.002612  loss: 3.4104 (3.3277)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6052 (0.6412)  time: 0.5953  data: 0.0009  max mem: 53905
Epoch: [132]  [1000/2502]  eta: 0:14:52  lr: 0.002610  min_lr: 0.002610  loss: 3.2843 (3.3320)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5997 (0.6425)  time: 0.5906  data: 0.0007  max mem: 53905
Epoch: [132]  [1200/2502]  eta: 0:12:53  lr: 0.002608  min_lr: 0.002608  loss: 3.5000 (3.3424)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6082 (0.6438)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [132]  [1400/2502]  eta: 0:10:54  lr: 0.002606  min_lr: 0.002606  loss: 3.5213 (3.3512)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6075 (0.6431)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [132]  [1600/2502]  eta: 0:08:55  lr: 0.002605  min_lr: 0.002605  loss: 3.2650 (3.3560)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6179 (0.6419)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [132]  [1800/2502]  eta: 0:06:56  lr: 0.002603  min_lr: 0.002603  loss: 3.5583 (3.3576)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6471 (0.6431)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [132]  [2000/2502]  eta: 0:04:57  lr: 0.002601  min_lr: 0.002601  loss: 3.2983 (3.3587)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6091 (0.6432)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [132]  [2200/2502]  eta: 0:02:59  lr: 0.002600  min_lr: 0.002600  loss: 3.3288 (3.3644)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6005 (0.6400)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [132]  [2400/2502]  eta: 0:01:00  lr: 0.002598  min_lr: 0.002598  loss: 3.3741 (3.3660)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6016 (0.6383)  time: 0.5909  data: 0.0006  max mem: 53905
Epoch: [132]  [2501/2502]  eta: 0:00:00  lr: 0.002597  min_lr: 0.002597  loss: 3.5278 (3.3671)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6914 (0.6405)  time: 0.5324  data: 0.0013  max mem: 53905
Epoch: [132] Total time: 0:24:43 (0.5929 s / it)
Averaged stats: lr: 0.002597  min_lr: 0.002597  loss: 3.5278 (3.3628)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6914 (0.6405)
Test:  [ 0/50]  eta: 0:02:50  loss: 0.6160 (0.6160)  acc1: 92.0000 (92.0000)  acc5: 98.0000 (98.0000)  time: 3.4077  data: 3.0942  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8920 (0.9237)  acc1: 84.0000 (84.2545)  acc5: 97.6000 (96.8364)  time: 0.5983  data: 0.3130  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9015 (0.9450)  acc1: 83.2000 (82.8000)  acc5: 97.2000 (96.7429)  time: 0.3003  data: 0.0177  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0762 (1.0408)  acc1: 78.0000 (80.8258)  acc5: 94.8000 (95.5226)  time: 0.2833  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3184 (1.1084)  acc1: 74.8000 (79.1415)  acc5: 92.8000 (94.8390)  time: 0.2831  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3080 (1.1311)  acc1: 74.0000 (78.5120)  acc5: 93.6000 (94.7200)  time: 0.2828  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3548 s / it)
* Acc@1 78.432 Acc@5 94.770 loss 1.136
Accuracy of the model on the 50000 test images: 78.4%
Max accuracy: 78.77%
Epoch: [133]  [   0/2502]  eta: 1:52:26  lr: 0.002597  min_lr: 0.002597  loss: 4.0769 (4.0769)  weight_decay: 0.0500 (0.0500)  time: 2.6966  data: 2.1052  max mem: 53905
Epoch: [133]  [ 200/2502]  eta: 0:23:08  lr: 0.002595  min_lr: 0.002595  loss: 3.5378 (3.3125)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6074 (0.6323)  time: 0.5914  data: 0.0008  max mem: 53905
Epoch: [133]  [ 400/2502]  eta: 0:20:56  lr: 0.002594  min_lr: 0.002594  loss: 3.2303 (3.3290)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6088 (0.6412)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [133]  [ 600/2502]  eta: 0:18:54  lr: 0.002592  min_lr: 0.002592  loss: 3.2377 (3.3290)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6178 (0.6480)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [133]  [ 800/2502]  eta: 0:16:53  lr: 0.002590  min_lr: 0.002590  loss: 3.3537 (3.3375)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6595 (0.6492)  time: 0.5925  data: 0.0007  max mem: 53905
Epoch: [133]  [1000/2502]  eta: 0:14:53  lr: 0.002588  min_lr: 0.002588  loss: 3.4205 (3.3493)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5854 (0.6437)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [133]  [1200/2502]  eta: 0:12:54  lr: 0.002587  min_lr: 0.002587  loss: 3.4757 (3.3576)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6236 (0.6440)  time: 0.5921  data: 0.0007  max mem: 53905
Epoch: [133]  [1400/2502]  eta: 0:10:55  lr: 0.002585  min_lr: 0.002585  loss: 3.4951 (3.3619)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6126 (0.6449)  time: 0.5924  data: 0.0007  max mem: 53905
Epoch: [133]  [1600/2502]  eta: 0:08:56  lr: 0.002583  min_lr: 0.002583  loss: 3.3314 (3.3566)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6700 (0.6448)  time: 0.5979  data: 0.0006  max mem: 53905
Epoch: [133]  [1800/2502]  eta: 0:06:57  lr: 0.002582  min_lr: 0.002582  loss: 3.0760 (3.3571)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6247 (0.6448)  time: 0.5920  data: 0.0007  max mem: 53905
Epoch: [133]  [2000/2502]  eta: 0:04:58  lr: 0.002580  min_lr: 0.002580  loss: 3.4769 (3.3577)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6442 (0.6463)  time: 0.5923  data: 0.0006  max mem: 53905
Epoch: [133]  [2200/2502]  eta: 0:02:59  lr: 0.002578  min_lr: 0.002578  loss: 3.4708 (3.3585)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6406 (0.6455)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [133]  [2400/2502]  eta: 0:01:00  lr: 0.002576  min_lr: 0.002576  loss: 3.6292 (3.3539)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6439 (0.6434)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [133]  [2501/2502]  eta: 0:00:00  lr: 0.002576  min_lr: 0.002576  loss: 3.3494 (3.3541)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6146 (0.6429)  time: 0.5332  data: 0.0008  max mem: 53905
Epoch: [133] Total time: 0:24:45 (0.5939 s / it)
Averaged stats: lr: 0.002576  min_lr: 0.002576  loss: 3.3494 (3.3669)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6146 (0.6429)
Test:  [ 0/50]  eta: 0:02:33  loss: 0.5801 (0.5801)  acc1: 91.2000 (91.2000)  acc5: 98.8000 (98.8000)  time: 3.0698  data: 2.7277  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 0.8128 (0.8840)  acc1: 83.2000 (83.7455)  acc5: 96.8000 (96.8727)  time: 0.6393  data: 0.3506  max mem: 53905
Test:  [20/50]  eta: 0:00:14  loss: 0.9033 (0.9013)  acc1: 82.8000 (83.2000)  acc5: 96.8000 (97.1429)  time: 0.3402  data: 0.0567  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9924 (1.0020)  acc1: 78.4000 (80.9032)  acc5: 94.8000 (95.5871)  time: 0.2843  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2802 (1.0667)  acc1: 74.4000 (79.3268)  acc5: 92.0000 (94.7805)  time: 0.2839  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2539 (1.0808)  acc1: 74.4000 (78.6400)  acc5: 92.4000 (94.6880)  time: 0.2836  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3649 s / it)
* Acc@1 78.436 Acc@5 94.714 loss 1.087
Accuracy of the model on the 50000 test images: 78.4%
Max accuracy: 78.77%
Epoch: [134]  [   0/2502]  eta: 1:51:46  lr: 0.002576  min_lr: 0.002576  loss: 3.9706 (3.9706)  weight_decay: 0.0500 (0.0500)  time: 2.6806  data: 2.0757  max mem: 53905
Epoch: [134]  [ 200/2502]  eta: 0:23:08  lr: 0.002574  min_lr: 0.002574  loss: 3.4641 (3.3564)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6266 (0.6464)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [134]  [ 400/2502]  eta: 0:20:57  lr: 0.002572  min_lr: 0.002572  loss: 3.3393 (3.3438)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6486 (0.6526)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [134]  [ 600/2502]  eta: 0:18:55  lr: 0.002570  min_lr: 0.002570  loss: 3.6097 (3.3464)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6461 (0.6481)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [134]  [ 800/2502]  eta: 0:16:54  lr: 0.002569  min_lr: 0.002569  loss: 3.3862 (3.3390)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6287 (0.6467)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [134]  [1000/2502]  eta: 0:14:54  lr: 0.002567  min_lr: 0.002567  loss: 3.2011 (3.3577)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6274 (0.6455)  time: 0.5997  data: 0.0004  max mem: 53905
Epoch: [134]  [1200/2502]  eta: 0:12:54  lr: 0.002565  min_lr: 0.002565  loss: 3.2417 (3.3525)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6924 (0.6533)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [134]  [1400/2502]  eta: 0:10:55  lr: 0.002564  min_lr: 0.002564  loss: 3.4395 (3.3460)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5973 (0.6497)  time: 0.5977  data: 0.0004  max mem: 53905
Epoch: [134]  [1600/2502]  eta: 0:08:56  lr: 0.002562  min_lr: 0.002562  loss: 3.4482 (3.3496)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [134]  [1800/2502]  eta: 0:06:57  lr: 0.002560  min_lr: 0.002560  loss: 3.2929 (3.3522)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6509 (nan)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [134]  [2000/2502]  eta: 0:04:58  lr: 0.002558  min_lr: 0.002558  loss: 3.3919 (3.3510)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6147 (nan)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [134]  [2200/2502]  eta: 0:02:59  lr: 0.002557  min_lr: 0.002557  loss: 3.4428 (3.3534)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6623 (nan)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [134]  [2400/2502]  eta: 0:01:00  lr: 0.002555  min_lr: 0.002555  loss: 3.3543 (3.3517)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6248 (nan)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [134]  [2501/2502]  eta: 0:00:00  lr: 0.002554  min_lr: 0.002554  loss: 3.5584 (3.3527)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6180 (nan)  time: 0.5381  data: 0.0009  max mem: 53905
Epoch: [134] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.002554  min_lr: 0.002554  loss: 3.5584 (3.3605)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6180 (nan)
Test:  [ 0/50]  eta: 0:02:53  loss: 0.5813 (0.5813)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 3.4611  data: 3.1251  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8412 (0.8812)  acc1: 84.0000 (84.6182)  acc5: 97.6000 (97.1273)  time: 0.5855  data: 0.2974  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9001 (0.9111)  acc1: 81.6000 (83.3333)  acc5: 97.2000 (97.0095)  time: 0.2912  data: 0.0076  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0127 (0.9958)  acc1: 80.4000 (81.4194)  acc5: 94.4000 (95.8194)  time: 0.2845  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2502 (1.0604)  acc1: 74.4000 (79.5024)  acc5: 92.4000 (95.0537)  time: 0.2871  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2502 (1.0779)  acc1: 74.4000 (78.8720)  acc5: 93.2000 (94.8800)  time: 0.2899  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3552 s / it)
* Acc@1 78.784 Acc@5 94.906 loss 1.083
Accuracy of the model on the 50000 test images: 78.8%
Max accuracy: 78.78%
Epoch: [135]  [   0/2502]  eta: 1:41:40  lr: 0.002554  min_lr: 0.002554  loss: 3.7285 (3.7285)  weight_decay: 0.0500 (0.0500)  time: 2.4381  data: 1.8419  max mem: 53905
Epoch: [135]  [ 200/2502]  eta: 0:23:05  lr: 0.002552  min_lr: 0.002552  loss: 3.2074 (3.3156)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6852 (0.6807)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [135]  [ 400/2502]  eta: 0:20:55  lr: 0.002551  min_lr: 0.002551  loss: 3.1566 (3.3148)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5774 (0.6538)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [135]  [ 600/2502]  eta: 0:18:54  lr: 0.002549  min_lr: 0.002549  loss: 3.5644 (3.3124)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6345 (0.6554)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [135]  [ 800/2502]  eta: 0:16:53  lr: 0.002547  min_lr: 0.002547  loss: 3.2216 (3.3306)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5933 (0.6449)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [135]  [1000/2502]  eta: 0:14:53  lr: 0.002545  min_lr: 0.002545  loss: 3.3342 (3.3402)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6403 (0.6505)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [135]  [1200/2502]  eta: 0:12:54  lr: 0.002544  min_lr: 0.002544  loss: 3.1837 (3.3386)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6434 (0.6486)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [135]  [1400/2502]  eta: 0:10:55  lr: 0.002542  min_lr: 0.002542  loss: 3.4663 (3.3435)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6206 (0.6483)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [135]  [1600/2502]  eta: 0:08:56  lr: 0.002540  min_lr: 0.002540  loss: 3.2295 (3.3377)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6430 (0.6491)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [135]  [1800/2502]  eta: 0:06:57  lr: 0.002538  min_lr: 0.002538  loss: 3.4325 (3.3417)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6451 (0.6496)  time: 0.6001  data: 0.0004  max mem: 53905
Epoch: [135]  [2000/2502]  eta: 0:04:58  lr: 0.002537  min_lr: 0.002537  loss: 3.1602 (3.3420)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6857 (0.6525)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [135]  [2200/2502]  eta: 0:02:59  lr: 0.002535  min_lr: 0.002535  loss: 3.3195 (3.3449)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6400 (0.6526)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [135]  [2400/2502]  eta: 0:01:00  lr: 0.002533  min_lr: 0.002533  loss: 3.4984 (3.3476)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6312 (0.6524)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [135]  [2501/2502]  eta: 0:00:00  lr: 0.002532  min_lr: 0.002532  loss: 3.4380 (3.3479)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6007 (0.6518)  time: 0.5325  data: 0.0008  max mem: 53905
Epoch: [135] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.002532  min_lr: 0.002532  loss: 3.4380 (3.3492)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6007 (0.6518)
Test:  [ 0/50]  eta: 0:02:39  loss: 0.5513 (0.5513)  acc1: 92.4000 (92.4000)  acc5: 99.2000 (99.2000)  time: 3.1955  data: 2.8755  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.8483 (0.8588)  acc1: 81.6000 (83.4909)  acc5: 97.2000 (97.1273)  time: 0.5487  data: 0.2619  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8802 (0.8824)  acc1: 81.2000 (83.2762)  acc5: 97.2000 (97.2952)  time: 0.2842  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0334 (0.9743)  acc1: 78.4000 (81.3677)  acc5: 95.6000 (96.0387)  time: 0.2844  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2470 (1.0413)  acc1: 74.8000 (79.4732)  acc5: 92.8000 (95.2000)  time: 0.2842  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2470 (1.0646)  acc1: 73.2000 (78.8080)  acc5: 93.2000 (94.9920)  time: 0.2838  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3458 s / it)
* Acc@1 78.670 Acc@5 94.898 loss 1.068
Accuracy of the model on the 50000 test images: 78.7%
Max accuracy: 78.78%
Epoch: [136]  [   0/2502]  eta: 1:48:06  lr: 0.002532  min_lr: 0.002532  loss: 3.1447 (3.1447)  weight_decay: 0.0500 (0.0500)  time: 2.5924  data: 1.6528  max mem: 53905
Epoch: [136]  [ 200/2502]  eta: 0:23:08  lr: 0.002531  min_lr: 0.002531  loss: 3.5977 (3.4234)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6078 (0.6329)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [136]  [ 400/2502]  eta: 0:20:59  lr: 0.002529  min_lr: 0.002529  loss: 3.5678 (3.3953)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6504 (0.6416)  time: 0.6020  data: 0.0004  max mem: 53905
Epoch: [136]  [ 600/2502]  eta: 0:18:55  lr: 0.002527  min_lr: 0.002527  loss: 3.5452 (3.3902)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6318 (0.6398)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [136]  [ 800/2502]  eta: 0:16:54  lr: 0.002526  min_lr: 0.002526  loss: 3.3430 (3.3868)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6543 (0.6469)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [136]  [1000/2502]  eta: 0:14:54  lr: 0.002524  min_lr: 0.002524  loss: 3.3732 (3.3776)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6003 (0.6426)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [136]  [1200/2502]  eta: 0:12:54  lr: 0.002522  min_lr: 0.002522  loss: 3.5097 (3.3632)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5795 (0.6444)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [136]  [1400/2502]  eta: 0:10:55  lr: 0.002520  min_lr: 0.002520  loss: 3.3145 (3.3620)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6314 (0.6447)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [136]  [1600/2502]  eta: 0:08:56  lr: 0.002519  min_lr: 0.002519  loss: 3.5068 (3.3691)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6569 (0.6467)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [136]  [1800/2502]  eta: 0:06:57  lr: 0.002517  min_lr: 0.002517  loss: 3.4331 (3.3708)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6422 (0.6471)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [136]  [2000/2502]  eta: 0:04:58  lr: 0.002515  min_lr: 0.002515  loss: 3.1318 (3.3663)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6022 (0.6453)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [136]  [2200/2502]  eta: 0:02:59  lr: 0.002513  min_lr: 0.002513  loss: 3.1978 (3.3675)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6111 (0.6462)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [136]  [2400/2502]  eta: 0:01:00  lr: 0.002512  min_lr: 0.002512  loss: 3.2203 (3.3630)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6133 (0.6462)  time: 0.6075  data: 0.0005  max mem: 53905
Epoch: [136]  [2501/2502]  eta: 0:00:00  lr: 0.002511  min_lr: 0.002511  loss: 3.5031 (3.3665)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6363 (0.6459)  time: 0.5324  data: 0.0009  max mem: 53905
Epoch: [136] Total time: 0:24:45 (0.5938 s / it)
Averaged stats: lr: 0.002511  min_lr: 0.002511  loss: 3.5031 (3.3530)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6363 (0.6459)
Test:  [ 0/50]  eta: 0:03:06  loss: 0.6652 (0.6652)  acc1: 92.0000 (92.0000)  acc5: 99.2000 (99.2000)  time: 3.7343  data: 3.4174  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.9972 (1.0162)  acc1: 84.0000 (83.6727)  acc5: 97.2000 (96.9818)  time: 0.5966  data: 0.3111  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 1.0081 (1.0368)  acc1: 80.4000 (82.9143)  acc5: 97.2000 (97.0476)  time: 0.2831  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1782 (1.1351)  acc1: 78.0000 (80.6194)  acc5: 95.6000 (95.7161)  time: 0.2833  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3905 (1.1996)  acc1: 74.0000 (78.8781)  acc5: 92.4000 (94.8195)  time: 0.2830  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3795 (1.2142)  acc1: 74.0000 (78.3360)  acc5: 93.6000 (94.7120)  time: 0.2828  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3548 s / it)
* Acc@1 78.350 Acc@5 94.814 loss 1.214
Accuracy of the model on the 50000 test images: 78.4%
Max accuracy: 78.78%
Epoch: [137]  [   0/2502]  eta: 1:57:06  lr: 0.002511  min_lr: 0.002511  loss: 3.1651 (3.1651)  weight_decay: 0.0500 (0.0500)  time: 2.8084  data: 2.0836  max mem: 53905
Epoch: [137]  [ 200/2502]  eta: 0:23:07  lr: 0.002509  min_lr: 0.002509  loss: 3.1564 (3.2926)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6641 (0.6833)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [137]  [ 400/2502]  eta: 0:20:55  lr: 0.002507  min_lr: 0.002507  loss: 3.3403 (3.3337)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6224 (0.6679)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [137]  [ 600/2502]  eta: 0:18:53  lr: 0.002506  min_lr: 0.002506  loss: 3.2018 (3.3170)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6365 (0.6576)  time: 0.5911  data: 0.0008  max mem: 53905
Epoch: [137]  [ 800/2502]  eta: 0:16:52  lr: 0.002504  min_lr: 0.002504  loss: 3.5427 (3.3246)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6067 (0.6522)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [137]  [1000/2502]  eta: 0:14:52  lr: 0.002502  min_lr: 0.002502  loss: 3.1991 (3.3255)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6681 (0.6504)  time: 0.5999  data: 0.0004  max mem: 53905
Epoch: [137]  [1200/2502]  eta: 0:12:53  lr: 0.002500  min_lr: 0.002500  loss: 3.3798 (3.3250)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6076 (0.6488)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [137]  [1400/2502]  eta: 0:10:54  lr: 0.002499  min_lr: 0.002499  loss: 3.4231 (3.3305)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6275 (0.6544)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [137]  [1600/2502]  eta: 0:08:55  lr: 0.002497  min_lr: 0.002497  loss: 3.3577 (3.3396)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5742 (0.6528)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [137]  [1800/2502]  eta: 0:06:56  lr: 0.002495  min_lr: 0.002495  loss: 3.5053 (3.3427)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6905 (0.6563)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [137]  [2000/2502]  eta: 0:04:57  lr: 0.002493  min_lr: 0.002493  loss: 3.2809 (3.3398)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6242 (0.6548)  time: 0.5916  data: 0.0003  max mem: 53905
Epoch: [137]  [2200/2502]  eta: 0:02:59  lr: 0.002492  min_lr: 0.002492  loss: 3.3173 (3.3394)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6186 (0.6541)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [137]  [2400/2502]  eta: 0:01:00  lr: 0.002490  min_lr: 0.002490  loss: 3.4402 (3.3449)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6084 (0.6534)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [137]  [2501/2502]  eta: 0:00:00  lr: 0.002489  min_lr: 0.002489  loss: 3.1348 (3.3437)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6538 (0.6537)  time: 0.5326  data: 0.0006  max mem: 53905
Epoch: [137] Total time: 0:24:43 (0.5929 s / it)
Averaged stats: lr: 0.002489  min_lr: 0.002489  loss: 3.1348 (3.3508)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6538 (0.6537)
Test:  [ 0/50]  eta: 0:02:59  loss: 0.5146 (0.5146)  acc1: 92.4000 (92.4000)  acc5: 99.2000 (99.2000)  time: 3.5808  data: 3.2785  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7077 (0.7931)  acc1: 85.2000 (84.3636)  acc5: 97.6000 (97.0182)  time: 0.5814  data: 0.2984  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7943 (0.8090)  acc1: 83.6000 (83.5238)  acc5: 97.6000 (97.2952)  time: 0.2815  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9413 (0.9150)  acc1: 80.0000 (81.1226)  acc5: 95.2000 (95.8968)  time: 0.2818  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1616 (0.9853)  acc1: 74.8000 (79.2585)  acc5: 92.4000 (95.1317)  time: 0.2818  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1616 (0.9990)  acc1: 74.0000 (78.6640)  acc5: 92.8000 (94.9920)  time: 0.2817  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3506 s / it)
* Acc@1 78.822 Acc@5 95.022 loss 1.001
Accuracy of the model on the 50000 test images: 78.8%
Max accuracy: 78.82%
Epoch: [138]  [   0/2502]  eta: 1:47:53  lr: 0.002489  min_lr: 0.002489  loss: 2.9109 (2.9109)  weight_decay: 0.0500 (0.0500)  time: 2.5872  data: 1.9827  max mem: 53905
Epoch: [138]  [ 200/2502]  eta: 0:23:05  lr: 0.002487  min_lr: 0.002487  loss: 3.4520 (3.3765)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6594 (0.6527)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [138]  [ 400/2502]  eta: 0:20:56  lr: 0.002486  min_lr: 0.002486  loss: 3.2572 (3.3381)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6636 (0.6462)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [138]  [ 600/2502]  eta: 0:18:53  lr: 0.002484  min_lr: 0.002484  loss: 3.3634 (3.3312)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6316 (0.6495)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [138]  [ 800/2502]  eta: 0:16:53  lr: 0.002482  min_lr: 0.002482  loss: 3.3472 (3.3410)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6103 (0.6503)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [138]  [1000/2502]  eta: 0:14:53  lr: 0.002480  min_lr: 0.002480  loss: 3.4298 (3.3425)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7184 (0.6569)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [138]  [1200/2502]  eta: 0:12:53  lr: 0.002479  min_lr: 0.002479  loss: 3.5494 (3.3420)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6362 (0.6576)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [138]  [1400/2502]  eta: 0:10:54  lr: 0.002477  min_lr: 0.002477  loss: 3.4819 (3.3469)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6107 (0.6588)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [138]  [1600/2502]  eta: 0:08:55  lr: 0.002475  min_lr: 0.002475  loss: 3.1786 (3.3458)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6415 (0.6592)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [138]  [1800/2502]  eta: 0:06:56  lr: 0.002473  min_lr: 0.002473  loss: 3.2628 (3.3422)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6443 (0.6618)  time: 0.6044  data: 0.0005  max mem: 53905
Epoch: [138]  [2000/2502]  eta: 0:04:58  lr: 0.002472  min_lr: 0.002472  loss: 3.5024 (3.3398)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6426 (0.6594)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [138]  [2200/2502]  eta: 0:02:59  lr: 0.002470  min_lr: 0.002470  loss: 3.1163 (3.3369)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6687 (0.6613)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [138]  [2400/2502]  eta: 0:01:00  lr: 0.002468  min_lr: 0.002468  loss: 3.2611 (3.3354)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6106 (0.6610)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [138]  [2501/2502]  eta: 0:00:00  lr: 0.002467  min_lr: 0.002467  loss: 3.3551 (3.3365)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5894 (0.6591)  time: 0.5333  data: 0.0008  max mem: 53905
Epoch: [138] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.002467  min_lr: 0.002467  loss: 3.3551 (3.3435)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5894 (0.6591)
Test:  [ 0/50]  eta: 0:03:06  loss: 0.6451 (0.6451)  acc1: 93.2000 (93.2000)  acc5: 98.0000 (98.0000)  time: 3.7207  data: 3.3959  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8823 (0.8988)  acc1: 82.8000 (83.8909)  acc5: 97.6000 (97.0546)  time: 0.5972  data: 0.3091  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9016 (0.9102)  acc1: 82.4000 (83.3524)  acc5: 97.2000 (97.0476)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0990 (1.0211)  acc1: 78.4000 (80.9936)  acc5: 95.2000 (95.6387)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2725 (1.0844)  acc1: 74.8000 (79.5610)  acc5: 92.8000 (95.0342)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2387 (1.1016)  acc1: 74.8000 (78.8720)  acc5: 93.2000 (94.9120)  time: 0.2844  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3565 s / it)
* Acc@1 78.822 Acc@5 94.980 loss 1.101
Accuracy of the model on the 50000 test images: 78.8%
Max accuracy: 78.82%
Epoch: [139]  [   0/2502]  eta: 1:41:54  lr: 0.002467  min_lr: 0.002467  loss: 3.4812 (3.4812)  weight_decay: 0.0500 (0.0500)  time: 2.4439  data: 1.8542  max mem: 53905
Epoch: [139]  [ 200/2502]  eta: 0:23:08  lr: 0.002466  min_lr: 0.002466  loss: 3.6378 (3.3154)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6396 (0.6398)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [139]  [ 400/2502]  eta: 0:20:56  lr: 0.002464  min_lr: 0.002464  loss: 3.3582 (3.3134)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6555 (0.6485)  time: 0.6033  data: 0.0004  max mem: 53905
Epoch: [139]  [ 600/2502]  eta: 0:18:53  lr: 0.002462  min_lr: 0.002462  loss: 3.3761 (3.3295)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6059 (0.6499)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [139]  [ 800/2502]  eta: 0:16:52  lr: 0.002460  min_lr: 0.002460  loss: 3.4503 (3.3210)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6475 (0.6555)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [139]  [1000/2502]  eta: 0:14:52  lr: 0.002459  min_lr: 0.002459  loss: 3.7393 (3.3314)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6538 (0.6603)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [139]  [1200/2502]  eta: 0:12:53  lr: 0.002457  min_lr: 0.002457  loss: 3.3271 (3.3251)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6024 (0.6588)  time: 0.5968  data: 0.0005  max mem: 53905
Epoch: [139]  [1400/2502]  eta: 0:10:54  lr: 0.002455  min_lr: 0.002455  loss: 3.6015 (3.3239)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6533 (0.6602)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [139]  [1600/2502]  eta: 0:08:55  lr: 0.002453  min_lr: 0.002453  loss: 3.5393 (3.3233)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6684 (0.6655)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [139]  [1800/2502]  eta: 0:06:56  lr: 0.002452  min_lr: 0.002452  loss: 3.3492 (3.3277)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6831 (0.6680)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [139]  [2000/2502]  eta: 0:04:58  lr: 0.002450  min_lr: 0.002450  loss: 3.3408 (3.3313)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6273 (0.6661)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [139]  [2200/2502]  eta: 0:02:59  lr: 0.002448  min_lr: 0.002448  loss: 3.3705 (3.3326)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6069 (0.6627)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [139]  [2400/2502]  eta: 0:01:00  lr: 0.002446  min_lr: 0.002446  loss: 3.5150 (3.3353)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6148 (0.6608)  time: 0.6052  data: 0.0005  max mem: 53905
Epoch: [139]  [2501/2502]  eta: 0:00:00  lr: 0.002445  min_lr: 0.002445  loss: 3.1948 (3.3370)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6320 (0.6613)  time: 0.5356  data: 0.0009  max mem: 53905
Epoch: [139] Total time: 0:24:45 (0.5938 s / it)
Averaged stats: lr: 0.002445  min_lr: 0.002445  loss: 3.1948 (3.3474)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6320 (0.6613)
Test:  [ 0/50]  eta: 0:02:45  loss: 0.5872 (0.5872)  acc1: 91.2000 (91.2000)  acc5: 98.4000 (98.4000)  time: 3.3134  data: 2.9888  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.9177 (0.8820)  acc1: 83.6000 (84.5455)  acc5: 97.2000 (96.6546)  time: 0.5898  data: 0.3040  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9177 (0.9087)  acc1: 81.6000 (83.5048)  acc5: 97.2000 (97.0476)  time: 0.3001  data: 0.0180  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1028 (1.0016)  acc1: 79.6000 (81.4194)  acc5: 95.2000 (95.7290)  time: 0.2826  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2378 (1.0646)  acc1: 75.6000 (79.6878)  acc5: 92.0000 (94.9073)  time: 0.2823  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2081 (1.0773)  acc1: 74.4000 (79.1440)  acc5: 93.2000 (94.8000)  time: 0.2821  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3522 s / it)
* Acc@1 78.864 Acc@5 94.886 loss 1.080
Accuracy of the model on the 50000 test images: 78.9%
Max accuracy: 78.86%
Epoch: [140]  [   0/2502]  eta: 1:32:18  lr: 0.002445  min_lr: 0.002445  loss: 2.8820 (2.8820)  weight_decay: 0.0500 (0.0500)  time: 2.2137  data: 1.6204  max mem: 53905
Epoch: [140]  [ 200/2502]  eta: 0:23:02  lr: 0.002444  min_lr: 0.002444  loss: 3.3237 (3.3352)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6387 (0.6760)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [140]  [ 400/2502]  eta: 0:20:56  lr: 0.002442  min_lr: 0.002442  loss: 3.3500 (3.3456)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6337 (0.6566)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [140]  [ 600/2502]  eta: 0:18:53  lr: 0.002440  min_lr: 0.002440  loss: 3.0986 (3.3372)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6444 (0.6584)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [140]  [ 800/2502]  eta: 0:16:53  lr: 0.002438  min_lr: 0.002438  loss: 3.5502 (3.3396)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6753 (0.6589)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [140]  [1000/2502]  eta: 0:14:53  lr: 0.002437  min_lr: 0.002437  loss: 3.2572 (3.3417)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6583 (0.6608)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [140]  [1200/2502]  eta: 0:12:54  lr: 0.002435  min_lr: 0.002435  loss: 3.5884 (3.3331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6334 (0.6583)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [140]  [1400/2502]  eta: 0:10:55  lr: 0.002433  min_lr: 0.002433  loss: 3.4211 (3.3349)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6585 (0.6577)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [140]  [1600/2502]  eta: 0:08:56  lr: 0.002431  min_lr: 0.002431  loss: 3.4603 (3.3345)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6584 (0.6572)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [140]  [1800/2502]  eta: 0:06:57  lr: 0.002430  min_lr: 0.002430  loss: 3.6005 (3.3378)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6564 (0.6587)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [140]  [2000/2502]  eta: 0:04:58  lr: 0.002428  min_lr: 0.002428  loss: 3.5904 (3.3363)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6386 (0.6584)  time: 0.5987  data: 0.0004  max mem: 53905
Epoch: [140]  [2200/2502]  eta: 0:02:59  lr: 0.002426  min_lr: 0.002426  loss: 3.3911 (3.3384)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6793 (0.6608)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [140]  [2400/2502]  eta: 0:01:00  lr: 0.002424  min_lr: 0.002424  loss: 3.3613 (3.3426)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6901 (0.6628)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [140]  [2501/2502]  eta: 0:00:00  lr: 0.002424  min_lr: 0.002424  loss: 3.5641 (3.3432)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6345 (0.6628)  time: 0.5394  data: 0.0009  max mem: 53905
Epoch: [140] Total time: 0:24:45 (0.5936 s / it)
Averaged stats: lr: 0.002424  min_lr: 0.002424  loss: 3.5641 (3.3385)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6345 (0.6628)
Test:  [ 0/50]  eta: 0:02:39  loss: 0.5598 (0.5598)  acc1: 92.4000 (92.4000)  acc5: 99.2000 (99.2000)  time: 3.1921  data: 2.8565  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8790 (0.9288)  acc1: 83.6000 (84.5455)  acc5: 97.6000 (97.0545)  time: 0.6061  data: 0.3178  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9623 (0.9542)  acc1: 82.0000 (83.2191)  acc5: 97.2000 (96.9714)  time: 0.3162  data: 0.0322  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0776 (1.0587)  acc1: 78.8000 (81.0581)  acc5: 94.8000 (95.7032)  time: 0.2847  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3035 (1.1230)  acc1: 75.2000 (79.3171)  acc5: 92.8000 (95.0829)  time: 0.2843  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3026 (1.1397)  acc1: 74.8000 (78.6640)  acc5: 93.6000 (94.9680)  time: 0.2841  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3580 s / it)
* Acc@1 78.830 Acc@5 94.988 loss 1.139
Accuracy of the model on the 50000 test images: 78.8%
Max accuracy: 78.86%
Epoch: [141]  [   0/2502]  eta: 1:56:42  lr: 0.002424  min_lr: 0.002424  loss: 3.9288 (3.9288)  weight_decay: 0.0500 (0.0500)  time: 2.7989  data: 1.9233  max mem: 53905
Epoch: [141]  [ 200/2502]  eta: 0:23:07  lr: 0.002422  min_lr: 0.002422  loss: 3.3654 (3.3189)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6393 (0.6698)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [141]  [ 400/2502]  eta: 0:20:56  lr: 0.002420  min_lr: 0.002420  loss: 3.4363 (3.3027)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7204 (0.6751)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [141]  [ 600/2502]  eta: 0:18:54  lr: 0.002418  min_lr: 0.002418  loss: 3.5433 (3.3173)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6168 (0.6695)  time: 0.5968  data: 0.0004  max mem: 53905
Epoch: [141]  [ 800/2502]  eta: 0:16:53  lr: 0.002417  min_lr: 0.002417  loss: 3.3848 (3.3118)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6872 (0.6668)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [141]  [1000/2502]  eta: 0:14:53  lr: 0.002415  min_lr: 0.002415  loss: 3.1777 (3.3102)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5803 (0.6608)  time: 0.6011  data: 0.0004  max mem: 53905
Epoch: [141]  [1200/2502]  eta: 0:12:54  lr: 0.002413  min_lr: 0.002413  loss: 3.3911 (3.3203)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6456 (0.6618)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [141]  [1400/2502]  eta: 0:10:54  lr: 0.002411  min_lr: 0.002411  loss: 3.5529 (3.3274)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6756 (0.6630)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [141]  [1600/2502]  eta: 0:08:55  lr: 0.002409  min_lr: 0.002409  loss: 3.6238 (3.3254)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6835 (0.6669)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [141]  [1800/2502]  eta: 0:06:57  lr: 0.002408  min_lr: 0.002408  loss: 3.4655 (3.3255)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6619 (0.6670)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [141]  [2000/2502]  eta: 0:04:58  lr: 0.002406  min_lr: 0.002406  loss: 3.2425 (3.3271)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6758 (0.6662)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [141]  [2200/2502]  eta: 0:02:59  lr: 0.002404  min_lr: 0.002404  loss: 3.3854 (3.3249)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [141]  [2400/2502]  eta: 0:01:00  lr: 0.002402  min_lr: 0.002402  loss: 2.8591 (3.3177)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6559 (nan)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [141]  [2501/2502]  eta: 0:00:00  lr: 0.002402  min_lr: 0.002402  loss: 3.5681 (3.3194)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6620 (nan)  time: 0.5334  data: 0.0007  max mem: 53905
Epoch: [141] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.002402  min_lr: 0.002402  loss: 3.5681 (3.3314)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6620 (nan)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.6170 (0.6170)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 3.0063  data: 2.6934  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.9016 (0.9544)  acc1: 83.2000 (84.2182)  acc5: 97.6000 (97.2000)  time: 0.5666  data: 0.2800  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 1.0181 (0.9849)  acc1: 82.4000 (83.3714)  acc5: 97.2000 (97.1810)  time: 0.3039  data: 0.0196  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1070 (1.0778)  acc1: 78.8000 (81.4968)  acc5: 95.6000 (95.8323)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3425 (1.1443)  acc1: 76.4000 (79.7561)  acc5: 92.8000 (95.2293)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.3556 (1.1655)  acc1: 73.6000 (78.9520)  acc5: 92.8000 (94.9120)  time: 0.2845  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3493 s / it)
* Acc@1 78.890 Acc@5 94.986 loss 1.164
Accuracy of the model on the 50000 test images: 78.9%
Max accuracy: 78.89%
Epoch: [142]  [   0/2502]  eta: 1:38:37  lr: 0.002402  min_lr: 0.002402  loss: 3.4532 (3.4532)  weight_decay: 0.0500 (0.0500)  time: 2.3650  data: 1.7572  max mem: 53905
Epoch: [142]  [ 200/2502]  eta: 0:23:07  lr: 0.002400  min_lr: 0.002400  loss: 3.5358 (3.2881)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6204 (0.6587)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [142]  [ 400/2502]  eta: 0:20:56  lr: 0.002398  min_lr: 0.002398  loss: 3.4264 (3.2811)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6313 (0.6564)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [142]  [ 600/2502]  eta: 0:18:54  lr: 0.002396  min_lr: 0.002396  loss: 3.5313 (3.2954)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6635 (0.6678)  time: 0.5934  data: 0.0005  max mem: 53905
Epoch: [142]  [ 800/2502]  eta: 0:16:53  lr: 0.002395  min_lr: 0.002395  loss: 3.2705 (3.2982)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6563 (0.6690)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [142]  [1000/2502]  eta: 0:14:53  lr: 0.002393  min_lr: 0.002393  loss: 3.6264 (3.3004)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5951 (0.6711)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [142]  [1200/2502]  eta: 0:12:53  lr: 0.002391  min_lr: 0.002391  loss: 3.5759 (3.3126)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6746 (0.6728)  time: 0.5993  data: 0.0005  max mem: 53905
Epoch: [142]  [1400/2502]  eta: 0:10:54  lr: 0.002389  min_lr: 0.002389  loss: 3.3739 (3.3182)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7049 (0.6779)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [142]  [1600/2502]  eta: 0:08:55  lr: 0.002388  min_lr: 0.002388  loss: 3.3000 (3.3111)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6452 (0.6780)  time: 0.5927  data: 0.0006  max mem: 53905
Epoch: [142]  [1800/2502]  eta: 0:06:56  lr: 0.002386  min_lr: 0.002386  loss: 3.5621 (3.3140)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6893 (0.6788)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [142]  [2000/2502]  eta: 0:04:58  lr: 0.002384  min_lr: 0.002384  loss: 3.4342 (3.3164)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6474 (0.6771)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [142]  [2200/2502]  eta: 0:02:59  lr: 0.002382  min_lr: 0.002382  loss: 3.4820 (3.3151)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6980 (0.6755)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [142]  [2400/2502]  eta: 0:01:00  lr: 0.002380  min_lr: 0.002380  loss: 3.3967 (3.3188)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6267 (0.6739)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [142]  [2501/2502]  eta: 0:00:00  lr: 0.002380  min_lr: 0.002380  loss: 3.4254 (3.3150)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6026 (0.6730)  time: 0.5331  data: 0.0007  max mem: 53905
Epoch: [142] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.002380  min_lr: 0.002380  loss: 3.4254 (3.3326)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6026 (0.6730)
Test:  [ 0/50]  eta: 0:02:37  loss: 0.5795 (0.5795)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 3.1539  data: 2.8319  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8280 (0.8659)  acc1: 84.4000 (84.6182)  acc5: 97.6000 (97.3091)  time: 0.5582  data: 0.2712  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8973 (0.8831)  acc1: 83.2000 (83.7905)  acc5: 97.2000 (97.2191)  time: 0.2914  data: 0.0078  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9916 (0.9846)  acc1: 79.6000 (81.6387)  acc5: 94.8000 (96.0129)  time: 0.2843  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2592 (1.0565)  acc1: 75.2000 (79.6390)  acc5: 92.8000 (95.2781)  time: 0.2842  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2592 (1.0753)  acc1: 73.6000 (79.0880)  acc5: 93.2000 (95.0800)  time: 0.2841  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3470 s / it)
* Acc@1 79.022 Acc@5 95.072 loss 1.081
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.02%
Epoch: [143]  [   0/2502]  eta: 1:34:37  lr: 0.002380  min_lr: 0.002380  loss: 3.2349 (3.2349)  weight_decay: 0.0500 (0.0500)  time: 2.2693  data: 1.6605  max mem: 53905
Epoch: [143]  [ 200/2502]  eta: 0:23:05  lr: 0.002378  min_lr: 0.002378  loss: 3.3552 (3.2927)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6279 (inf)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [143]  [ 400/2502]  eta: 0:20:55  lr: 0.002376  min_lr: 0.002376  loss: 3.0065 (3.2877)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6521 (inf)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [143]  [ 600/2502]  eta: 0:18:53  lr: 0.002374  min_lr: 0.002374  loss: 3.3039 (3.2925)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6751 (inf)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [143]  [ 800/2502]  eta: 0:16:52  lr: 0.002373  min_lr: 0.002373  loss: 3.4707 (3.2996)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6347 (inf)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [143]  [1000/2502]  eta: 0:14:53  lr: 0.002371  min_lr: 0.002371  loss: 3.4176 (3.3076)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6822 (inf)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [143]  [1200/2502]  eta: 0:12:53  lr: 0.002369  min_lr: 0.002369  loss: 3.3330 (3.3153)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6321 (inf)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [143]  [1400/2502]  eta: 0:10:54  lr: 0.002367  min_lr: 0.002367  loss: 3.4200 (3.3195)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6637 (inf)  time: 0.5942  data: 0.0005  max mem: 53905
Epoch: [143]  [1600/2502]  eta: 0:08:56  lr: 0.002365  min_lr: 0.002365  loss: 3.4576 (3.3254)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6373 (inf)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [143]  [1800/2502]  eta: 0:06:57  lr: 0.002364  min_lr: 0.002364  loss: 3.3229 (3.3237)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6445 (inf)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [143]  [2000/2502]  eta: 0:04:58  lr: 0.002362  min_lr: 0.002362  loss: 3.3091 (3.3246)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6074 (inf)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [143]  [2200/2502]  eta: 0:02:59  lr: 0.002360  min_lr: 0.002360  loss: 3.6055 (3.3304)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6645 (inf)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [143]  [2400/2502]  eta: 0:01:00  lr: 0.002358  min_lr: 0.002358  loss: 3.2884 (3.3351)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6931 (inf)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [143]  [2501/2502]  eta: 0:00:00  lr: 0.002358  min_lr: 0.002358  loss: 3.2575 (3.3359)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6135 (inf)  time: 0.5337  data: 0.0007  max mem: 53905
Epoch: [143] Total time: 0:24:45 (0.5938 s / it)
Averaged stats: lr: 0.002358  min_lr: 0.002358  loss: 3.2575 (3.3236)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6135 (inf)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.5636 (0.5636)  acc1: 92.8000 (92.8000)  acc5: 99.2000 (99.2000)  time: 3.0522  data: 2.7281  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.8693 (0.8840)  acc1: 84.4000 (84.1455)  acc5: 97.2000 (97.0546)  time: 0.5492  data: 0.2619  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9188 (0.8996)  acc1: 82.4000 (83.7714)  acc5: 97.2000 (97.2381)  time: 0.2917  data: 0.0079  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0362 (0.9969)  acc1: 79.2000 (81.3677)  acc5: 95.6000 (95.9484)  time: 0.2846  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2317 (1.0591)  acc1: 75.6000 (79.7756)  acc5: 92.4000 (95.2098)  time: 0.2843  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2357 (1.0779)  acc1: 74.8000 (79.0240)  acc5: 94.0000 (95.0720)  time: 0.2840  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3450 s / it)
* Acc@1 79.036 Acc@5 95.052 loss 1.080
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.04%
Epoch: [144]  [   0/2502]  eta: 1:48:18  lr: 0.002358  min_lr: 0.002358  loss: 3.5676 (3.5676)  weight_decay: 0.0500 (0.0500)  time: 2.5973  data: 1.9861  max mem: 53905
Epoch: [144]  [ 200/2502]  eta: 0:23:08  lr: 0.002356  min_lr: 0.002356  loss: 3.3447 (3.2779)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6505 (0.6861)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [144]  [ 400/2502]  eta: 0:20:56  lr: 0.002354  min_lr: 0.002354  loss: 3.3098 (3.3152)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6169 (0.6669)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [144]  [ 600/2502]  eta: 0:18:53  lr: 0.002352  min_lr: 0.002352  loss: 3.5202 (3.3105)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6579 (0.6703)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [144]  [ 800/2502]  eta: 0:16:52  lr: 0.002350  min_lr: 0.002350  loss: 3.2214 (3.3113)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6943 (0.6834)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [144]  [1000/2502]  eta: 0:14:53  lr: 0.002349  min_lr: 0.002349  loss: 3.5052 (3.3139)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6448 (0.6762)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [144]  [1200/2502]  eta: 0:12:53  lr: 0.002347  min_lr: 0.002347  loss: 3.4911 (3.3151)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6328 (0.6756)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [144]  [1400/2502]  eta: 0:10:54  lr: 0.002345  min_lr: 0.002345  loss: 3.6584 (3.3166)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6623 (0.6730)  time: 0.5983  data: 0.0004  max mem: 53905
Epoch: [144]  [1600/2502]  eta: 0:08:55  lr: 0.002343  min_lr: 0.002343  loss: 3.4941 (3.3175)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6309 (0.6705)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [144]  [1800/2502]  eta: 0:06:56  lr: 0.002342  min_lr: 0.002342  loss: 3.2892 (3.3169)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6356 (0.6685)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [144]  [2000/2502]  eta: 0:04:58  lr: 0.002340  min_lr: 0.002340  loss: 3.2538 (3.3162)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6656 (0.6673)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [144]  [2200/2502]  eta: 0:02:59  lr: 0.002338  min_lr: 0.002338  loss: 3.4184 (3.3150)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6515 (0.6673)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [144]  [2400/2502]  eta: 0:01:00  lr: 0.002336  min_lr: 0.002336  loss: 3.1367 (3.3190)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6277 (0.6701)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [144]  [2501/2502]  eta: 0:00:00  lr: 0.002335  min_lr: 0.002335  loss: 3.5965 (3.3225)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6361 (0.6696)  time: 0.5356  data: 0.0009  max mem: 53905
Epoch: [144] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.002335  min_lr: 0.002335  loss: 3.5965 (3.3167)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6361 (0.6696)
Test:  [ 0/50]  eta: 0:03:08  loss: 0.6224 (0.6224)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 3.7733  data: 3.4536  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8375 (0.8928)  acc1: 83.6000 (85.2364)  acc5: 97.6000 (97.2727)  time: 0.6019  data: 0.3144  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9223 (0.9110)  acc1: 82.4000 (84.4381)  acc5: 97.6000 (97.2571)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0476 (1.0148)  acc1: 80.4000 (82.0516)  acc5: 95.2000 (95.9613)  time: 0.2852  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3087 (1.0835)  acc1: 75.2000 (80.2829)  acc5: 93.2000 (95.3073)  time: 0.2847  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2775 (1.1051)  acc1: 75.6000 (79.4160)  acc5: 93.6000 (95.0960)  time: 0.2842  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3566 s / it)
* Acc@1 79.270 Acc@5 95.106 loss 1.111
Accuracy of the model on the 50000 test images: 79.3%
Max accuracy: 79.27%
Epoch: [145]  [   0/2502]  eta: 1:39:13  lr: 0.002335  min_lr: 0.002335  loss: 4.0150 (4.0150)  weight_decay: 0.0500 (0.0500)  time: 2.3793  data: 1.7737  max mem: 53905
Epoch: [145]  [ 200/2502]  eta: 0:23:02  lr: 0.002334  min_lr: 0.002334  loss: 3.4408 (3.2881)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6232 (0.6678)  time: 0.5912  data: 0.0009  max mem: 53905
Epoch: [145]  [ 400/2502]  eta: 0:20:56  lr: 0.002332  min_lr: 0.002332  loss: 3.4043 (3.3148)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6964 (0.6701)  time: 0.5992  data: 0.0006  max mem: 53905
Epoch: [145]  [ 600/2502]  eta: 0:18:53  lr: 0.002330  min_lr: 0.002330  loss: 3.1795 (3.2996)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6661 (0.6762)  time: 0.5928  data: 0.0007  max mem: 53905
Epoch: [145]  [ 800/2502]  eta: 0:16:52  lr: 0.002328  min_lr: 0.002328  loss: 3.1873 (3.3140)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7211 (0.6758)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [145]  [1000/2502]  eta: 0:14:53  lr: 0.002327  min_lr: 0.002327  loss: 3.3493 (3.3011)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6885 (0.6833)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [145]  [1200/2502]  eta: 0:12:54  lr: 0.002325  min_lr: 0.002325  loss: 3.3265 (3.3067)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5876 (0.6791)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [145]  [1400/2502]  eta: 0:10:54  lr: 0.002323  min_lr: 0.002323  loss: 3.3271 (3.3144)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6703 (0.6776)  time: 0.5942  data: 0.0008  max mem: 53905
Epoch: [145]  [1600/2502]  eta: 0:08:55  lr: 0.002321  min_lr: 0.002321  loss: 3.0497 (3.3085)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6481 (0.6774)  time: 0.5907  data: 0.0006  max mem: 53905
Epoch: [145]  [1800/2502]  eta: 0:06:56  lr: 0.002319  min_lr: 0.002319  loss: 3.4476 (3.3086)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6263 (0.6758)  time: 0.5928  data: 0.0006  max mem: 53905
Epoch: [145]  [2000/2502]  eta: 0:04:58  lr: 0.002318  min_lr: 0.002318  loss: 3.2410 (3.3065)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6333 (0.6758)  time: 0.5924  data: 0.0008  max mem: 53905
Epoch: [145]  [2200/2502]  eta: 0:02:59  lr: 0.002316  min_lr: 0.002316  loss: 3.5066 (3.3086)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6508 (0.6758)  time: 0.5927  data: 0.0007  max mem: 53905
Epoch: [145]  [2400/2502]  eta: 0:01:00  lr: 0.002314  min_lr: 0.002314  loss: 3.2339 (3.3109)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6579 (0.6732)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [145]  [2501/2502]  eta: 0:00:00  lr: 0.002313  min_lr: 0.002313  loss: 3.3812 (3.3110)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6287 (0.6722)  time: 0.5340  data: 0.0009  max mem: 53905
Epoch: [145] Total time: 0:24:45 (0.5936 s / it)
Averaged stats: lr: 0.002313  min_lr: 0.002313  loss: 3.3812 (3.3089)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6287 (0.6722)
Test:  [ 0/50]  eta: 0:02:51  loss: 0.5446 (0.5446)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 3.4270  data: 3.0880  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8759 (0.8872)  acc1: 84.8000 (84.8364)  acc5: 97.2000 (96.9818)  time: 0.5706  data: 0.2812  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8776 (0.9039)  acc1: 83.6000 (83.6571)  acc5: 97.2000 (97.2000)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0175 (1.0096)  acc1: 79.2000 (81.5226)  acc5: 95.2000 (95.7806)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2915 (1.0814)  acc1: 76.0000 (79.9024)  acc5: 92.8000 (95.0634)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2460 (1.1036)  acc1: 76.0000 (79.2960)  acc5: 92.8000 (94.8400)  time: 0.2850  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3500 s / it)
* Acc@1 79.222 Acc@5 95.020 loss 1.104
Accuracy of the model on the 50000 test images: 79.2%
Max accuracy: 79.27%
Epoch: [146]  [   0/2502]  eta: 1:52:03  lr: 0.002313  min_lr: 0.002313  loss: 3.4380 (3.4380)  weight_decay: 0.0500 (0.0500)  time: 2.6874  data: 2.0934  max mem: 53905
Epoch: [146]  [ 200/2502]  eta: 0:23:12  lr: 0.002312  min_lr: 0.002312  loss: 3.5663 (3.3273)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6111 (0.6722)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [146]  [ 400/2502]  eta: 0:20:59  lr: 0.002310  min_lr: 0.002310  loss: 3.5103 (3.3172)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6833 (0.6855)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [146]  [ 600/2502]  eta: 0:18:56  lr: 0.002308  min_lr: 0.002308  loss: 3.2423 (3.3176)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7269 (0.6880)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [146]  [ 800/2502]  eta: 0:16:55  lr: 0.002306  min_lr: 0.002306  loss: 3.5054 (3.3205)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6280 (0.6912)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [146]  [1000/2502]  eta: 0:14:54  lr: 0.002304  min_lr: 0.002304  loss: 3.5518 (3.3047)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6355 (0.6872)  time: 0.5935  data: 0.0006  max mem: 53905
Epoch: [146]  [1200/2502]  eta: 0:12:55  lr: 0.002303  min_lr: 0.002303  loss: 3.4581 (3.3029)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6239 (0.6852)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [146]  [1400/2502]  eta: 0:10:55  lr: 0.002301  min_lr: 0.002301  loss: 3.3335 (3.3058)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6896 (0.6838)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [146]  [1600/2502]  eta: 0:08:56  lr: 0.002299  min_lr: 0.002299  loss: 3.3507 (3.3072)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6361 (0.6800)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [146]  [1800/2502]  eta: 0:06:57  lr: 0.002297  min_lr: 0.002297  loss: 3.5180 (3.3022)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6390 (0.6805)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [146]  [2000/2502]  eta: 0:04:58  lr: 0.002296  min_lr: 0.002296  loss: 3.1087 (3.3040)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6489 (0.6778)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [146]  [2200/2502]  eta: 0:02:59  lr: 0.002294  min_lr: 0.002294  loss: 3.4579 (3.3031)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6141 (0.6777)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [146]  [2400/2502]  eta: 0:01:00  lr: 0.002292  min_lr: 0.002292  loss: 3.3694 (3.3074)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6561 (0.6775)  time: 0.5934  data: 0.0004  max mem: 53905
Epoch: [146]  [2501/2502]  eta: 0:00:00  lr: 0.002291  min_lr: 0.002291  loss: 3.2936 (3.3056)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6282 (0.6762)  time: 0.5331  data: 0.0007  max mem: 53905
Epoch: [146] Total time: 0:24:46 (0.5941 s / it)
Averaged stats: lr: 0.002291  min_lr: 0.002291  loss: 3.2936 (3.3096)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6282 (0.6762)
Test:  [ 0/50]  eta: 0:02:41  loss: 0.5588 (0.5588)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 3.2277  data: 2.9096  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8747 (0.8680)  acc1: 84.8000 (85.0545)  acc5: 98.0000 (97.3818)  time: 0.5969  data: 0.3100  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8868 (0.9007)  acc1: 82.8000 (83.8476)  acc5: 97.6000 (97.3524)  time: 0.3117  data: 0.0252  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.0858 (1.0054)  acc1: 78.4000 (81.3548)  acc5: 95.2000 (96.1161)  time: 0.2931  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2543 (1.0677)  acc1: 75.2000 (79.8732)  acc5: 93.2000 (95.3659)  time: 0.2906  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2543 (1.0893)  acc1: 75.2000 (79.1920)  acc5: 92.8000 (95.1760)  time: 0.2847  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3590 s / it)
* Acc@1 79.230 Acc@5 95.214 loss 1.093
Accuracy of the model on the 50000 test images: 79.2%
Max accuracy: 79.27%
Epoch: [147]  [   0/2502]  eta: 1:53:22  lr: 0.002291  min_lr: 0.002291  loss: 3.6832 (3.6832)  weight_decay: 0.0500 (0.0500)  time: 2.7188  data: 2.0129  max mem: 53905
Epoch: [147]  [ 200/2502]  eta: 0:23:09  lr: 0.002289  min_lr: 0.002289  loss: 3.5204 (3.2933)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7124 (0.6866)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [147]  [ 400/2502]  eta: 0:20:59  lr: 0.002288  min_lr: 0.002288  loss: 3.4915 (3.3042)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6289 (0.6694)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [147]  [ 600/2502]  eta: 0:18:56  lr: 0.002286  min_lr: 0.002286  loss: 3.2728 (3.2696)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6543 (0.6722)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [147]  [ 800/2502]  eta: 0:16:54  lr: 0.002284  min_lr: 0.002284  loss: 3.2389 (3.2721)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6651 (0.6739)  time: 0.5974  data: 0.0004  max mem: 53905
Epoch: [147]  [1000/2502]  eta: 0:14:54  lr: 0.002282  min_lr: 0.002282  loss: 3.5715 (3.2775)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6281 (0.6696)  time: 0.5942  data: 0.0004  max mem: 53905
Epoch: [147]  [1200/2502]  eta: 0:12:54  lr: 0.002280  min_lr: 0.002280  loss: 3.1595 (3.2750)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6613 (0.6706)  time: 0.5936  data: 0.0005  max mem: 53905
Epoch: [147]  [1400/2502]  eta: 0:10:55  lr: 0.002279  min_lr: 0.002279  loss: 3.6722 (3.2829)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6900 (0.6715)  time: 0.5956  data: 0.0004  max mem: 53905
Epoch: [147]  [1600/2502]  eta: 0:08:56  lr: 0.002277  min_lr: 0.002277  loss: 3.4764 (3.2866)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6455 (0.6727)  time: 0.5994  data: 0.0005  max mem: 53905
Epoch: [147]  [1800/2502]  eta: 0:06:57  lr: 0.002275  min_lr: 0.002275  loss: 3.5868 (3.2886)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6500 (0.6735)  time: 0.5935  data: 0.0004  max mem: 53905
Epoch: [147]  [2000/2502]  eta: 0:04:58  lr: 0.002273  min_lr: 0.002273  loss: 3.4336 (3.2897)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6599 (0.6732)  time: 0.5962  data: 0.0005  max mem: 53905
Epoch: [147]  [2200/2502]  eta: 0:02:59  lr: 0.002272  min_lr: 0.002272  loss: 3.2900 (3.2916)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7085 (0.6755)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [147]  [2400/2502]  eta: 0:01:00  lr: 0.002270  min_lr: 0.002270  loss: 3.3974 (3.2942)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6566 (0.6741)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [147]  [2501/2502]  eta: 0:00:00  lr: 0.002269  min_lr: 0.002269  loss: 3.2967 (3.2937)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6980 (0.6755)  time: 0.5362  data: 0.0009  max mem: 53905
Epoch: [147] Total time: 0:24:46 (0.5942 s / it)
Averaged stats: lr: 0.002269  min_lr: 0.002269  loss: 3.2967 (3.2974)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6980 (0.6755)
Test:  [ 0/50]  eta: 0:02:58  loss: 0.5662 (0.5662)  acc1: 93.6000 (93.6000)  acc5: 98.0000 (98.0000)  time: 3.5727  data: 3.2387  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8594 (0.8831)  acc1: 83.2000 (84.5818)  acc5: 97.6000 (97.2000)  time: 0.5901  data: 0.3013  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8968 (0.8911)  acc1: 82.4000 (83.7143)  acc5: 97.2000 (97.4286)  time: 0.2886  data: 0.0040  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0055 (0.9850)  acc1: 80.0000 (81.9226)  acc5: 96.0000 (96.2452)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1824 (1.0444)  acc1: 75.2000 (80.2244)  acc5: 93.2000 (95.4829)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1777 (1.0640)  acc1: 75.2000 (79.5040)  acc5: 93.2000 (95.2240)  time: 0.2847  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3546 s / it)
* Acc@1 79.378 Acc@5 95.238 loss 1.068
Accuracy of the model on the 50000 test images: 79.4%
Max accuracy: 79.38%
Epoch: [148]  [   0/2502]  eta: 1:42:16  lr: 0.002269  min_lr: 0.002269  loss: 2.5785 (2.5785)  weight_decay: 0.0500 (0.0500)  time: 2.4528  data: 1.8498  max mem: 53905
Epoch: [148]  [ 200/2502]  eta: 0:23:06  lr: 0.002267  min_lr: 0.002267  loss: 3.3813 (3.3740)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6788 (0.6789)  time: 0.5936  data: 0.0004  max mem: 53905
Epoch: [148]  [ 400/2502]  eta: 0:20:56  lr: 0.002265  min_lr: 0.002265  loss: 3.5771 (3.3635)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6643 (0.6772)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [148]  [ 600/2502]  eta: 0:18:55  lr: 0.002264  min_lr: 0.002264  loss: 3.0390 (3.3167)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6706 (0.6773)  time: 0.6040  data: 0.0005  max mem: 53905
Epoch: [148]  [ 800/2502]  eta: 0:16:54  lr: 0.002262  min_lr: 0.002262  loss: 3.1636 (3.2974)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6610 (0.6741)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [148]  [1000/2502]  eta: 0:14:54  lr: 0.002260  min_lr: 0.002260  loss: 3.0135 (3.2854)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6576 (0.6772)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [148]  [1200/2502]  eta: 0:12:54  lr: 0.002258  min_lr: 0.002258  loss: 3.3122 (3.2830)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6528 (0.6741)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [148]  [1400/2502]  eta: 0:10:55  lr: 0.002256  min_lr: 0.002256  loss: 3.4018 (3.2890)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6783 (0.6774)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [148]  [1600/2502]  eta: 0:08:56  lr: 0.002255  min_lr: 0.002255  loss: 3.2115 (3.2901)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6512 (0.6778)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [148]  [1800/2502]  eta: 0:06:57  lr: 0.002253  min_lr: 0.002253  loss: 3.4260 (3.2941)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6614 (0.6790)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [148]  [2000/2502]  eta: 0:04:58  lr: 0.002251  min_lr: 0.002251  loss: 3.4096 (3.2970)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6808 (0.6806)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [148]  [2200/2502]  eta: 0:02:59  lr: 0.002249  min_lr: 0.002249  loss: 3.4787 (3.3022)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6759 (0.6842)  time: 0.6015  data: 0.0005  max mem: 53905
Epoch: [148]  [2400/2502]  eta: 0:01:00  lr: 0.002248  min_lr: 0.002248  loss: 3.4110 (3.2977)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6959 (0.6840)  time: 0.5932  data: 0.0007  max mem: 53905
Epoch: [148]  [2501/2502]  eta: 0:00:00  lr: 0.002247  min_lr: 0.002247  loss: 3.4227 (3.2990)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6366 (0.6828)  time: 0.5338  data: 0.0009  max mem: 53905
Epoch: [148] Total time: 0:24:46 (0.5939 s / it)
Averaged stats: lr: 0.002247  min_lr: 0.002247  loss: 3.4227 (3.3031)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6366 (0.6828)
Test:  [ 0/50]  eta: 0:02:28  loss: 0.5531 (0.5531)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 2.9762  data: 2.6658  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7961 (0.8411)  acc1: 84.8000 (85.1273)  acc5: 97.6000 (97.0909)  time: 0.5921  data: 0.3055  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8013 (0.8464)  acc1: 82.0000 (84.1333)  acc5: 97.2000 (97.2000)  time: 0.3195  data: 0.0349  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9899 (0.9483)  acc1: 80.0000 (81.6903)  acc5: 95.2000 (95.9871)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1619 (1.0051)  acc1: 75.2000 (80.0683)  acc5: 93.2000 (95.3366)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1875 (1.0213)  acc1: 74.0000 (79.3360)  acc5: 93.6000 (95.2000)  time: 0.2847  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3548 s / it)
* Acc@1 79.466 Acc@5 95.282 loss 1.024
Accuracy of the model on the 50000 test images: 79.5%
Max accuracy: 79.47%
Epoch: [149]  [   0/2502]  eta: 1:35:01  lr: 0.002247  min_lr: 0.002247  loss: 3.3488 (3.3488)  weight_decay: 0.0500 (0.0500)  time: 2.2788  data: 1.6834  max mem: 53905
Epoch: [149]  [ 200/2502]  eta: 0:23:02  lr: 0.002245  min_lr: 0.002245  loss: 3.0050 (3.2748)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6401 (0.6650)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [149]  [ 400/2502]  eta: 0:20:55  lr: 0.002243  min_lr: 0.002243  loss: 3.3633 (3.2793)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6168 (0.6761)  time: 0.5915  data: 0.0003  max mem: 53905
Epoch: [149]  [ 600/2502]  eta: 0:18:53  lr: 0.002241  min_lr: 0.002241  loss: 3.4559 (3.2842)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6820 (0.6826)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [149]  [ 800/2502]  eta: 0:16:53  lr: 0.002240  min_lr: 0.002240  loss: 3.4640 (3.3052)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.6007  data: 0.0004  max mem: 53905
Epoch: [149]  [1000/2502]  eta: 0:14:53  lr: 0.002238  min_lr: 0.002238  loss: 3.3244 (3.3073)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6867 (nan)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [149]  [1200/2502]  eta: 0:12:54  lr: 0.002236  min_lr: 0.002236  loss: 3.5512 (3.3065)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6775 (nan)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [149]  [1400/2502]  eta: 0:10:54  lr: 0.002234  min_lr: 0.002234  loss: 3.3637 (3.3057)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6493 (nan)  time: 0.5977  data: 0.0004  max mem: 53905
Epoch: [149]  [1600/2502]  eta: 0:08:55  lr: 0.002232  min_lr: 0.002232  loss: 3.3371 (3.3115)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6796 (nan)  time: 0.5968  data: 0.0004  max mem: 53905
Epoch: [149]  [1800/2502]  eta: 0:06:56  lr: 0.002231  min_lr: 0.002231  loss: 3.4102 (3.3042)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6685 (nan)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [149]  [2000/2502]  eta: 0:04:58  lr: 0.002229  min_lr: 0.002229  loss: 3.4796 (3.2993)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6453 (nan)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [149]  [2200/2502]  eta: 0:02:59  lr: 0.002227  min_lr: 0.002227  loss: 3.4636 (3.2970)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6576 (nan)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [149]  [2400/2502]  eta: 0:01:00  lr: 0.002225  min_lr: 0.002225  loss: 3.3375 (3.2993)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7016 (nan)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [149]  [2501/2502]  eta: 0:00:00  lr: 0.002224  min_lr: 0.002224  loss: 2.9661 (3.2971)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6638 (nan)  time: 0.5408  data: 0.0007  max mem: 53905
Epoch: [149] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.002224  min_lr: 0.002224  loss: 2.9661 (3.3023)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6638 (nan)
Test:  [ 0/50]  eta: 0:03:13  loss: 0.4780 (0.4780)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.8753  data: 3.5426  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8480 (0.8251)  acc1: 84.8000 (85.3818)  acc5: 98.0000 (97.4182)  time: 0.6112  data: 0.3224  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8585 (0.8563)  acc1: 82.4000 (84.0381)  acc5: 97.6000 (97.3714)  time: 0.2849  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9705 (0.9554)  acc1: 79.6000 (81.8968)  acc5: 95.6000 (95.9871)  time: 0.2852  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1438 (1.0055)  acc1: 76.8000 (80.4293)  acc5: 92.8000 (95.3366)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1419 (1.0237)  acc1: 75.6000 (79.8800)  acc5: 93.6000 (95.1600)  time: 0.2847  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3592 s / it)
* Acc@1 79.702 Acc@5 95.218 loss 1.028
Accuracy of the model on the 50000 test images: 79.7%
Max accuracy: 79.70%
Epoch: [150]  [   0/2502]  eta: 1:31:56  lr: 0.002224  min_lr: 0.002224  loss: 2.6314 (2.6314)  weight_decay: 0.0500 (0.0500)  time: 2.2049  data: 1.5952  max mem: 53905
Epoch: [150]  [ 200/2502]  eta: 0:23:04  lr: 0.002223  min_lr: 0.002223  loss: 3.4394 (3.2850)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6541 (0.6616)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [150]  [ 400/2502]  eta: 0:20:55  lr: 0.002221  min_lr: 0.002221  loss: 3.1042 (3.3026)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7057 (0.6826)  time: 0.5921  data: 0.0007  max mem: 53905
Epoch: [150]  [ 600/2502]  eta: 0:18:53  lr: 0.002219  min_lr: 0.002219  loss: 3.2259 (3.3067)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6629 (0.6789)  time: 0.5915  data: 0.0007  max mem: 53905
Epoch: [150]  [ 800/2502]  eta: 0:16:53  lr: 0.002217  min_lr: 0.002217  loss: 3.3443 (3.3089)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6764 (0.6840)  time: 0.5922  data: 0.0007  max mem: 53905
Epoch: [150]  [1000/2502]  eta: 0:14:53  lr: 0.002215  min_lr: 0.002215  loss: 3.2832 (3.2995)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7242 (0.6887)  time: 0.5939  data: 0.0008  max mem: 53905
Epoch: [150]  [1200/2502]  eta: 0:12:54  lr: 0.002214  min_lr: 0.002214  loss: 3.4212 (3.3048)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6427 (0.6836)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [150]  [1400/2502]  eta: 0:10:55  lr: 0.002212  min_lr: 0.002212  loss: 3.3802 (3.3130)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6871 (0.6883)  time: 0.5937  data: 0.0006  max mem: 53905
Epoch: [150]  [1600/2502]  eta: 0:08:56  lr: 0.002210  min_lr: 0.002210  loss: 3.4455 (3.3100)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7356 (0.6899)  time: 0.5975  data: 0.0006  max mem: 53905
Epoch: [150]  [1800/2502]  eta: 0:06:57  lr: 0.002208  min_lr: 0.002208  loss: 3.3137 (3.3076)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6292 (0.6889)  time: 0.5992  data: 0.0010  max mem: 53905
Epoch: [150]  [2000/2502]  eta: 0:04:58  lr: 0.002207  min_lr: 0.002207  loss: 3.4515 (3.3110)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6963 (0.6886)  time: 0.5924  data: 0.0006  max mem: 53905
Epoch: [150]  [2200/2502]  eta: 0:02:59  lr: 0.002205  min_lr: 0.002205  loss: 3.5282 (3.3130)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6708 (0.6856)  time: 0.5967  data: 0.0007  max mem: 53905
Epoch: [150]  [2400/2502]  eta: 0:01:00  lr: 0.002203  min_lr: 0.002203  loss: 3.3007 (3.3120)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6967 (0.6871)  time: 0.5975  data: 0.0005  max mem: 53905
Epoch: [150]  [2501/2502]  eta: 0:00:00  lr: 0.002202  min_lr: 0.002202  loss: 3.0955 (3.3116)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6952 (0.6875)  time: 0.5343  data: 0.0008  max mem: 53905
Epoch: [150] Total time: 0:24:47 (0.5943 s / it)
Averaged stats: lr: 0.002202  min_lr: 0.002202  loss: 3.0955 (3.2967)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6952 (0.6875)
Test:  [ 0/50]  eta: 0:02:56  loss: 0.5000 (0.5000)  acc1: 92.0000 (92.0000)  acc5: 99.2000 (99.2000)  time: 3.5229  data: 3.1945  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8787 (0.8467)  acc1: 82.4000 (84.6909)  acc5: 97.6000 (97.3818)  time: 0.5792  data: 0.2908  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8787 (0.8751)  acc1: 82.0000 (83.8095)  acc5: 97.6000 (97.2952)  time: 0.2851  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9893 (0.9785)  acc1: 80.0000 (81.5871)  acc5: 94.8000 (95.9226)  time: 0.2854  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2028 (1.0324)  acc1: 76.4000 (80.1854)  acc5: 92.8000 (95.2390)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1886 (1.0478)  acc1: 76.4000 (79.7200)  acc5: 93.2000 (95.1360)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3525 s / it)
* Acc@1 79.462 Acc@5 95.190 loss 1.047
Accuracy of the model on the 50000 test images: 79.5%
Max accuracy: 79.70%
Epoch: [151]  [   0/2502]  eta: 1:41:09  lr: 0.002202  min_lr: 0.002202  loss: 3.3756 (3.3756)  weight_decay: 0.0500 (0.0500)  time: 2.4260  data: 1.8310  max mem: 53905
Epoch: [151]  [ 200/2502]  eta: 0:23:04  lr: 0.002200  min_lr: 0.002200  loss: 3.4025 (3.2814)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6080 (0.6638)  time: 0.6013  data: 0.0005  max mem: 53905
Epoch: [151]  [ 400/2502]  eta: 0:20:56  lr: 0.002198  min_lr: 0.002198  loss: 3.1991 (3.2964)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6358 (0.6763)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [151]  [ 600/2502]  eta: 0:18:54  lr: 0.002197  min_lr: 0.002197  loss: 3.2378 (3.3039)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6440 (0.6718)  time: 0.5936  data: 0.0005  max mem: 53905
Epoch: [151]  [ 800/2502]  eta: 0:16:53  lr: 0.002195  min_lr: 0.002195  loss: 3.4175 (3.2989)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6874 (0.6720)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [151]  [1000/2502]  eta: 0:14:54  lr: 0.002193  min_lr: 0.002193  loss: 3.3568 (3.2920)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6830 (0.6735)  time: 0.5992  data: 0.0005  max mem: 53905
Epoch: [151]  [1200/2502]  eta: 0:12:54  lr: 0.002191  min_lr: 0.002191  loss: 3.3829 (3.2919)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7140 (0.6753)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [151]  [1400/2502]  eta: 0:10:55  lr: 0.002190  min_lr: 0.002190  loss: 3.4621 (3.3001)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6578 (0.6797)  time: 0.5933  data: 0.0006  max mem: 53905
Epoch: [151]  [1600/2502]  eta: 0:08:56  lr: 0.002188  min_lr: 0.002188  loss: 3.2480 (3.3015)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6612 (0.6758)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [151]  [1800/2502]  eta: 0:06:57  lr: 0.002186  min_lr: 0.002186  loss: 3.2619 (3.2963)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6626 (0.6786)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [151]  [2000/2502]  eta: 0:04:58  lr: 0.002184  min_lr: 0.002184  loss: 3.3696 (3.2971)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6793 (0.6785)  time: 0.5984  data: 0.0005  max mem: 53905
Epoch: [151]  [2200/2502]  eta: 0:02:59  lr: 0.002182  min_lr: 0.002182  loss: 3.2473 (3.3012)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6815 (0.6795)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [151]  [2400/2502]  eta: 0:01:00  lr: 0.002181  min_lr: 0.002181  loss: 3.4194 (3.3012)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6855 (0.6807)  time: 0.6008  data: 0.0007  max mem: 53905
Epoch: [151]  [2501/2502]  eta: 0:00:00  lr: 0.002180  min_lr: 0.002180  loss: 3.5476 (3.3032)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6480 (0.6805)  time: 0.5329  data: 0.0007  max mem: 53905
Epoch: [151] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.002180  min_lr: 0.002180  loss: 3.5476 (3.3004)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6480 (0.6805)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.5890 (0.5890)  acc1: 91.6000 (91.6000)  acc5: 99.2000 (99.2000)  time: 3.0513  data: 2.7178  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.9163 (0.9006)  acc1: 84.4000 (84.8727)  acc5: 97.6000 (97.2364)  time: 0.5363  data: 0.2475  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9163 (0.9232)  acc1: 83.6000 (83.9238)  acc5: 97.6000 (97.2191)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0142 (1.0255)  acc1: 79.2000 (81.7936)  acc5: 94.8000 (95.8581)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2641 (1.0841)  acc1: 75.6000 (80.1951)  acc5: 92.8000 (95.2195)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2594 (1.1019)  acc1: 75.2000 (79.5520)  acc5: 93.2000 (95.0880)  time: 0.2847  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3428 s / it)
* Acc@1 79.444 Acc@5 95.202 loss 1.107
Accuracy of the model on the 50000 test images: 79.4%
Max accuracy: 79.70%
Epoch: [152]  [   0/2502]  eta: 1:55:08  lr: 0.002180  min_lr: 0.002180  loss: 2.9837 (2.9837)  weight_decay: 0.0500 (0.0500)  time: 2.7613  data: 1.7973  max mem: 53905
Epoch: [152]  [ 200/2502]  eta: 0:23:09  lr: 0.002178  min_lr: 0.002178  loss: 3.2907 (3.2091)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6826 (0.6900)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [152]  [ 400/2502]  eta: 0:20:56  lr: 0.002176  min_lr: 0.002176  loss: 3.4566 (3.2626)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7339 (0.6995)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [152]  [ 600/2502]  eta: 0:18:55  lr: 0.002174  min_lr: 0.002174  loss: 3.2958 (3.2632)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6803 (0.6967)  time: 0.5931  data: 0.0008  max mem: 53905
Epoch: [152]  [ 800/2502]  eta: 0:16:54  lr: 0.002173  min_lr: 0.002173  loss: 3.3908 (3.2573)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6801 (0.6927)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [152]  [1000/2502]  eta: 0:14:54  lr: 0.002171  min_lr: 0.002171  loss: 3.2654 (3.2530)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6735 (0.6888)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [152]  [1200/2502]  eta: 0:12:54  lr: 0.002169  min_lr: 0.002169  loss: 3.3436 (3.2557)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6306 (0.6861)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [152]  [1400/2502]  eta: 0:10:55  lr: 0.002167  min_lr: 0.002167  loss: 3.5454 (3.2626)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7415 (0.6954)  time: 0.5925  data: 0.0006  max mem: 53905
Epoch: [152]  [1600/2502]  eta: 0:08:56  lr: 0.002165  min_lr: 0.002165  loss: 3.3734 (3.2698)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6346 (0.6932)  time: 0.5920  data: 0.0007  max mem: 53905
Epoch: [152]  [1800/2502]  eta: 0:06:57  lr: 0.002164  min_lr: 0.002164  loss: 3.1755 (3.2690)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6778 (0.6909)  time: 0.5918  data: 0.0007  max mem: 53905
Epoch: [152]  [2000/2502]  eta: 0:04:58  lr: 0.002162  min_lr: 0.002162  loss: 3.4630 (3.2736)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6576 (0.6903)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [152]  [2200/2502]  eta: 0:02:59  lr: 0.002160  min_lr: 0.002160  loss: 3.2565 (3.2708)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6661 (0.6893)  time: 0.5934  data: 0.0005  max mem: 53905
Epoch: [152]  [2400/2502]  eta: 0:01:00  lr: 0.002158  min_lr: 0.002158  loss: 3.3690 (3.2728)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7018 (0.6888)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [152]  [2501/2502]  eta: 0:00:00  lr: 0.002157  min_lr: 0.002157  loss: 3.2322 (3.2727)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6328 (0.6883)  time: 0.5327  data: 0.0009  max mem: 53905
Epoch: [152] Total time: 0:24:45 (0.5938 s / it)
Averaged stats: lr: 0.002157  min_lr: 0.002157  loss: 3.2322 (3.2905)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6328 (0.6883)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.4958 (0.4958)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 3.0448  data: 2.7404  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.7683 (0.7979)  acc1: 85.2000 (85.7818)  acc5: 98.0000 (97.8182)  time: 0.5375  data: 0.2516  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8174 (0.8339)  acc1: 83.6000 (84.4762)  acc5: 97.2000 (97.4667)  time: 0.2859  data: 0.0016  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9468 (0.9300)  acc1: 80.4000 (82.0903)  acc5: 95.6000 (96.1806)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1860 (0.9945)  acc1: 75.6000 (80.2634)  acc5: 92.8000 (95.4634)  time: 0.2850  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1860 (1.0136)  acc1: 74.4000 (79.5280)  acc5: 93.2000 (95.3440)  time: 0.2846  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3426 s / it)
* Acc@1 79.522 Acc@5 95.222 loss 1.017
Accuracy of the model on the 50000 test images: 79.5%
Max accuracy: 79.70%
Epoch: [153]  [   0/2502]  eta: 1:44:19  lr: 0.002157  min_lr: 0.002157  loss: 3.3617 (3.3617)  weight_decay: 0.0500 (0.0500)  time: 2.5019  data: 1.8107  max mem: 53905
Epoch: [153]  [ 200/2502]  eta: 0:23:10  lr: 0.002156  min_lr: 0.002156  loss: 3.4520 (3.2806)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6698 (0.7190)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [153]  [ 400/2502]  eta: 0:20:57  lr: 0.002154  min_lr: 0.002154  loss: 3.5498 (3.2971)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6915 (0.7016)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [153]  [ 600/2502]  eta: 0:18:54  lr: 0.002152  min_lr: 0.002152  loss: 3.4096 (3.2833)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6729 (0.6957)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [153]  [ 800/2502]  eta: 0:16:53  lr: 0.002150  min_lr: 0.002150  loss: 3.2681 (3.2727)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6708 (0.6939)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [153]  [1000/2502]  eta: 0:14:54  lr: 0.002148  min_lr: 0.002148  loss: 3.5689 (3.2721)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6745 (0.6955)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [153]  [1200/2502]  eta: 0:12:54  lr: 0.002147  min_lr: 0.002147  loss: 3.4196 (3.2668)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6700 (0.6950)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [153]  [1400/2502]  eta: 0:10:55  lr: 0.002145  min_lr: 0.002145  loss: 3.3123 (3.2677)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7384 (0.6978)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [153]  [1600/2502]  eta: 0:08:56  lr: 0.002143  min_lr: 0.002143  loss: 3.3078 (3.2644)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6894 (0.7016)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [153]  [1800/2502]  eta: 0:06:57  lr: 0.002141  min_lr: 0.002141  loss: 3.2862 (3.2618)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7070 (0.7006)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [153]  [2000/2502]  eta: 0:04:58  lr: 0.002139  min_lr: 0.002139  loss: 3.2741 (3.2704)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6785 (0.7043)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [153]  [2200/2502]  eta: 0:02:59  lr: 0.002138  min_lr: 0.002138  loss: 3.2838 (3.2757)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6640 (0.7032)  time: 0.5938  data: 0.0004  max mem: 53905
Epoch: [153]  [2400/2502]  eta: 0:01:00  lr: 0.002136  min_lr: 0.002136  loss: 3.4446 (3.2787)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6501 (0.6997)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [153]  [2501/2502]  eta: 0:00:00  lr: 0.002135  min_lr: 0.002135  loss: 3.5322 (3.2797)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6706 (0.6996)  time: 0.5334  data: 0.0009  max mem: 53905
Epoch: [153] Total time: 0:24:46 (0.5942 s / it)
Averaged stats: lr: 0.002135  min_lr: 0.002135  loss: 3.5322 (3.2823)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6706 (0.6996)
Test:  [ 0/50]  eta: 0:02:46  loss: 0.5953 (0.5953)  acc1: 91.6000 (91.6000)  acc5: 99.2000 (99.2000)  time: 3.3253  data: 2.9892  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8570 (0.8762)  acc1: 83.6000 (84.4727)  acc5: 98.0000 (97.4909)  time: 0.5612  data: 0.2722  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9193 (0.9086)  acc1: 82.8000 (83.7714)  acc5: 97.6000 (97.4667)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0247 (1.0077)  acc1: 80.0000 (81.9226)  acc5: 95.2000 (96.2323)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2429 (1.0742)  acc1: 77.2000 (80.1756)  acc5: 94.0000 (95.6390)  time: 0.2850  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2429 (1.0931)  acc1: 74.8000 (79.6240)  acc5: 94.4000 (95.4400)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3484 s / it)
* Acc@1 79.440 Acc@5 95.326 loss 1.097
Accuracy of the model on the 50000 test images: 79.4%
Max accuracy: 79.70%
Epoch: [154]  [   0/2502]  eta: 1:43:41  lr: 0.002135  min_lr: 0.002135  loss: 2.1754 (2.1754)  weight_decay: 0.0500 (0.0500)  time: 2.4868  data: 1.6120  max mem: 53905
Epoch: [154]  [ 200/2502]  eta: 0:23:07  lr: 0.002133  min_lr: 0.002133  loss: 3.4477 (3.2456)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6686 (0.6678)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [154]  [ 400/2502]  eta: 0:20:57  lr: 0.002131  min_lr: 0.002131  loss: 3.0053 (3.2591)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6988 (0.6788)  time: 0.6004  data: 0.0004  max mem: 53905
Epoch: [154]  [ 600/2502]  eta: 0:18:55  lr: 0.002130  min_lr: 0.002130  loss: 3.4496 (3.2414)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7032 (0.6776)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [154]  [ 800/2502]  eta: 0:16:54  lr: 0.002128  min_lr: 0.002128  loss: 3.3911 (3.2582)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6304 (0.6823)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [154]  [1000/2502]  eta: 0:14:54  lr: 0.002126  min_lr: 0.002126  loss: 3.3880 (3.2564)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6420 (0.6818)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [154]  [1200/2502]  eta: 0:12:54  lr: 0.002124  min_lr: 0.002124  loss: 3.4880 (3.2574)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7040 (0.6864)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [154]  [1400/2502]  eta: 0:10:55  lr: 0.002122  min_lr: 0.002122  loss: 3.3587 (3.2676)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7533 (0.6914)  time: 0.5935  data: 0.0004  max mem: 53905
Epoch: [154]  [1600/2502]  eta: 0:08:56  lr: 0.002121  min_lr: 0.002121  loss: 3.5559 (3.2637)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6886 (0.6913)  time: 0.5963  data: 0.0004  max mem: 53905
Epoch: [154]  [1800/2502]  eta: 0:06:57  lr: 0.002119  min_lr: 0.002119  loss: 3.1943 (3.2598)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6534 (0.6907)  time: 0.5960  data: 0.0004  max mem: 53905
Epoch: [154]  [2000/2502]  eta: 0:04:58  lr: 0.002117  min_lr: 0.002117  loss: 3.3209 (3.2628)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7103 (0.6944)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [154]  [2200/2502]  eta: 0:02:59  lr: 0.002115  min_lr: 0.002115  loss: 2.7551 (3.2608)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7007 (0.6957)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [154]  [2400/2502]  eta: 0:01:00  lr: 0.002114  min_lr: 0.002114  loss: 3.4331 (3.2667)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6653 (0.6945)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [154]  [2501/2502]  eta: 0:00:00  lr: 0.002113  min_lr: 0.002113  loss: 3.1504 (3.2656)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6371 (0.6937)  time: 0.5331  data: 0.0007  max mem: 53905
Epoch: [154] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.002113  min_lr: 0.002113  loss: 3.1504 (3.2739)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6371 (0.6937)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.5295 (0.5295)  acc1: 93.6000 (93.6000)  acc5: 99.2000 (99.2000)  time: 3.0557  data: 2.7202  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8424 (0.8630)  acc1: 85.2000 (84.6182)  acc5: 97.6000 (97.3455)  time: 0.5557  data: 0.2666  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8567 (0.8817)  acc1: 82.4000 (83.8476)  acc5: 97.2000 (97.3524)  time: 0.3012  data: 0.0108  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9886 (0.9864)  acc1: 80.0000 (81.7290)  acc5: 95.2000 (96.0774)  time: 0.2913  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2246 (1.0466)  acc1: 75.6000 (80.2146)  acc5: 93.6000 (95.4439)  time: 0.2851  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2246 (1.0698)  acc1: 74.8000 (79.4640)  acc5: 94.0000 (95.2880)  time: 0.2843  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3490 s / it)
* Acc@1 79.634 Acc@5 95.312 loss 1.071
Accuracy of the model on the 50000 test images: 79.6%
Max accuracy: 79.70%
Epoch: [155]  [   0/2502]  eta: 1:59:05  lr: 0.002113  min_lr: 0.002113  loss: 3.0145 (3.0145)  weight_decay: 0.0500 (0.0500)  time: 2.8559  data: 1.9611  max mem: 53905
Epoch: [155]  [ 200/2502]  eta: 0:23:13  lr: 0.002111  min_lr: 0.002111  loss: 3.3511 (3.3039)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6344 (0.6835)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [155]  [ 400/2502]  eta: 0:20:59  lr: 0.002109  min_lr: 0.002109  loss: 3.4710 (3.2861)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6772 (0.7020)  time: 0.5943  data: 0.0004  max mem: 53905
Epoch: [155]  [ 600/2502]  eta: 0:18:56  lr: 0.002107  min_lr: 0.002107  loss: 3.1793 (3.2839)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6996 (0.6976)  time: 0.5999  data: 0.0004  max mem: 53905
Epoch: [155]  [ 800/2502]  eta: 0:16:54  lr: 0.002105  min_lr: 0.002105  loss: 3.2418 (3.2619)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6641 (0.7044)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [155]  [1000/2502]  eta: 0:14:54  lr: 0.002104  min_lr: 0.002104  loss: 3.4524 (3.2643)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6746 (0.6986)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [155]  [1200/2502]  eta: 0:12:54  lr: 0.002102  min_lr: 0.002102  loss: 3.2474 (3.2676)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6738 (0.7001)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [155]  [1400/2502]  eta: 0:10:55  lr: 0.002100  min_lr: 0.002100  loss: 3.1796 (3.2641)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7176 (0.7037)  time: 0.5935  data: 0.0005  max mem: 53905
Epoch: [155]  [1600/2502]  eta: 0:08:56  lr: 0.002098  min_lr: 0.002098  loss: 3.4536 (3.2629)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6792 (0.7041)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [155]  [1800/2502]  eta: 0:06:57  lr: 0.002096  min_lr: 0.002096  loss: 3.1486 (3.2687)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5968 (0.7022)  time: 0.5959  data: 0.0004  max mem: 53905
Epoch: [155]  [2000/2502]  eta: 0:04:58  lr: 0.002095  min_lr: 0.002095  loss: 3.3033 (3.2704)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7164 (inf)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [155]  [2200/2502]  eta: 0:02:59  lr: 0.002093  min_lr: 0.002093  loss: 3.1105 (3.2664)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6614 (inf)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [155]  [2400/2502]  eta: 0:01:00  lr: 0.002091  min_lr: 0.002091  loss: 3.3715 (3.2640)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6697 (inf)  time: 0.5953  data: 0.0004  max mem: 53905
Epoch: [155]  [2501/2502]  eta: 0:00:00  lr: 0.002090  min_lr: 0.002090  loss: 3.4177 (3.2640)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6701 (inf)  time: 0.5326  data: 0.0012  max mem: 53905
Epoch: [155] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.002090  min_lr: 0.002090  loss: 3.4177 (3.2701)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6701 (inf)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.5929 (0.5929)  acc1: 92.8000 (92.8000)  acc5: 99.2000 (99.2000)  time: 3.0429  data: 2.7185  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.9063 (0.9407)  acc1: 85.6000 (85.0546)  acc5: 97.6000 (97.2000)  time: 0.5932  data: 0.3074  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9510 (0.9562)  acc1: 82.4000 (84.2286)  acc5: 97.2000 (97.1619)  time: 0.3156  data: 0.0334  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1482 (1.0573)  acc1: 79.2000 (81.7290)  acc5: 95.2000 (96.0387)  time: 0.2830  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.3116 (1.1159)  acc1: 76.0000 (80.1951)  acc5: 93.6000 (95.4342)  time: 0.2829  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2956 (1.1301)  acc1: 76.0000 (79.5920)  acc5: 94.0000 (95.3040)  time: 0.2828  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3535 s / it)
* Acc@1 79.686 Acc@5 95.246 loss 1.130
Accuracy of the model on the 50000 test images: 79.7%
Max accuracy: 79.70%
Epoch: [156]  [   0/2502]  eta: 1:40:45  lr: 0.002090  min_lr: 0.002090  loss: 3.3297 (3.3297)  weight_decay: 0.0500 (0.0500)  time: 2.4163  data: 1.8007  max mem: 53905
Epoch: [156]  [ 200/2502]  eta: 0:23:05  lr: 0.002088  min_lr: 0.002088  loss: 3.4470 (3.2805)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7508 (0.7369)  time: 0.5934  data: 0.0004  max mem: 53905
Epoch: [156]  [ 400/2502]  eta: 0:20:56  lr: 0.002087  min_lr: 0.002087  loss: 3.3250 (3.2817)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6768 (0.7042)  time: 0.6002  data: 0.0005  max mem: 53905
Epoch: [156]  [ 600/2502]  eta: 0:18:53  lr: 0.002085  min_lr: 0.002085  loss: 3.5009 (3.2690)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6704 (0.7032)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [156]  [ 800/2502]  eta: 0:16:52  lr: 0.002083  min_lr: 0.002083  loss: 3.5885 (3.2668)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6621 (0.7005)  time: 0.5960  data: 0.0005  max mem: 53905
Epoch: [156]  [1000/2502]  eta: 0:14:52  lr: 0.002081  min_lr: 0.002081  loss: 3.4148 (3.2719)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6760 (0.6965)  time: 0.5939  data: 0.0004  max mem: 53905
Epoch: [156]  [1200/2502]  eta: 0:12:53  lr: 0.002079  min_lr: 0.002079  loss: 3.3445 (3.2711)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6562 (0.6998)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [156]  [1400/2502]  eta: 0:10:54  lr: 0.002078  min_lr: 0.002078  loss: 3.2603 (3.2693)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6327 (0.6985)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [156]  [1600/2502]  eta: 0:08:55  lr: 0.002076  min_lr: 0.002076  loss: 3.0116 (3.2635)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7321 (0.7009)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [156]  [1800/2502]  eta: 0:06:56  lr: 0.002074  min_lr: 0.002074  loss: 3.2251 (3.2617)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7248 (0.7042)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [156]  [2000/2502]  eta: 0:04:57  lr: 0.002072  min_lr: 0.002072  loss: 3.4533 (3.2647)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6916 (0.7034)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [156]  [2200/2502]  eta: 0:02:59  lr: 0.002070  min_lr: 0.002070  loss: 3.4071 (3.2649)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7091 (0.7052)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [156]  [2400/2502]  eta: 0:01:00  lr: 0.002069  min_lr: 0.002069  loss: 3.4623 (3.2617)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6676 (0.7053)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [156]  [2501/2502]  eta: 0:00:00  lr: 0.002068  min_lr: 0.002068  loss: 3.4170 (3.2606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6921 (0.7049)  time: 0.5333  data: 0.0007  max mem: 53905
Epoch: [156] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.002068  min_lr: 0.002068  loss: 3.4170 (3.2669)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6921 (0.7049)
Test:  [ 0/50]  eta: 0:02:42  loss: 0.6085 (0.6085)  acc1: 92.0000 (92.0000)  acc5: 99.2000 (99.2000)  time: 3.2496  data: 2.9129  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8582 (0.9160)  acc1: 83.2000 (84.1091)  acc5: 97.6000 (97.4182)  time: 0.5651  data: 0.2761  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8993 (0.9252)  acc1: 82.8000 (83.6000)  acc5: 97.2000 (97.3524)  time: 0.2909  data: 0.0064  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0379 (1.0193)  acc1: 78.4000 (81.2387)  acc5: 96.0000 (96.0129)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2274 (1.0704)  acc1: 76.0000 (79.8829)  acc5: 92.4000 (95.3756)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2255 (1.0831)  acc1: 74.8000 (79.3200)  acc5: 93.6000 (95.2560)  time: 0.2851  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3489 s / it)
* Acc@1 79.502 Acc@5 95.322 loss 1.084
Accuracy of the model on the 50000 test images: 79.5%
Max accuracy: 79.70%
Epoch: [157]  [   0/2502]  eta: 1:55:52  lr: 0.002068  min_lr: 0.002068  loss: 4.0530 (4.0530)  weight_decay: 0.0500 (0.0500)  time: 2.7788  data: 1.6826  max mem: 53905
Epoch: [157]  [ 200/2502]  eta: 0:23:10  lr: 0.002066  min_lr: 0.002066  loss: 3.4473 (3.2469)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6829 (nan)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [157]  [ 400/2502]  eta: 0:20:58  lr: 0.002064  min_lr: 0.002064  loss: 3.3249 (3.2534)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6733 (nan)  time: 0.5936  data: 0.0005  max mem: 53905
Epoch: [157]  [ 600/2502]  eta: 0:18:55  lr: 0.002062  min_lr: 0.002062  loss: 3.4414 (3.2504)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6992 (nan)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [157]  [ 800/2502]  eta: 0:16:54  lr: 0.002061  min_lr: 0.002061  loss: 3.1650 (3.2434)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7031 (nan)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [157]  [1000/2502]  eta: 0:14:54  lr: 0.002059  min_lr: 0.002059  loss: 3.1594 (3.2343)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7189 (nan)  time: 0.6024  data: 0.0005  max mem: 53905
Epoch: [157]  [1200/2502]  eta: 0:12:54  lr: 0.002057  min_lr: 0.002057  loss: 3.5045 (3.2449)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6840 (nan)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [157]  [1400/2502]  eta: 0:10:55  lr: 0.002055  min_lr: 0.002055  loss: 3.3642 (3.2395)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6527 (nan)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [157]  [1600/2502]  eta: 0:08:56  lr: 0.002053  min_lr: 0.002053  loss: 3.3796 (3.2358)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7094 (nan)  time: 0.5942  data: 0.0005  max mem: 53905
Epoch: [157]  [1800/2502]  eta: 0:06:57  lr: 0.002052  min_lr: 0.002052  loss: 3.3586 (3.2423)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6762 (nan)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [157]  [2000/2502]  eta: 0:04:58  lr: 0.002050  min_lr: 0.002050  loss: 3.0782 (3.2421)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7245 (nan)  time: 0.5935  data: 0.0004  max mem: 53905
Epoch: [157]  [2200/2502]  eta: 0:02:59  lr: 0.002048  min_lr: 0.002048  loss: 3.3095 (3.2456)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6863 (nan)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [157]  [2400/2502]  eta: 0:01:00  lr: 0.002046  min_lr: 0.002046  loss: 3.4611 (3.2451)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6754 (nan)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [157]  [2501/2502]  eta: 0:00:00  lr: 0.002045  min_lr: 0.002045  loss: 3.3901 (3.2460)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6580 (nan)  time: 0.5342  data: 0.0009  max mem: 53905
Epoch: [157] Total time: 0:24:46 (0.5942 s / it)
Averaged stats: lr: 0.002045  min_lr: 0.002045  loss: 3.3901 (3.2663)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6580 (nan)
Test:  [ 0/50]  eta: 0:02:38  loss: 0.5050 (0.5050)  acc1: 92.8000 (92.8000)  acc5: 99.2000 (99.2000)  time: 3.1706  data: 2.8456  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8749 (0.8416)  acc1: 86.0000 (85.4545)  acc5: 97.6000 (97.3091)  time: 0.6026  data: 0.3150  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8906 (0.8674)  acc1: 83.6000 (84.5143)  acc5: 97.6000 (97.3524)  time: 0.3153  data: 0.0313  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0362 (0.9683)  acc1: 79.2000 (82.2581)  acc5: 95.6000 (96.0774)  time: 0.2849  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1995 (1.0270)  acc1: 76.8000 (80.6829)  acc5: 92.8000 (95.3366)  time: 0.2847  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1873 (1.0392)  acc1: 75.2000 (80.0400)  acc5: 94.0000 (95.2640)  time: 0.2843  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3568 s / it)
* Acc@1 79.958 Acc@5 95.268 loss 1.042
Accuracy of the model on the 50000 test images: 80.0%
Max accuracy: 79.96%
Epoch: [158]  [   0/2502]  eta: 1:42:33  lr: 0.002045  min_lr: 0.002045  loss: 3.4074 (3.4074)  weight_decay: 0.0500 (0.0500)  time: 2.4593  data: 1.8500  max mem: 53905
Epoch: [158]  [ 200/2502]  eta: 0:23:10  lr: 0.002044  min_lr: 0.002044  loss: 3.2986 (3.2502)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6840 (0.6836)  time: 0.5959  data: 0.0004  max mem: 53905
Epoch: [158]  [ 400/2502]  eta: 0:20:58  lr: 0.002042  min_lr: 0.002042  loss: 3.2522 (3.2249)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7034 (0.6975)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [158]  [ 600/2502]  eta: 0:18:55  lr: 0.002040  min_lr: 0.002040  loss: 3.3979 (3.2366)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6853 (0.7041)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [158]  [ 800/2502]  eta: 0:16:54  lr: 0.002038  min_lr: 0.002038  loss: 3.4093 (3.2490)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6776 (0.6979)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [158]  [1000/2502]  eta: 0:14:54  lr: 0.002036  min_lr: 0.002036  loss: 3.3112 (3.2555)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6880 (0.6995)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [158]  [1200/2502]  eta: 0:12:55  lr: 0.002035  min_lr: 0.002035  loss: 2.9630 (3.2539)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6850 (0.7005)  time: 0.5944  data: 0.0005  max mem: 53905
Epoch: [158]  [1400/2502]  eta: 0:10:55  lr: 0.002033  min_lr: 0.002033  loss: 3.1890 (3.2608)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6541 (0.7008)  time: 0.5936  data: 0.0005  max mem: 53905
Epoch: [158]  [1600/2502]  eta: 0:08:56  lr: 0.002031  min_lr: 0.002031  loss: 3.2291 (3.2667)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6818 (0.7010)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [158]  [1800/2502]  eta: 0:06:57  lr: 0.002029  min_lr: 0.002029  loss: 2.9885 (3.2713)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6778 (0.7048)  time: 0.5945  data: 0.0005  max mem: 53905
Epoch: [158]  [2000/2502]  eta: 0:04:58  lr: 0.002027  min_lr: 0.002027  loss: 3.3806 (3.2733)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6711 (0.7065)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [158]  [2200/2502]  eta: 0:02:59  lr: 0.002026  min_lr: 0.002026  loss: 3.4895 (3.2731)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6746 (0.7059)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [158]  [2400/2502]  eta: 0:01:00  lr: 0.002024  min_lr: 0.002024  loss: 3.5001 (3.2689)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7324 (0.7088)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [158]  [2501/2502]  eta: 0:00:00  lr: 0.002023  min_lr: 0.002023  loss: 3.0690 (3.2647)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6670 (0.7077)  time: 0.5346  data: 0.0008  max mem: 53905
Epoch: [158] Total time: 0:24:46 (0.5943 s / it)
Averaged stats: lr: 0.002023  min_lr: 0.002023  loss: 3.0690 (3.2642)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6670 (0.7077)
Test:  [ 0/50]  eta: 0:02:44  loss: 0.4650 (0.4650)  acc1: 94.0000 (94.0000)  acc5: 99.6000 (99.6000)  time: 3.2812  data: 2.9594  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8101 (0.8175)  acc1: 85.6000 (84.8364)  acc5: 97.6000 (97.1636)  time: 0.5575  data: 0.2695  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8101 (0.8274)  acc1: 83.6000 (84.1333)  acc5: 97.6000 (97.4095)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9982 (0.9302)  acc1: 79.2000 (81.8839)  acc5: 94.8000 (96.0000)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1593 (0.9894)  acc1: 74.4000 (80.1951)  acc5: 92.4000 (95.2781)  time: 0.2851  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1516 (1.0023)  acc1: 74.4000 (79.6160)  acc5: 93.6000 (95.2960)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3475 s / it)
* Acc@1 79.540 Acc@5 95.238 loss 1.004
Accuracy of the model on the 50000 test images: 79.5%
Max accuracy: 79.96%
Epoch: [159]  [   0/2502]  eta: 1:54:42  lr: 0.002023  min_lr: 0.002023  loss: 4.0280 (4.0280)  weight_decay: 0.0500 (0.0500)  time: 2.7507  data: 2.0525  max mem: 53905
Epoch: [159]  [ 200/2502]  eta: 0:23:10  lr: 0.002021  min_lr: 0.002021  loss: 3.1084 (3.2186)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6742 (0.6775)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [159]  [ 400/2502]  eta: 0:20:58  lr: 0.002019  min_lr: 0.002019  loss: 3.2298 (3.2318)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6811 (0.6859)  time: 0.5937  data: 0.0005  max mem: 53905
Epoch: [159]  [ 600/2502]  eta: 0:18:54  lr: 0.002018  min_lr: 0.002018  loss: 3.3969 (3.2425)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6624 (0.6884)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [159]  [ 800/2502]  eta: 0:16:54  lr: 0.002016  min_lr: 0.002016  loss: 3.4712 (3.2445)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6689 (0.6857)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [159]  [1000/2502]  eta: 0:14:54  lr: 0.002014  min_lr: 0.002014  loss: 3.0898 (3.2384)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6842 (0.6929)  time: 0.5924  data: 0.0006  max mem: 53905
Epoch: [159]  [1200/2502]  eta: 0:12:54  lr: 0.002012  min_lr: 0.002012  loss: 3.2641 (3.2400)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7001 (0.6979)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [159]  [1400/2502]  eta: 0:10:55  lr: 0.002010  min_lr: 0.002010  loss: 3.1821 (3.2466)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7255 (0.7014)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [159]  [1600/2502]  eta: 0:08:56  lr: 0.002009  min_lr: 0.002009  loss: 3.4655 (3.2501)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7117 (0.7054)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [159]  [1800/2502]  eta: 0:06:57  lr: 0.002007  min_lr: 0.002007  loss: 3.4330 (3.2545)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6862 (0.7059)  time: 0.5936  data: 0.0005  max mem: 53905
Epoch: [159]  [2000/2502]  eta: 0:04:58  lr: 0.002005  min_lr: 0.002005  loss: 3.1111 (3.2548)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7014 (0.7041)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [159]  [2200/2502]  eta: 0:02:59  lr: 0.002003  min_lr: 0.002003  loss: 3.4485 (3.2565)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7228 (0.7054)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [159]  [2400/2502]  eta: 0:01:00  lr: 0.002001  min_lr: 0.002001  loss: 3.3600 (3.2532)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7159 (0.7071)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [159]  [2501/2502]  eta: 0:00:00  lr: 0.002001  min_lr: 0.002001  loss: 3.3791 (3.2547)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7648 (0.7100)  time: 0.5344  data: 0.0007  max mem: 53905
Epoch: [159] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.002001  min_lr: 0.002001  loss: 3.3791 (3.2513)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7648 (0.7100)
Test:  [ 0/50]  eta: 0:02:27  loss: 0.6440 (0.6440)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 2.9587  data: 2.6507  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.8873 (0.9201)  acc1: 84.8000 (84.9818)  acc5: 96.8000 (97.0545)  time: 0.5282  data: 0.2414  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9412 (0.9454)  acc1: 84.4000 (84.4571)  acc5: 97.2000 (97.1429)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.1001 (1.0461)  acc1: 79.6000 (82.0645)  acc5: 96.4000 (96.1290)  time: 0.2855  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2676 (1.1058)  acc1: 75.6000 (80.3024)  acc5: 94.0000 (95.6293)  time: 0.2853  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2366 (1.1238)  acc1: 75.2000 (79.6960)  acc5: 94.4000 (95.5440)  time: 0.2850  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3414 s / it)
* Acc@1 79.692 Acc@5 95.426 loss 1.129
Accuracy of the model on the 50000 test images: 79.7%
Max accuracy: 79.96%
Epoch: [160]  [   0/2502]  eta: 1:43:06  lr: 0.002001  min_lr: 0.002001  loss: 2.7471 (2.7471)  weight_decay: 0.0500 (0.0500)  time: 2.4725  data: 1.8637  max mem: 53905
Epoch: [160]  [ 200/2502]  eta: 0:23:07  lr: 0.001999  min_lr: 0.001999  loss: 3.4371 (3.2556)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6901 (0.7170)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [160]  [ 400/2502]  eta: 0:20:58  lr: 0.001997  min_lr: 0.001997  loss: 3.2737 (3.2420)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7012 (0.7157)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [160]  [ 600/2502]  eta: 0:18:55  lr: 0.001995  min_lr: 0.001995  loss: 3.4198 (3.2346)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7684 (0.7270)  time: 0.5938  data: 0.0004  max mem: 53905
Epoch: [160]  [ 800/2502]  eta: 0:16:54  lr: 0.001993  min_lr: 0.001993  loss: 3.2999 (3.2375)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6850 (0.7208)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [160]  [1000/2502]  eta: 0:14:54  lr: 0.001992  min_lr: 0.001992  loss: 3.4814 (3.2385)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7275 (0.7188)  time: 0.5964  data: 0.0005  max mem: 53905
Epoch: [160]  [1200/2502]  eta: 0:12:54  lr: 0.001990  min_lr: 0.001990  loss: 2.8566 (3.2409)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6958 (0.7199)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [160]  [1400/2502]  eta: 0:10:54  lr: 0.001988  min_lr: 0.001988  loss: 2.9772 (3.2430)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7013 (0.7229)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [160]  [1600/2502]  eta: 0:08:55  lr: 0.001986  min_lr: 0.001986  loss: 3.2454 (3.2482)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6982 (0.7191)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [160]  [1800/2502]  eta: 0:06:57  lr: 0.001984  min_lr: 0.001984  loss: 3.4138 (3.2557)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6858 (0.7176)  time: 0.6001  data: 0.0006  max mem: 53905
Epoch: [160]  [2000/2502]  eta: 0:04:58  lr: 0.001983  min_lr: 0.001983  loss: 3.4944 (3.2567)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7017 (0.7179)  time: 0.5950  data: 0.0004  max mem: 53905
Epoch: [160]  [2200/2502]  eta: 0:02:59  lr: 0.001981  min_lr: 0.001981  loss: 3.4297 (3.2588)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6657 (inf)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [160]  [2400/2502]  eta: 0:01:00  lr: 0.001979  min_lr: 0.001979  loss: 3.6185 (3.2585)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6554 (inf)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [160]  [2501/2502]  eta: 0:00:00  lr: 0.001978  min_lr: 0.001978  loss: 3.2381 (3.2574)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6724 (inf)  time: 0.5339  data: 0.0007  max mem: 53905
Epoch: [160] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.001978  min_lr: 0.001978  loss: 3.2381 (3.2533)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6724 (inf)
Test:  [ 0/50]  eta: 0:02:35  loss: 0.5296 (0.5296)  acc1: 91.2000 (91.2000)  acc5: 99.2000 (99.2000)  time: 3.1130  data: 2.7894  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8129 (0.8133)  acc1: 84.4000 (85.2364)  acc5: 97.6000 (97.4546)  time: 0.5547  data: 0.2669  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8208 (0.8433)  acc1: 83.2000 (84.2286)  acc5: 97.6000 (97.3524)  time: 0.2920  data: 0.0076  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0435 (0.9473)  acc1: 80.0000 (82.1290)  acc5: 95.2000 (96.2581)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1816 (1.0005)  acc1: 76.8000 (80.4488)  acc5: 93.6000 (95.6781)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1491 (1.0160)  acc1: 76.0000 (79.7760)  acc5: 94.0000 (95.5680)  time: 0.2874  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3477 s / it)
* Acc@1 79.834 Acc@5 95.518 loss 1.019
Accuracy of the model on the 50000 test images: 79.8%
Max accuracy: 79.96%
Epoch: [161]  [   0/2502]  eta: 1:50:58  lr: 0.001978  min_lr: 0.001978  loss: 3.5940 (3.5940)  weight_decay: 0.0500 (0.0500)  time: 2.6614  data: 1.6433  max mem: 53905
Epoch: [161]  [ 200/2502]  eta: 0:23:11  lr: 0.001976  min_lr: 0.001976  loss: 3.0118 (3.2689)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6559 (0.6743)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [161]  [ 400/2502]  eta: 0:20:58  lr: 0.001974  min_lr: 0.001974  loss: 3.3263 (3.2404)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6732 (0.6860)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [161]  [ 600/2502]  eta: 0:18:54  lr: 0.001973  min_lr: 0.001973  loss: 3.3715 (3.2304)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6401 (0.6944)  time: 0.5959  data: 0.0005  max mem: 53905
Epoch: [161]  [ 800/2502]  eta: 0:16:54  lr: 0.001971  min_lr: 0.001971  loss: 3.5064 (3.2538)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7329 (0.6971)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [161]  [1000/2502]  eta: 0:14:53  lr: 0.001969  min_lr: 0.001969  loss: 3.3604 (3.2669)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7430 (0.7060)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [161]  [1200/2502]  eta: 0:12:54  lr: 0.001967  min_lr: 0.001967  loss: 3.5545 (3.2598)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7392 (0.7085)  time: 0.5982  data: 0.0004  max mem: 53905
Epoch: [161]  [1400/2502]  eta: 0:10:55  lr: 0.001966  min_lr: 0.001966  loss: 3.2816 (3.2617)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7624 (0.7147)  time: 0.5971  data: 0.0004  max mem: 53905
Epoch: [161]  [1600/2502]  eta: 0:08:56  lr: 0.001964  min_lr: 0.001964  loss: 3.2780 (3.2584)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6690 (0.7136)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [161]  [1800/2502]  eta: 0:06:57  lr: 0.001962  min_lr: 0.001962  loss: 3.4887 (3.2644)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6910 (0.7138)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [161]  [2000/2502]  eta: 0:04:58  lr: 0.001960  min_lr: 0.001960  loss: 3.4089 (3.2638)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7036 (0.7124)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [161]  [2200/2502]  eta: 0:02:59  lr: 0.001958  min_lr: 0.001958  loss: 3.4049 (3.2595)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7350 (0.7170)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [161]  [2400/2502]  eta: 0:01:00  lr: 0.001957  min_lr: 0.001957  loss: 3.3343 (3.2664)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7342 (0.7184)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [161]  [2501/2502]  eta: 0:00:00  lr: 0.001956  min_lr: 0.001956  loss: 3.4327 (3.2645)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7138 (0.7190)  time: 0.5360  data: 0.0009  max mem: 53905
Epoch: [161] Total time: 0:24:44 (0.5935 s / it)
Averaged stats: lr: 0.001956  min_lr: 0.001956  loss: 3.4327 (3.2494)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7138 (0.7190)
Test:  [ 0/50]  eta: 0:03:05  loss: 0.5387 (0.5387)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.7001  data: 3.3698  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8756 (0.8625)  acc1: 86.0000 (85.3818)  acc5: 97.6000 (97.1636)  time: 0.5955  data: 0.3068  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9171 (0.8957)  acc1: 83.6000 (84.4571)  acc5: 97.2000 (97.2000)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0262 (0.9951)  acc1: 80.8000 (81.9484)  acc5: 95.6000 (96.1290)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2332 (1.0622)  acc1: 75.6000 (80.3512)  acc5: 94.0000 (95.5415)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2236 (1.0765)  acc1: 75.6000 (79.6880)  acc5: 94.0000 (95.3600)  time: 0.2847  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3559 s / it)
* Acc@1 79.874 Acc@5 95.376 loss 1.076
Accuracy of the model on the 50000 test images: 79.9%
Max accuracy: 79.96%
Epoch: [162]  [   0/2502]  eta: 1:53:50  lr: 0.001956  min_lr: 0.001956  loss: 2.8626 (2.8626)  weight_decay: 0.0500 (0.0500)  time: 2.7298  data: 2.1287  max mem: 53905
Epoch: [162]  [ 200/2502]  eta: 0:23:07  lr: 0.001954  min_lr: 0.001954  loss: 3.3290 (3.2459)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6912 (0.6922)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [162]  [ 400/2502]  eta: 0:20:58  lr: 0.001952  min_lr: 0.001952  loss: 3.3323 (3.2121)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6818 (0.7031)  time: 0.5926  data: 0.0007  max mem: 53905
Epoch: [162]  [ 600/2502]  eta: 0:18:55  lr: 0.001950  min_lr: 0.001950  loss: 3.3140 (3.2347)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7531 (0.7204)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [162]  [ 800/2502]  eta: 0:16:54  lr: 0.001948  min_lr: 0.001948  loss: 3.3019 (3.2418)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7261 (0.7189)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [162]  [1000/2502]  eta: 0:14:54  lr: 0.001947  min_lr: 0.001947  loss: 3.3925 (3.2401)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6629 (0.7219)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [162]  [1200/2502]  eta: 0:12:54  lr: 0.001945  min_lr: 0.001945  loss: 3.0539 (3.2451)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7333 (0.7253)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [162]  [1400/2502]  eta: 0:10:55  lr: 0.001943  min_lr: 0.001943  loss: 3.4065 (3.2397)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7041 (0.7257)  time: 0.5941  data: 0.0005  max mem: 53905
Epoch: [162]  [1600/2502]  eta: 0:08:56  lr: 0.001941  min_lr: 0.001941  loss: 2.9613 (3.2347)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7070 (0.7320)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [162]  [1800/2502]  eta: 0:06:57  lr: 0.001939  min_lr: 0.001939  loss: 3.5173 (3.2355)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6585 (0.7304)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [162]  [2000/2502]  eta: 0:04:58  lr: 0.001938  min_lr: 0.001938  loss: 3.1882 (3.2335)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6734 (0.7277)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [162]  [2200/2502]  eta: 0:02:59  lr: 0.001936  min_lr: 0.001936  loss: 3.5192 (3.2358)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7083 (0.7274)  time: 0.5964  data: 0.0004  max mem: 53905
Epoch: [162]  [2400/2502]  eta: 0:01:00  lr: 0.001934  min_lr: 0.001934  loss: 3.2691 (3.2403)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7148 (0.7289)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [162]  [2501/2502]  eta: 0:00:00  lr: 0.001933  min_lr: 0.001933  loss: 3.3787 (3.2441)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7827 (0.7309)  time: 0.5332  data: 0.0007  max mem: 53905
Epoch: [162] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.001933  min_lr: 0.001933  loss: 3.3787 (3.2382)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7827 (0.7309)
Test:  [ 0/50]  eta: 0:02:54  loss: 0.5897 (0.5897)  acc1: 92.8000 (92.8000)  acc5: 98.4000 (98.4000)  time: 3.4929  data: 3.1837  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8259 (0.8472)  acc1: 87.2000 (85.6364)  acc5: 98.4000 (97.4545)  time: 0.5766  data: 0.2899  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8719 (0.8840)  acc1: 83.6000 (84.4381)  acc5: 98.0000 (97.4476)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0373 (0.9820)  acc1: 79.6000 (82.3226)  acc5: 96.0000 (96.2581)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2096 (1.0433)  acc1: 77.2000 (80.6732)  acc5: 93.6000 (95.6488)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1890 (1.0607)  acc1: 75.2000 (79.9040)  acc5: 94.0000 (95.4960)  time: 0.2847  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3515 s / it)
* Acc@1 79.960 Acc@5 95.514 loss 1.063
Accuracy of the model on the 50000 test images: 80.0%
Max accuracy: 79.96%
Epoch: [163]  [   0/2502]  eta: 1:31:34  lr: 0.001933  min_lr: 0.001933  loss: 3.7523 (3.7523)  weight_decay: 0.0500 (0.0500)  time: 2.1962  data: 1.6051  max mem: 53905
Epoch: [163]  [ 200/2502]  eta: 0:23:03  lr: 0.001931  min_lr: 0.001931  loss: 3.3359 (3.2260)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6753 (0.7112)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [163]  [ 400/2502]  eta: 0:20:55  lr: 0.001930  min_lr: 0.001930  loss: 3.1560 (3.2383)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6930 (0.7231)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [163]  [ 600/2502]  eta: 0:18:53  lr: 0.001928  min_lr: 0.001928  loss: 3.5008 (3.2258)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6878 (0.7208)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [163]  [ 800/2502]  eta: 0:16:53  lr: 0.001926  min_lr: 0.001926  loss: 3.0280 (3.2293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7147 (0.7237)  time: 0.5962  data: 0.0004  max mem: 53905
Epoch: [163]  [1000/2502]  eta: 0:14:53  lr: 0.001924  min_lr: 0.001924  loss: 3.3893 (3.2361)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6981 (0.7229)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [163]  [1200/2502]  eta: 0:12:54  lr: 0.001922  min_lr: 0.001922  loss: 3.3038 (3.2365)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7163 (0.7178)  time: 0.5987  data: 0.0004  max mem: 53905
Epoch: [163]  [1400/2502]  eta: 0:10:54  lr: 0.001921  min_lr: 0.001921  loss: 2.8720 (3.2295)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6648 (0.7155)  time: 0.5904  data: 0.0003  max mem: 53905
Epoch: [163]  [1600/2502]  eta: 0:08:55  lr: 0.001919  min_lr: 0.001919  loss: 3.4289 (3.2303)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7465 (0.7157)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [163]  [1800/2502]  eta: 0:06:56  lr: 0.001917  min_lr: 0.001917  loss: 3.3254 (3.2301)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6963 (0.7180)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [163]  [2000/2502]  eta: 0:04:57  lr: 0.001915  min_lr: 0.001915  loss: 3.2471 (3.2296)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6936 (0.7167)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [163]  [2200/2502]  eta: 0:02:59  lr: 0.001913  min_lr: 0.001913  loss: 3.3276 (3.2243)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7314 (0.7184)  time: 0.5901  data: 0.0004  max mem: 53905
Epoch: [163]  [2400/2502]  eta: 0:01:00  lr: 0.001912  min_lr: 0.001912  loss: 2.9842 (3.2335)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7035 (0.7172)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [163]  [2501/2502]  eta: 0:00:00  lr: 0.001911  min_lr: 0.001911  loss: 3.1390 (3.2343)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7319 (0.7179)  time: 0.5326  data: 0.0006  max mem: 53905
Epoch: [163] Total time: 0:24:43 (0.5929 s / it)
Averaged stats: lr: 0.001911  min_lr: 0.001911  loss: 3.1390 (3.2382)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7319 (0.7179)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.5838 (0.5838)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 3.0081  data: 2.7016  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8276 (0.8509)  acc1: 88.8000 (86.1091)  acc5: 97.6000 (97.4182)  time: 0.5698  data: 0.2843  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8757 (0.8732)  acc1: 84.0000 (84.9905)  acc5: 97.2000 (97.5429)  time: 0.3053  data: 0.0215  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9841 (0.9648)  acc1: 80.4000 (83.0065)  acc5: 96.8000 (96.3226)  time: 0.2845  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2588 (1.0352)  acc1: 76.0000 (80.9366)  acc5: 93.2000 (95.5805)  time: 0.2843  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2295 (1.0500)  acc1: 75.6000 (80.2560)  acc5: 93.6000 (95.4960)  time: 0.2842  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3495 s / it)
* Acc@1 80.082 Acc@5 95.512 loss 1.059
Accuracy of the model on the 50000 test images: 80.1%
Max accuracy: 80.08%
Epoch: [164]  [   0/2502]  eta: 1:45:21  lr: 0.001911  min_lr: 0.001911  loss: 3.4901 (3.4901)  weight_decay: 0.0500 (0.0500)  time: 2.5265  data: 1.9270  max mem: 53905
Epoch: [164]  [ 200/2502]  eta: 0:23:05  lr: 0.001909  min_lr: 0.001909  loss: 3.5128 (3.1761)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7969 (0.7336)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [164]  [ 400/2502]  eta: 0:20:56  lr: 0.001907  min_lr: 0.001907  loss: 3.5619 (3.2223)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6615 (0.7437)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [164]  [ 600/2502]  eta: 0:18:55  lr: 0.001905  min_lr: 0.001905  loss: 3.5121 (3.2350)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7424 (0.7442)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [164]  [ 800/2502]  eta: 0:16:53  lr: 0.001904  min_lr: 0.001904  loss: 3.2236 (3.2356)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7317 (0.7470)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [164]  [1000/2502]  eta: 0:14:53  lr: 0.001902  min_lr: 0.001902  loss: 3.1762 (3.2372)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6996 (0.7370)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [164]  [1200/2502]  eta: 0:12:53  lr: 0.001900  min_lr: 0.001900  loss: 3.4506 (3.2399)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6998 (0.7332)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [164]  [1400/2502]  eta: 0:10:54  lr: 0.001898  min_lr: 0.001898  loss: 3.1795 (3.2424)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7011 (0.7342)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [164]  [1600/2502]  eta: 0:08:55  lr: 0.001896  min_lr: 0.001896  loss: 3.3911 (3.2376)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6959 (0.7314)  time: 0.5961  data: 0.0004  max mem: 53905
Epoch: [164]  [1800/2502]  eta: 0:06:56  lr: 0.001895  min_lr: 0.001895  loss: 3.1089 (3.2330)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7177 (0.7345)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [164]  [2000/2502]  eta: 0:04:58  lr: 0.001893  min_lr: 0.001893  loss: 3.2187 (3.2364)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7340 (0.7313)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [164]  [2200/2502]  eta: 0:02:59  lr: 0.001891  min_lr: 0.001891  loss: 3.0118 (3.2336)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6962 (0.7310)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [164]  [2400/2502]  eta: 0:01:00  lr: 0.001889  min_lr: 0.001889  loss: 3.5061 (3.2345)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7908 (0.7341)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [164]  [2501/2502]  eta: 0:00:00  lr: 0.001888  min_lr: 0.001888  loss: 3.2901 (3.2361)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7093 (0.7350)  time: 0.5409  data: 0.0006  max mem: 53905
Epoch: [164] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.001888  min_lr: 0.001888  loss: 3.2901 (3.2393)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7093 (0.7350)
Test:  [ 0/50]  eta: 0:02:46  loss: 0.5612 (0.5612)  acc1: 93.6000 (93.6000)  acc5: 99.2000 (99.2000)  time: 3.3247  data: 3.0099  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8779 (0.9244)  acc1: 84.8000 (85.2000)  acc5: 98.0000 (97.2000)  time: 0.5954  data: 0.3108  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9408 (0.9402)  acc1: 83.6000 (84.2476)  acc5: 97.6000 (97.2571)  time: 0.3025  data: 0.0206  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0339 (1.0378)  acc1: 78.8000 (82.0774)  acc5: 96.4000 (96.0774)  time: 0.2826  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2485 (1.0856)  acc1: 76.0000 (80.4878)  acc5: 94.0000 (95.5707)  time: 0.2825  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2165 (1.0983)  acc1: 78.0000 (80.0080)  acc5: 94.0000 (95.4800)  time: 0.2823  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3535 s / it)
* Acc@1 80.074 Acc@5 95.458 loss 1.101
Accuracy of the model on the 50000 test images: 80.1%
Max accuracy: 80.08%
Epoch: [165]  [   0/2502]  eta: 1:55:52  lr: 0.001888  min_lr: 0.001888  loss: 2.9175 (2.9175)  weight_decay: 0.0500 (0.0500)  time: 2.7786  data: 1.6895  max mem: 53905
Epoch: [165]  [ 200/2502]  eta: 0:23:10  lr: 0.001887  min_lr: 0.001887  loss: 2.9350 (3.2239)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7102 (0.7229)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [165]  [ 400/2502]  eta: 0:20:58  lr: 0.001885  min_lr: 0.001885  loss: 3.4131 (3.2404)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7205 (0.7177)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [165]  [ 600/2502]  eta: 0:18:55  lr: 0.001883  min_lr: 0.001883  loss: 3.2621 (3.2230)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7016 (0.7127)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [165]  [ 800/2502]  eta: 0:16:54  lr: 0.001881  min_lr: 0.001881  loss: 3.5483 (3.2270)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7203 (0.7139)  time: 0.5934  data: 0.0004  max mem: 53905
Epoch: [165]  [1000/2502]  eta: 0:14:54  lr: 0.001879  min_lr: 0.001879  loss: 2.9992 (3.2117)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7892 (0.7225)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [165]  [1200/2502]  eta: 0:12:54  lr: 0.001878  min_lr: 0.001878  loss: 3.3705 (3.2193)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7278 (0.7210)  time: 0.5923  data: 0.0006  max mem: 53905
Epoch: [165]  [1400/2502]  eta: 0:10:55  lr: 0.001876  min_lr: 0.001876  loss: 3.4292 (3.2236)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7479 (0.7283)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [165]  [1600/2502]  eta: 0:08:56  lr: 0.001874  min_lr: 0.001874  loss: 3.2775 (3.2262)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7232 (0.7291)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [165]  [1800/2502]  eta: 0:06:57  lr: 0.001872  min_lr: 0.001872  loss: 3.3378 (3.2279)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7613 (0.7289)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [165]  [2000/2502]  eta: 0:04:58  lr: 0.001870  min_lr: 0.001870  loss: 3.2851 (3.2282)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7113 (0.7286)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [165]  [2200/2502]  eta: 0:02:59  lr: 0.001869  min_lr: 0.001869  loss: 3.5438 (3.2331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7436 (0.7312)  time: 0.6001  data: 0.0006  max mem: 53905
Epoch: [165]  [2400/2502]  eta: 0:01:00  lr: 0.001867  min_lr: 0.001867  loss: 3.4281 (3.2325)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6995 (0.7310)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [165]  [2501/2502]  eta: 0:00:00  lr: 0.001866  min_lr: 0.001866  loss: 3.5101 (3.2325)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7459 (0.7322)  time: 0.5318  data: 0.0008  max mem: 53905
Epoch: [165] Total time: 0:24:47 (0.5943 s / it)
Averaged stats: lr: 0.001866  min_lr: 0.001866  loss: 3.5101 (3.2266)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7459 (0.7322)
Test:  [ 0/50]  eta: 0:05:14  loss: 0.6737 (0.6737)  acc1: 94.0000 (94.0000)  acc5: 98.4000 (98.4000)  time: 6.2873  data: 5.9743  max mem: 53905
Test:  [10/50]  eta: 0:00:33  loss: 0.8946 (0.9412)  acc1: 87.2000 (85.0909)  acc5: 98.4000 (97.4182)  time: 0.8302  data: 0.5437  max mem: 53905
Test:  [20/50]  eta: 0:00:19  loss: 0.9170 (0.9567)  acc1: 83.6000 (84.2667)  acc5: 97.6000 (97.3905)  time: 0.3522  data: 0.0606  max mem: 53905
Test:  [30/50]  eta: 0:00:11  loss: 1.1125 (1.0678)  acc1: 79.6000 (81.9226)  acc5: 95.6000 (96.0516)  time: 0.4485  data: 0.1490  max mem: 53905
Test:  [40/50]  eta: 0:00:05  loss: 1.2753 (1.1239)  acc1: 75.2000 (80.4195)  acc5: 92.8000 (95.3951)  time: 0.3798  data: 0.0888  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2753 (1.1413)  acc1: 74.4000 (79.8080)  acc5: 93.6000 (95.3040)  time: 0.3718  data: 0.0885  max mem: 53905
Test: Total time: 0:00:23 (0.4716 s / it)
* Acc@1 79.972 Acc@5 95.470 loss 1.140
Accuracy of the model on the 50000 test images: 80.0%
Max accuracy: 80.08%
Epoch: [166]  [   0/2502]  eta: 2:27:55  lr: 0.001866  min_lr: 0.001866  loss: 3.4972 (3.4972)  weight_decay: 0.0500 (0.0500)  time: 3.5474  data: 2.9064  max mem: 53905
Epoch: [166]  [ 200/2502]  eta: 0:23:15  lr: 0.001864  min_lr: 0.001864  loss: 3.3946 (3.1878)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6767 (0.7196)  time: 0.5913  data: 0.0007  max mem: 53905
Epoch: [166]  [ 400/2502]  eta: 0:21:03  lr: 0.001862  min_lr: 0.001862  loss: 3.4566 (3.2466)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7625 (0.7377)  time: 0.5922  data: 0.0007  max mem: 53905
Epoch: [166]  [ 600/2502]  eta: 0:18:58  lr: 0.001861  min_lr: 0.001861  loss: 3.3703 (3.2287)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7125 (0.7409)  time: 0.5925  data: 0.0009  max mem: 53905
Epoch: [166]  [ 800/2502]  eta: 0:16:56  lr: 0.001859  min_lr: 0.001859  loss: 3.1618 (3.2210)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7232 (0.7343)  time: 0.5962  data: 0.0007  max mem: 53905
Epoch: [166]  [1000/2502]  eta: 0:14:56  lr: 0.001857  min_lr: 0.001857  loss: 3.4333 (3.2349)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6970 (0.7269)  time: 0.5937  data: 0.0006  max mem: 53905
Epoch: [166]  [1200/2502]  eta: 0:12:56  lr: 0.001855  min_lr: 0.001855  loss: 3.1559 (3.2343)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7353 (0.7306)  time: 0.5926  data: 0.0007  max mem: 53905
Epoch: [166]  [1400/2502]  eta: 0:10:56  lr: 0.001853  min_lr: 0.001853  loss: 3.4172 (3.2336)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6885 (0.7337)  time: 0.5988  data: 0.0007  max mem: 53905
Epoch: [166]  [1600/2502]  eta: 0:08:57  lr: 0.001852  min_lr: 0.001852  loss: 3.3760 (3.2262)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7308 (0.7337)  time: 0.5967  data: 0.0006  max mem: 53905
Epoch: [166]  [1800/2502]  eta: 0:06:57  lr: 0.001850  min_lr: 0.001850  loss: 3.4017 (3.2293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6548 (0.7311)  time: 0.5925  data: 0.0006  max mem: 53905
Epoch: [166]  [2000/2502]  eta: 0:04:58  lr: 0.001848  min_lr: 0.001848  loss: 3.3631 (3.2300)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7038 (0.7308)  time: 0.6047  data: 0.0006  max mem: 53905
Epoch: [166]  [2200/2502]  eta: 0:02:59  lr: 0.001846  min_lr: 0.001846  loss: 3.2676 (3.2302)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7603 (0.7330)  time: 0.5923  data: 0.0006  max mem: 53905
Epoch: [166]  [2400/2502]  eta: 0:01:00  lr: 0.001845  min_lr: 0.001845  loss: 3.4182 (3.2363)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6999 (0.7333)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [166]  [2501/2502]  eta: 0:00:00  lr: 0.001844  min_lr: 0.001844  loss: 3.2837 (3.2354)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7263 (0.7331)  time: 0.5386  data: 0.0009  max mem: 53905
Epoch: [166] Total time: 0:24:47 (0.5946 s / it)
Averaged stats: lr: 0.001844  min_lr: 0.001844  loss: 3.2837 (3.2309)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7263 (0.7331)
Test:  [ 0/50]  eta: 0:02:51  loss: 0.4990 (0.4990)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.4287  data: 3.1098  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8448 (0.8432)  acc1: 84.8000 (85.4182)  acc5: 98.0000 (97.5636)  time: 0.5738  data: 0.2863  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8524 (0.8706)  acc1: 82.8000 (84.5143)  acc5: 97.6000 (97.5238)  time: 0.2869  data: 0.0022  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9982 (0.9602)  acc1: 79.6000 (82.3613)  acc5: 95.6000 (96.2581)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1848 (1.0154)  acc1: 76.0000 (80.7024)  acc5: 93.6000 (95.6488)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1848 (1.0332)  acc1: 75.2000 (80.1040)  acc5: 94.4000 (95.6240)  time: 0.2846  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3504 s / it)
* Acc@1 80.138 Acc@5 95.490 loss 1.036
Accuracy of the model on the 50000 test images: 80.1%
Max accuracy: 80.14%
Epoch: [167]  [   0/2502]  eta: 1:42:13  lr: 0.001844  min_lr: 0.001844  loss: 3.1140 (3.1140)  weight_decay: 0.0500 (0.0500)  time: 2.4515  data: 1.8570  max mem: 53905
Epoch: [167]  [ 200/2502]  eta: 0:23:06  lr: 0.001842  min_lr: 0.001842  loss: 3.3910 (3.2117)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6785 (0.7322)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [167]  [ 400/2502]  eta: 0:20:55  lr: 0.001840  min_lr: 0.001840  loss: 3.4004 (3.2345)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7262 (0.7279)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [167]  [ 600/2502]  eta: 0:18:54  lr: 0.001838  min_lr: 0.001838  loss: 3.2655 (3.2402)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6749 (0.7207)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [167]  [ 800/2502]  eta: 0:16:53  lr: 0.001836  min_lr: 0.001836  loss: 3.3456 (3.2351)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7181 (0.7254)  time: 0.5947  data: 0.0006  max mem: 53905
Epoch: [167]  [1000/2502]  eta: 0:14:53  lr: 0.001835  min_lr: 0.001835  loss: 3.4343 (3.2335)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7200 (0.7296)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [167]  [1200/2502]  eta: 0:12:54  lr: 0.001833  min_lr: 0.001833  loss: 3.4420 (3.2332)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7244 (nan)  time: 0.5906  data: 0.0007  max mem: 53905
Epoch: [167]  [1400/2502]  eta: 0:10:54  lr: 0.001831  min_lr: 0.001831  loss: 3.2542 (3.2302)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6780 (nan)  time: 0.5918  data: 0.0008  max mem: 53905
Epoch: [167]  [1600/2502]  eta: 0:08:55  lr: 0.001829  min_lr: 0.001829  loss: 3.2219 (3.2304)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8213 (nan)  time: 0.5930  data: 0.0007  max mem: 53905
Epoch: [167]  [1800/2502]  eta: 0:06:56  lr: 0.001828  min_lr: 0.001828  loss: 3.2170 (3.2357)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6914 (nan)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [167]  [2000/2502]  eta: 0:04:58  lr: 0.001826  min_lr: 0.001826  loss: 3.3605 (3.2397)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7562 (nan)  time: 0.5916  data: 0.0008  max mem: 53905
Epoch: [167]  [2200/2502]  eta: 0:02:59  lr: 0.001824  min_lr: 0.001824  loss: 3.0938 (3.2379)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7339 (nan)  time: 0.5909  data: 0.0006  max mem: 53905
Epoch: [167]  [2400/2502]  eta: 0:01:00  lr: 0.001822  min_lr: 0.001822  loss: 3.1817 (3.2316)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7172 (nan)  time: 0.5947  data: 0.0005  max mem: 53905
Epoch: [167]  [2501/2502]  eta: 0:00:00  lr: 0.001821  min_lr: 0.001821  loss: 3.2574 (3.2333)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7220 (nan)  time: 0.5323  data: 0.0007  max mem: 53905
Epoch: [167] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.001821  min_lr: 0.001821  loss: 3.2574 (3.2202)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7220 (nan)
Test:  [ 0/50]  eta: 0:02:49  loss: 0.5384 (0.5384)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 3.3920  data: 3.0687  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8888 (0.8526)  acc1: 85.2000 (85.7818)  acc5: 98.0000 (97.4545)  time: 0.5644  data: 0.2795  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8946 (0.8760)  acc1: 84.8000 (84.7810)  acc5: 97.6000 (97.3905)  time: 0.2817  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0106 (0.9847)  acc1: 80.4000 (82.3742)  acc5: 96.0000 (96.2452)  time: 0.2818  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2510 (1.0557)  acc1: 75.2000 (80.4683)  acc5: 93.2000 (95.4537)  time: 0.2816  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2236 (1.0719)  acc1: 75.2000 (79.8560)  acc5: 94.0000 (95.4240)  time: 0.2816  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3462 s / it)
* Acc@1 79.954 Acc@5 95.472 loss 1.068
Accuracy of the model on the 50000 test images: 80.0%
Max accuracy: 80.14%
Epoch: [168]  [   0/2502]  eta: 1:44:30  lr: 0.001821  min_lr: 0.001821  loss: 3.4176 (3.4176)  weight_decay: 0.0500 (0.0500)  time: 2.5062  data: 1.8029  max mem: 53905
Epoch: [168]  [ 200/2502]  eta: 0:23:06  lr: 0.001819  min_lr: 0.001819  loss: 3.2247 (3.2312)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7423 (0.7298)  time: 0.5972  data: 0.0005  max mem: 53905
Epoch: [168]  [ 400/2502]  eta: 0:20:56  lr: 0.001818  min_lr: 0.001818  loss: 3.2662 (3.2038)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7607 (0.7391)  time: 0.5914  data: 0.0008  max mem: 53905
Epoch: [168]  [ 600/2502]  eta: 0:18:53  lr: 0.001816  min_lr: 0.001816  loss: 3.1649 (3.2025)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7023 (0.7378)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [168]  [ 800/2502]  eta: 0:16:53  lr: 0.001814  min_lr: 0.001814  loss: 3.1194 (3.1988)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7391 (0.7383)  time: 0.5990  data: 0.0005  max mem: 53905
Epoch: [168]  [1000/2502]  eta: 0:14:53  lr: 0.001812  min_lr: 0.001812  loss: 3.3300 (3.1937)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7244 (nan)  time: 0.5968  data: 0.0005  max mem: 53905
Epoch: [168]  [1200/2502]  eta: 0:12:54  lr: 0.001811  min_lr: 0.001811  loss: 3.3128 (3.2102)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6711 (nan)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [168]  [1400/2502]  eta: 0:10:55  lr: 0.001809  min_lr: 0.001809  loss: 3.2409 (3.2173)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6731 (nan)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [168]  [1600/2502]  eta: 0:08:55  lr: 0.001807  min_lr: 0.001807  loss: 3.4656 (3.2241)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7208 (nan)  time: 0.5973  data: 0.0007  max mem: 53905
Epoch: [168]  [1800/2502]  eta: 0:06:57  lr: 0.001805  min_lr: 0.001805  loss: 3.4241 (3.2224)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7272 (nan)  time: 0.6037  data: 0.0006  max mem: 53905
Epoch: [168]  [2000/2502]  eta: 0:04:58  lr: 0.001803  min_lr: 0.001803  loss: 3.1378 (3.2260)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7248 (nan)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [168]  [2200/2502]  eta: 0:02:59  lr: 0.001802  min_lr: 0.001802  loss: 3.4720 (3.2278)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7229 (nan)  time: 0.5932  data: 0.0008  max mem: 53905
Epoch: [168]  [2400/2502]  eta: 0:01:00  lr: 0.001800  min_lr: 0.001800  loss: 3.2831 (3.2253)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7365 (nan)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [168]  [2501/2502]  eta: 0:00:00  lr: 0.001799  min_lr: 0.001799  loss: 3.4132 (3.2277)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7319 (nan)  time: 0.5322  data: 0.0007  max mem: 53905
Epoch: [168] Total time: 0:24:45 (0.5936 s / it)
Averaged stats: lr: 0.001799  min_lr: 0.001799  loss: 3.4132 (3.2199)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7319 (nan)
Test:  [ 0/50]  eta: 0:02:39  loss: 0.6342 (0.6342)  acc1: 92.0000 (92.0000)  acc5: 99.2000 (99.2000)  time: 3.1921  data: 2.8553  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8846 (0.9023)  acc1: 85.6000 (85.1636)  acc5: 98.4000 (97.5273)  time: 0.5998  data: 0.3136  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9312 (0.9192)  acc1: 82.8000 (84.2857)  acc5: 97.6000 (97.5048)  time: 0.3112  data: 0.0300  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0285 (1.0208)  acc1: 79.6000 (82.1548)  acc5: 96.0000 (96.1290)  time: 0.2820  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2867 (1.0867)  acc1: 76.0000 (80.4878)  acc5: 93.2000 (95.4927)  time: 0.2817  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2713 (1.1030)  acc1: 75.6000 (79.9120)  acc5: 93.6000 (95.4800)  time: 0.2814  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3540 s / it)
* Acc@1 80.074 Acc@5 95.468 loss 1.105
Accuracy of the model on the 50000 test images: 80.1%
Max accuracy: 80.14%
Epoch: [169]  [   0/2502]  eta: 1:52:30  lr: 0.001799  min_lr: 0.001799  loss: 2.7705 (2.7705)  weight_decay: 0.0500 (0.0500)  time: 2.6979  data: 1.5421  max mem: 53905
Epoch: [169]  [ 200/2502]  eta: 0:23:07  lr: 0.001797  min_lr: 0.001797  loss: 3.1109 (3.1668)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7217 (0.7356)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [169]  [ 400/2502]  eta: 0:20:55  lr: 0.001795  min_lr: 0.001795  loss: 3.4044 (3.2075)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7417 (0.7504)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [169]  [ 600/2502]  eta: 0:18:53  lr: 0.001794  min_lr: 0.001794  loss: 3.3883 (3.1902)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7526 (0.7476)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [169]  [ 800/2502]  eta: 0:16:53  lr: 0.001792  min_lr: 0.001792  loss: 3.2915 (3.1931)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7315 (0.7466)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [169]  [1000/2502]  eta: 0:14:53  lr: 0.001790  min_lr: 0.001790  loss: 3.1450 (3.1934)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7983 (0.7530)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [169]  [1200/2502]  eta: 0:12:53  lr: 0.001788  min_lr: 0.001788  loss: 3.3655 (3.1929)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7397 (0.7535)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [169]  [1400/2502]  eta: 0:10:54  lr: 0.001786  min_lr: 0.001786  loss: 3.4385 (3.1942)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6961 (0.7507)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [169]  [1600/2502]  eta: 0:08:55  lr: 0.001785  min_lr: 0.001785  loss: 3.2849 (3.2023)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7263 (0.7489)  time: 0.5903  data: 0.0005  max mem: 53905
Epoch: [169]  [1800/2502]  eta: 0:06:56  lr: 0.001783  min_lr: 0.001783  loss: 3.3707 (3.2032)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7890 (0.7492)  time: 0.5959  data: 0.0004  max mem: 53905
Epoch: [169]  [2000/2502]  eta: 0:04:57  lr: 0.001781  min_lr: 0.001781  loss: 3.5047 (3.2030)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6954 (0.7498)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [169]  [2200/2502]  eta: 0:02:59  lr: 0.001779  min_lr: 0.001779  loss: 3.1106 (3.1994)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7057 (0.7480)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [169]  [2400/2502]  eta: 0:01:00  lr: 0.001778  min_lr: 0.001778  loss: 3.4191 (3.2071)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7139 (0.7477)  time: 0.5982  data: 0.0005  max mem: 53905
Epoch: [169]  [2501/2502]  eta: 0:00:00  lr: 0.001777  min_lr: 0.001777  loss: 3.3804 (3.2066)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7681 (0.7481)  time: 0.5326  data: 0.0007  max mem: 53905
Epoch: [169] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.001777  min_lr: 0.001777  loss: 3.3804 (3.2043)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7681 (0.7481)
Test:  [ 0/50]  eta: 0:03:05  loss: 0.5886 (0.5886)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.7079  data: 3.3962  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8964 (0.8883)  acc1: 85.6000 (85.7818)  acc5: 98.0000 (97.4546)  time: 0.5949  data: 0.3092  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8964 (0.9200)  acc1: 83.2000 (84.5714)  acc5: 97.6000 (97.3905)  time: 0.2839  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0387 (1.0146)  acc1: 80.8000 (82.6194)  acc5: 95.2000 (96.0645)  time: 0.2842  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2392 (1.0804)  acc1: 75.2000 (80.7805)  acc5: 92.8000 (95.3951)  time: 0.2840  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2187 (1.0903)  acc1: 75.2000 (80.2400)  acc5: 93.6000 (95.3360)  time: 0.2838  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3551 s / it)
* Acc@1 80.112 Acc@5 95.488 loss 1.092
Accuracy of the model on the 50000 test images: 80.1%
Max accuracy: 80.14%
Epoch: [170]  [   0/2502]  eta: 1:54:33  lr: 0.001777  min_lr: 0.001777  loss: 4.0943 (4.0943)  weight_decay: 0.0500 (0.0500)  time: 2.7473  data: 2.1542  max mem: 53905
Epoch: [170]  [ 200/2502]  eta: 0:23:09  lr: 0.001775  min_lr: 0.001775  loss: 3.3153 (3.2089)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7372 (0.7390)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [170]  [ 400/2502]  eta: 0:20:57  lr: 0.001773  min_lr: 0.001773  loss: 3.1263 (3.2038)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6870 (0.7394)  time: 0.5986  data: 0.0004  max mem: 53905
Epoch: [170]  [ 600/2502]  eta: 0:18:54  lr: 0.001771  min_lr: 0.001771  loss: 3.4243 (3.1916)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6905 (0.7386)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [170]  [ 800/2502]  eta: 0:16:53  lr: 0.001769  min_lr: 0.001769  loss: 3.4189 (3.2021)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7152 (0.7377)  time: 0.5933  data: 0.0004  max mem: 53905
Epoch: [170]  [1000/2502]  eta: 0:14:54  lr: 0.001768  min_lr: 0.001768  loss: 3.1528 (3.1989)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7109 (0.7379)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [170]  [1200/2502]  eta: 0:12:54  lr: 0.001766  min_lr: 0.001766  loss: 3.1727 (3.2046)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7231 (0.7382)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [170]  [1400/2502]  eta: 0:10:55  lr: 0.001764  min_lr: 0.001764  loss: 3.2100 (3.2054)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7282 (0.7421)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [170]  [1600/2502]  eta: 0:08:56  lr: 0.001762  min_lr: 0.001762  loss: 3.3143 (3.2083)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7453 (0.7436)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [170]  [1800/2502]  eta: 0:06:57  lr: 0.001761  min_lr: 0.001761  loss: 3.2228 (3.2035)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7070 (0.7412)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [170]  [2000/2502]  eta: 0:04:58  lr: 0.001759  min_lr: 0.001759  loss: 3.3598 (3.2142)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7362 (0.7436)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [170]  [2200/2502]  eta: 0:02:59  lr: 0.001757  min_lr: 0.001757  loss: 3.2712 (3.2133)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7669 (0.7457)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [170]  [2400/2502]  eta: 0:01:00  lr: 0.001755  min_lr: 0.001755  loss: 3.5233 (3.2172)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7056 (0.7456)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [170]  [2501/2502]  eta: 0:00:00  lr: 0.001754  min_lr: 0.001754  loss: 3.3174 (3.2170)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7042 (0.7446)  time: 0.5333  data: 0.0008  max mem: 53905
Epoch: [170] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.001754  min_lr: 0.001754  loss: 3.3174 (3.2035)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7042 (0.7446)
Test:  [ 0/50]  eta: 0:02:53  loss: 0.6754 (0.6754)  acc1: 92.0000 (92.0000)  acc5: 98.8000 (98.8000)  time: 3.4620  data: 3.1439  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8811 (0.9098)  acc1: 84.8000 (85.3818)  acc5: 97.6000 (97.3091)  time: 0.5882  data: 0.3019  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8860 (0.9245)  acc1: 83.2000 (84.5524)  acc5: 97.2000 (97.2381)  time: 0.2924  data: 0.0091  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0739 (1.0145)  acc1: 80.0000 (82.6452)  acc5: 95.2000 (96.0516)  time: 0.2839  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2450 (1.0705)  acc1: 76.4000 (81.0341)  acc5: 93.6000 (95.5902)  time: 0.2854  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2415 (1.0876)  acc1: 74.8000 (80.3360)  acc5: 94.8000 (95.5600)  time: 0.2905  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3556 s / it)
* Acc@1 80.570 Acc@5 95.650 loss 1.083
Accuracy of the model on the 50000 test images: 80.6%
Max accuracy: 80.57%
Epoch: [171]  [   0/2502]  eta: 1:52:30  lr: 0.001754  min_lr: 0.001754  loss: 3.8288 (3.8288)  weight_decay: 0.0500 (0.0500)  time: 2.6980  data: 2.1051  max mem: 53905
Epoch: [171]  [ 200/2502]  eta: 0:23:09  lr: 0.001753  min_lr: 0.001753  loss: 3.1246 (3.1998)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7428 (0.7348)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [171]  [ 400/2502]  eta: 0:20:56  lr: 0.001751  min_lr: 0.001751  loss: 3.3925 (3.2155)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7441 (0.7518)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [171]  [ 600/2502]  eta: 0:18:53  lr: 0.001749  min_lr: 0.001749  loss: 3.1608 (3.2301)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7517 (0.7566)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [171]  [ 800/2502]  eta: 0:16:53  lr: 0.001747  min_lr: 0.001747  loss: 3.0557 (3.2184)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7629 (0.7693)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [171]  [1000/2502]  eta: 0:14:53  lr: 0.001745  min_lr: 0.001745  loss: 3.3132 (3.2125)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7745 (0.7633)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [171]  [1200/2502]  eta: 0:12:54  lr: 0.001744  min_lr: 0.001744  loss: 3.3861 (3.2069)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7286 (0.7612)  time: 0.5991  data: 0.0005  max mem: 53905
Epoch: [171]  [1400/2502]  eta: 0:10:55  lr: 0.001742  min_lr: 0.001742  loss: 3.3097 (3.2108)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7069 (0.7607)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [171]  [1600/2502]  eta: 0:08:56  lr: 0.001740  min_lr: 0.001740  loss: 3.2836 (3.2029)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7743 (0.7605)  time: 0.5986  data: 0.0005  max mem: 53905
Epoch: [171]  [1800/2502]  eta: 0:06:57  lr: 0.001738  min_lr: 0.001738  loss: 3.2411 (3.2099)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7283 (0.7618)  time: 0.5902  data: 0.0004  max mem: 53905
Epoch: [171]  [2000/2502]  eta: 0:04:58  lr: 0.001737  min_lr: 0.001737  loss: 3.1998 (3.2109)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7718 (0.7624)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [171]  [2200/2502]  eta: 0:02:59  lr: 0.001735  min_lr: 0.001735  loss: 3.3522 (3.2100)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7501 (0.7607)  time: 0.5904  data: 0.0005  max mem: 53905
Epoch: [171]  [2400/2502]  eta: 0:01:00  lr: 0.001733  min_lr: 0.001733  loss: 3.0989 (3.2079)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7474 (0.7592)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [171]  [2501/2502]  eta: 0:00:00  lr: 0.001732  min_lr: 0.001732  loss: 3.4372 (3.2112)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7333 (0.7602)  time: 0.5336  data: 0.0008  max mem: 53905
Epoch: [171] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.001732  min_lr: 0.001732  loss: 3.4372 (3.2023)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7333 (0.7602)
Test:  [ 0/50]  eta: 0:02:51  loss: 0.5055 (0.5055)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 3.4258  data: 3.0975  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8359 (0.8377)  acc1: 86.8000 (85.9636)  acc5: 97.6000 (97.2364)  time: 0.5940  data: 0.3064  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8615 (0.8801)  acc1: 83.6000 (84.7619)  acc5: 97.2000 (97.2571)  time: 0.2977  data: 0.0139  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0073 (0.9751)  acc1: 80.8000 (82.6968)  acc5: 95.2000 (95.9871)  time: 0.2846  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2112 (1.0255)  acc1: 78.0000 (81.3171)  acc5: 93.6000 (95.5610)  time: 0.2844  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1959 (1.0401)  acc1: 77.2000 (80.4560)  acc5: 94.0000 (95.4960)  time: 0.2841  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3549 s / it)
* Acc@1 80.352 Acc@5 95.600 loss 1.043
Accuracy of the model on the 50000 test images: 80.4%
Max accuracy: 80.57%
Epoch: [172]  [   0/2502]  eta: 1:53:30  lr: 0.001732  min_lr: 0.001732  loss: 3.0053 (3.0053)  weight_decay: 0.0500 (0.0500)  time: 2.7220  data: 1.6640  max mem: 53905
Epoch: [172]  [ 200/2502]  eta: 0:23:10  lr: 0.001730  min_lr: 0.001730  loss: 3.5094 (3.1377)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7379 (0.7579)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [172]  [ 400/2502]  eta: 0:20:59  lr: 0.001729  min_lr: 0.001729  loss: 3.3059 (3.1597)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6754 (0.7429)  time: 0.5934  data: 0.0005  max mem: 53905
Epoch: [172]  [ 600/2502]  eta: 0:18:55  lr: 0.001727  min_lr: 0.001727  loss: 3.3975 (3.1750)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7061 (0.7418)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [172]  [ 800/2502]  eta: 0:16:54  lr: 0.001725  min_lr: 0.001725  loss: 3.1909 (3.1734)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7407 (0.7440)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [172]  [1000/2502]  eta: 0:14:54  lr: 0.001723  min_lr: 0.001723  loss: 3.3824 (3.1793)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8093 (0.7537)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [172]  [1200/2502]  eta: 0:12:54  lr: 0.001721  min_lr: 0.001721  loss: 3.4287 (3.1856)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7529 (0.7554)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [172]  [1400/2502]  eta: 0:10:55  lr: 0.001720  min_lr: 0.001720  loss: 3.5439 (3.1903)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7984 (0.7572)  time: 0.5996  data: 0.0004  max mem: 53905
Epoch: [172]  [1600/2502]  eta: 0:08:56  lr: 0.001718  min_lr: 0.001718  loss: 3.5461 (3.1937)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7653 (0.7647)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [172]  [1800/2502]  eta: 0:06:57  lr: 0.001716  min_lr: 0.001716  loss: 2.8806 (3.1911)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6976 (0.7667)  time: 0.5956  data: 0.0005  max mem: 53905
Epoch: [172]  [2000/2502]  eta: 0:04:58  lr: 0.001714  min_lr: 0.001714  loss: 2.6469 (3.1856)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7581 (0.7673)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [172]  [2200/2502]  eta: 0:02:59  lr: 0.001713  min_lr: 0.001713  loss: 3.2738 (3.1904)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7859 (0.7693)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [172]  [2400/2502]  eta: 0:01:00  lr: 0.001711  min_lr: 0.001711  loss: 3.2118 (3.1953)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7506 (0.7682)  time: 0.5968  data: 0.0005  max mem: 53905
Epoch: [172]  [2501/2502]  eta: 0:00:00  lr: 0.001710  min_lr: 0.001710  loss: 3.3058 (3.1967)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7226 (0.7676)  time: 0.5335  data: 0.0008  max mem: 53905
Epoch: [172] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.001710  min_lr: 0.001710  loss: 3.3058 (3.1978)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7226 (0.7676)
Test:  [ 0/50]  eta: 0:02:36  loss: 0.5543 (0.5543)  acc1: 93.2000 (93.2000)  acc5: 98.4000 (98.4000)  time: 3.1272  data: 2.7997  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7818 (0.8438)  acc1: 85.2000 (85.4545)  acc5: 98.0000 (97.4546)  time: 0.5699  data: 0.2824  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8301 (0.8622)  acc1: 83.2000 (84.6476)  acc5: 97.6000 (97.3333)  time: 0.2993  data: 0.0155  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9599 (0.9543)  acc1: 81.2000 (82.6065)  acc5: 95.6000 (96.2452)  time: 0.2844  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2106 (1.0198)  acc1: 76.4000 (80.9951)  acc5: 93.2000 (95.5024)  time: 0.2843  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1797 (1.0347)  acc1: 75.6000 (80.4640)  acc5: 93.6000 (95.3440)  time: 0.2840  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3493 s / it)
* Acc@1 80.374 Acc@5 95.474 loss 1.034
Accuracy of the model on the 50000 test images: 80.4%
Max accuracy: 80.57%
Epoch: [173]  [   0/2502]  eta: 1:55:56  lr: 0.001710  min_lr: 0.001710  loss: 3.4860 (3.4860)  weight_decay: 0.0500 (0.0500)  time: 2.7802  data: 2.1883  max mem: 53905
Epoch: [173]  [ 200/2502]  eta: 0:23:11  lr: 0.001708  min_lr: 0.001708  loss: 3.1288 (3.1093)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7020 (0.7492)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [173]  [ 400/2502]  eta: 0:20:58  lr: 0.001706  min_lr: 0.001706  loss: 3.3662 (3.1530)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7484 (0.7436)  time: 0.5981  data: 0.0004  max mem: 53905
Epoch: [173]  [ 600/2502]  eta: 0:18:56  lr: 0.001705  min_lr: 0.001705  loss: 3.2999 (3.1654)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6985 (0.7498)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [173]  [ 800/2502]  eta: 0:16:54  lr: 0.001703  min_lr: 0.001703  loss: 3.3139 (3.1724)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7056 (0.7439)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [173]  [1000/2502]  eta: 0:14:54  lr: 0.001701  min_lr: 0.001701  loss: 3.2855 (3.1617)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7090 (0.7453)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [173]  [1200/2502]  eta: 0:12:55  lr: 0.001699  min_lr: 0.001699  loss: 3.3502 (3.1637)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8460 (0.7478)  time: 0.5957  data: 0.0005  max mem: 53905
Epoch: [173]  [1400/2502]  eta: 0:10:55  lr: 0.001697  min_lr: 0.001697  loss: 3.1138 (3.1683)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7459 (0.7518)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [173]  [1600/2502]  eta: 0:08:56  lr: 0.001696  min_lr: 0.001696  loss: 3.2950 (3.1775)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7544 (0.7509)  time: 0.5945  data: 0.0005  max mem: 53905
Epoch: [173]  [1800/2502]  eta: 0:06:57  lr: 0.001694  min_lr: 0.001694  loss: 3.2302 (3.1848)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7356 (0.7526)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [173]  [2000/2502]  eta: 0:04:58  lr: 0.001692  min_lr: 0.001692  loss: 3.3830 (3.1861)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7291 (0.7532)  time: 0.5937  data: 0.0005  max mem: 53905
Epoch: [173]  [2200/2502]  eta: 0:02:59  lr: 0.001690  min_lr: 0.001690  loss: 3.3358 (3.1863)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7718 (0.7543)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [173]  [2400/2502]  eta: 0:01:00  lr: 0.001689  min_lr: 0.001689  loss: 3.3804 (3.1889)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7389 (0.7573)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [173]  [2501/2502]  eta: 0:00:00  lr: 0.001688  min_lr: 0.001688  loss: 3.3743 (3.1896)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6955 (0.7560)  time: 0.5332  data: 0.0007  max mem: 53905
Epoch: [173] Total time: 0:24:47 (0.5944 s / it)
Averaged stats: lr: 0.001688  min_lr: 0.001688  loss: 3.3743 (3.1904)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6955 (0.7560)
Test:  [ 0/50]  eta: 0:03:43  loss: 0.4896 (0.4896)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 4.4647  data: 4.1647  max mem: 53905
Test:  [10/50]  eta: 0:00:26  loss: 0.8622 (0.8466)  acc1: 85.2000 (85.5636)  acc5: 98.0000 (97.5273)  time: 0.6646  data: 0.3790  max mem: 53905
Test:  [20/50]  eta: 0:00:14  loss: 0.8755 (0.8763)  acc1: 83.2000 (84.7429)  acc5: 97.2000 (97.4095)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.0071 (0.9747)  acc1: 80.0000 (82.6194)  acc5: 95.2000 (96.1161)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2127 (1.0373)  acc1: 76.4000 (80.9659)  acc5: 93.2000 (95.4927)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2127 (1.0513)  acc1: 76.0000 (80.5840)  acc5: 94.0000 (95.4080)  time: 0.2845  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3715 s / it)
* Acc@1 80.480 Acc@5 95.550 loss 1.049
Accuracy of the model on the 50000 test images: 80.5%
Max accuracy: 80.57%
Epoch: [174]  [   0/2502]  eta: 1:53:10  lr: 0.001688  min_lr: 0.001688  loss: 3.7916 (3.7916)  weight_decay: 0.0500 (0.0500)  time: 2.7140  data: 2.1233  max mem: 53905
Epoch: [174]  [ 200/2502]  eta: 0:23:12  lr: 0.001686  min_lr: 0.001686  loss: 3.4908 (3.2331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7617 (0.8051)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [174]  [ 400/2502]  eta: 0:20:57  lr: 0.001684  min_lr: 0.001684  loss: 3.2998 (3.2281)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7401 (0.7848)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [174]  [ 600/2502]  eta: 0:18:54  lr: 0.001682  min_lr: 0.001682  loss: 3.3257 (3.2041)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7795 (0.7922)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [174]  [ 800/2502]  eta: 0:16:53  lr: 0.001681  min_lr: 0.001681  loss: 3.3043 (3.1919)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7438 (0.7837)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [174]  [1000/2502]  eta: 0:14:53  lr: 0.001679  min_lr: 0.001679  loss: 3.1803 (3.1874)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7562 (0.7874)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [174]  [1200/2502]  eta: 0:12:54  lr: 0.001677  min_lr: 0.001677  loss: 3.1864 (3.1870)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7555 (0.7794)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [174]  [1400/2502]  eta: 0:10:54  lr: 0.001675  min_lr: 0.001675  loss: 3.1699 (3.1843)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7383 (0.7762)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [174]  [1600/2502]  eta: 0:08:55  lr: 0.001674  min_lr: 0.001674  loss: 3.3422 (3.1872)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7868 (0.7790)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [174]  [1800/2502]  eta: 0:06:56  lr: 0.001672  min_lr: 0.001672  loss: 3.4262 (3.1955)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7502 (0.7782)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [174]  [2000/2502]  eta: 0:04:58  lr: 0.001670  min_lr: 0.001670  loss: 3.0843 (3.1983)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [174]  [2200/2502]  eta: 0:02:59  lr: 0.001668  min_lr: 0.001668  loss: 3.2082 (3.1966)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [174]  [2400/2502]  eta: 0:01:00  lr: 0.001666  min_lr: 0.001666  loss: 2.8120 (3.1916)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7464 (nan)  time: 0.5998  data: 0.0004  max mem: 53905
Epoch: [174]  [2501/2502]  eta: 0:00:00  lr: 0.001666  min_lr: 0.001666  loss: 3.3926 (3.1950)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8086 (nan)  time: 0.5336  data: 0.0009  max mem: 53905
Epoch: [174] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.001666  min_lr: 0.001666  loss: 3.3926 (3.1905)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8086 (nan)
Test:  [ 0/50]  eta: 0:02:52  loss: 0.5807 (0.5807)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 3.4450  data: 3.1206  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8435 (0.8718)  acc1: 86.8000 (85.8182)  acc5: 98.0000 (97.2364)  time: 0.5996  data: 0.3121  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9092 (0.9083)  acc1: 82.8000 (84.4191)  acc5: 97.6000 (97.2571)  time: 0.2999  data: 0.0159  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9805 (1.0053)  acc1: 80.8000 (82.2452)  acc5: 95.2000 (96.1032)  time: 0.2847  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2233 (1.0611)  acc1: 76.4000 (80.7220)  acc5: 93.6000 (95.5805)  time: 0.2843  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2039 (1.0827)  acc1: 74.4000 (80.0480)  acc5: 94.4000 (95.5040)  time: 0.2840  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3557 s / it)
* Acc@1 80.386 Acc@5 95.598 loss 1.077
Accuracy of the model on the 50000 test images: 80.4%
Max accuracy: 80.57%
Epoch: [175]  [   0/2502]  eta: 2:14:53  lr: 0.001666  min_lr: 0.001666  loss: 3.4595 (3.4595)  weight_decay: 0.0500 (0.0500)  time: 3.2350  data: 2.0701  max mem: 53905
Epoch: [175]  [ 200/2502]  eta: 0:23:14  lr: 0.001664  min_lr: 0.001664  loss: 3.3080 (3.2264)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7627 (0.7867)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [175]  [ 400/2502]  eta: 0:21:00  lr: 0.001662  min_lr: 0.001662  loss: 3.2177 (3.2195)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7331 (0.7865)  time: 0.6004  data: 0.0004  max mem: 53905
Epoch: [175]  [ 600/2502]  eta: 0:18:57  lr: 0.001660  min_lr: 0.001660  loss: 3.3200 (3.2051)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8001 (0.7878)  time: 0.5939  data: 0.0005  max mem: 53905
Epoch: [175]  [ 800/2502]  eta: 0:16:55  lr: 0.001658  min_lr: 0.001658  loss: 3.1477 (3.1943)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7310 (0.7800)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [175]  [1000/2502]  eta: 0:14:55  lr: 0.001657  min_lr: 0.001657  loss: 3.3434 (3.1898)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7460 (0.7732)  time: 0.5942  data: 0.0007  max mem: 53905
Epoch: [175]  [1200/2502]  eta: 0:12:55  lr: 0.001655  min_lr: 0.001655  loss: 3.3694 (3.1916)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7350 (0.7685)  time: 0.5934  data: 0.0005  max mem: 53905
Epoch: [175]  [1400/2502]  eta: 0:10:56  lr: 0.001653  min_lr: 0.001653  loss: 3.3121 (3.1961)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7374 (0.7700)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [175]  [1600/2502]  eta: 0:08:56  lr: 0.001651  min_lr: 0.001651  loss: 3.3380 (3.2025)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7323 (0.7700)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [175]  [1800/2502]  eta: 0:06:57  lr: 0.001650  min_lr: 0.001650  loss: 3.4928 (3.1995)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7801 (0.7732)  time: 0.5942  data: 0.0005  max mem: 53905
Epoch: [175]  [2000/2502]  eta: 0:04:58  lr: 0.001648  min_lr: 0.001648  loss: 3.4058 (3.1956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7403 (0.7721)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [175]  [2200/2502]  eta: 0:02:59  lr: 0.001646  min_lr: 0.001646  loss: 3.4015 (3.1927)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7577 (0.7747)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [175]  [2400/2502]  eta: 0:01:00  lr: 0.001644  min_lr: 0.001644  loss: 3.2198 (3.1927)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7531 (nan)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [175]  [2501/2502]  eta: 0:00:00  lr: 0.001644  min_lr: 0.001644  loss: 3.1696 (3.1944)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7579 (nan)  time: 0.5325  data: 0.0007  max mem: 53905
Epoch: [175] Total time: 0:24:47 (0.5944 s / it)
Averaged stats: lr: 0.001644  min_lr: 0.001644  loss: 3.1696 (3.1919)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7579 (nan)
Test:  [ 0/50]  eta: 0:02:35  loss: 0.6243 (0.6243)  acc1: 93.6000 (93.6000)  acc5: 99.2000 (99.2000)  time: 3.1102  data: 2.7982  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.9314 (0.9269)  acc1: 85.2000 (85.7091)  acc5: 98.0000 (97.5636)  time: 0.5933  data: 0.3095  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9359 (0.9401)  acc1: 83.2000 (84.4571)  acc5: 97.6000 (97.3143)  time: 0.3117  data: 0.0306  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0666 (1.0375)  acc1: 80.0000 (82.2194)  acc5: 95.6000 (96.1677)  time: 0.2819  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2616 (1.1003)  acc1: 76.0000 (80.4390)  acc5: 93.2000 (95.5707)  time: 0.2817  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2594 (1.1102)  acc1: 75.6000 (80.1040)  acc5: 94.0000 (95.5280)  time: 0.2816  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3526 s / it)
* Acc@1 80.352 Acc@5 95.702 loss 1.108
Accuracy of the model on the 50000 test images: 80.4%
Max accuracy: 80.57%
Epoch: [176]  [   0/2502]  eta: 1:59:06  lr: 0.001643  min_lr: 0.001643  loss: 3.3481 (3.3481)  weight_decay: 0.0500 (0.0500)  time: 2.8564  data: 2.2550  max mem: 53905
Epoch: [176]  [ 200/2502]  eta: 0:23:12  lr: 0.001642  min_lr: 0.001642  loss: 3.2933 (3.1773)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7433 (0.7558)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [176]  [ 400/2502]  eta: 0:20:58  lr: 0.001640  min_lr: 0.001640  loss: 3.3653 (3.1776)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6905 (0.7447)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [176]  [ 600/2502]  eta: 0:18:54  lr: 0.001638  min_lr: 0.001638  loss: 3.2082 (3.1799)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7964 (0.7621)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [176]  [ 800/2502]  eta: 0:16:53  lr: 0.001636  min_lr: 0.001636  loss: 3.4080 (3.1728)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7593 (0.7644)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [176]  [1000/2502]  eta: 0:14:53  lr: 0.001635  min_lr: 0.001635  loss: 3.2945 (3.1722)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7769 (0.7724)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [176]  [1200/2502]  eta: 0:12:54  lr: 0.001633  min_lr: 0.001633  loss: 3.3197 (3.1710)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8885 (0.7790)  time: 0.5995  data: 0.0005  max mem: 53905
Epoch: [176]  [1400/2502]  eta: 0:10:55  lr: 0.001631  min_lr: 0.001631  loss: 3.0349 (3.1670)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7233 (0.7739)  time: 0.5996  data: 0.0006  max mem: 53905
Epoch: [176]  [1600/2502]  eta: 0:08:56  lr: 0.001629  min_lr: 0.001629  loss: 3.4245 (3.1700)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8089 (0.7738)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [176]  [1800/2502]  eta: 0:06:57  lr: 0.001628  min_lr: 0.001628  loss: 3.4602 (3.1709)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7157 (0.7710)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [176]  [2000/2502]  eta: 0:04:58  lr: 0.001626  min_lr: 0.001626  loss: 3.3131 (3.1667)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7293 (0.7714)  time: 0.5981  data: 0.0004  max mem: 53905
Epoch: [176]  [2200/2502]  eta: 0:02:59  lr: 0.001624  min_lr: 0.001624  loss: 3.2074 (3.1638)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7743 (0.7730)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [176]  [2400/2502]  eta: 0:01:00  lr: 0.001622  min_lr: 0.001622  loss: 3.2892 (3.1648)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7350 (0.7718)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [176]  [2501/2502]  eta: 0:00:00  lr: 0.001621  min_lr: 0.001621  loss: 3.0274 (3.1643)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7186 (0.7714)  time: 0.5320  data: 0.0007  max mem: 53905
Epoch: [176] Total time: 0:24:46 (0.5941 s / it)
Averaged stats: lr: 0.001621  min_lr: 0.001621  loss: 3.0274 (3.1768)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7186 (0.7714)
Test:  [ 0/50]  eta: 0:02:54  loss: 0.4562 (0.4562)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.4966  data: 3.1780  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7618 (0.7819)  acc1: 85.6000 (86.0727)  acc5: 98.0000 (97.4182)  time: 0.5735  data: 0.2893  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8110 (0.8224)  acc1: 84.4000 (84.9524)  acc5: 97.6000 (97.4095)  time: 0.2813  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9374 (0.9179)  acc1: 79.2000 (82.6323)  acc5: 95.2000 (96.2710)  time: 0.2814  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1496 (0.9801)  acc1: 77.2000 (80.8683)  acc5: 93.6000 (95.7951)  time: 0.2812  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1457 (0.9927)  acc1: 75.2000 (80.3280)  acc5: 94.8000 (95.7440)  time: 0.2809  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3481 s / it)
* Acc@1 80.576 Acc@5 95.748 loss 0.994
Accuracy of the model on the 50000 test images: 80.6%
Max accuracy: 80.58%
Epoch: [177]  [   0/2502]  eta: 1:40:31  lr: 0.001621  min_lr: 0.001621  loss: 3.6610 (3.6610)  weight_decay: 0.0500 (0.0500)  time: 2.4105  data: 1.8093  max mem: 53905
Epoch: [177]  [ 200/2502]  eta: 0:23:05  lr: 0.001620  min_lr: 0.001620  loss: 3.4564 (3.1745)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7687 (0.7799)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [177]  [ 400/2502]  eta: 0:20:57  lr: 0.001618  min_lr: 0.001618  loss: 3.2302 (3.1867)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7210 (0.7602)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [177]  [ 600/2502]  eta: 0:18:54  lr: 0.001616  min_lr: 0.001616  loss: 3.1987 (3.1892)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7451 (0.7519)  time: 0.5974  data: 0.0006  max mem: 53905
Epoch: [177]  [ 800/2502]  eta: 0:16:53  lr: 0.001614  min_lr: 0.001614  loss: 3.2034 (3.1671)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7846 (0.7676)  time: 0.5936  data: 0.0005  max mem: 53905
Epoch: [177]  [1000/2502]  eta: 0:14:54  lr: 0.001613  min_lr: 0.001613  loss: 3.0192 (3.1687)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7595 (0.7691)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [177]  [1200/2502]  eta: 0:12:54  lr: 0.001611  min_lr: 0.001611  loss: 3.1654 (3.1719)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6911 (0.7691)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [177]  [1400/2502]  eta: 0:10:55  lr: 0.001609  min_lr: 0.001609  loss: 3.2546 (3.1731)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7181 (0.7667)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [177]  [1600/2502]  eta: 0:08:56  lr: 0.001607  min_lr: 0.001607  loss: 3.2758 (3.1748)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7350 (0.7647)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [177]  [1800/2502]  eta: 0:06:57  lr: 0.001606  min_lr: 0.001606  loss: 3.3734 (3.1761)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7792 (0.7675)  time: 0.5995  data: 0.0004  max mem: 53905
Epoch: [177]  [2000/2502]  eta: 0:04:58  lr: 0.001604  min_lr: 0.001604  loss: 3.2901 (3.1705)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8114 (0.7686)  time: 0.5993  data: 0.0004  max mem: 53905
Epoch: [177]  [2200/2502]  eta: 0:02:59  lr: 0.001602  min_lr: 0.001602  loss: 3.3627 (3.1694)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7820 (0.7738)  time: 0.5976  data: 0.0007  max mem: 53905
Epoch: [177]  [2400/2502]  eta: 0:01:00  lr: 0.001600  min_lr: 0.001600  loss: 3.3136 (3.1689)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7852 (0.7762)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [177]  [2501/2502]  eta: 0:00:00  lr: 0.001599  min_lr: 0.001599  loss: 3.1112 (3.1703)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7677 (0.7765)  time: 0.5338  data: 0.0009  max mem: 53905
Epoch: [177] Total time: 0:24:45 (0.5939 s / it)
Averaged stats: lr: 0.001599  min_lr: 0.001599  loss: 3.1112 (3.1756)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7677 (0.7765)
Test:  [ 0/50]  eta: 0:03:09  loss: 0.5269 (0.5269)  acc1: 92.8000 (92.8000)  acc5: 99.2000 (99.2000)  time: 3.7808  data: 3.4826  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8379 (0.8191)  acc1: 84.0000 (85.8909)  acc5: 98.0000 (97.6000)  time: 0.6021  data: 0.3170  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8379 (0.8530)  acc1: 83.2000 (84.7619)  acc5: 98.0000 (97.5048)  time: 0.2843  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9857 (0.9400)  acc1: 80.8000 (82.9032)  acc5: 94.8000 (96.3613)  time: 0.2845  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1449 (1.0026)  acc1: 78.0000 (81.2098)  acc5: 94.0000 (95.7951)  time: 0.2843  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1449 (1.0163)  acc1: 76.4000 (80.6720)  acc5: 94.4000 (95.7840)  time: 0.2841  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3571 s / it)
* Acc@1 80.706 Acc@5 95.770 loss 1.015
Accuracy of the model on the 50000 test images: 80.7%
Max accuracy: 80.71%
Epoch: [178]  [   0/2502]  eta: 1:37:53  lr: 0.001599  min_lr: 0.001599  loss: 2.0958 (2.0958)  weight_decay: 0.0500 (0.0500)  time: 2.3474  data: 1.7439  max mem: 53905
Epoch: [178]  [ 200/2502]  eta: 0:23:07  lr: 0.001598  min_lr: 0.001598  loss: 3.3520 (3.1608)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8407 (0.7896)  time: 0.5989  data: 0.0004  max mem: 53905
Epoch: [178]  [ 400/2502]  eta: 0:20:55  lr: 0.001596  min_lr: 0.001596  loss: 3.3533 (3.1625)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7959 (0.7998)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [178]  [ 600/2502]  eta: 0:18:52  lr: 0.001594  min_lr: 0.001594  loss: 3.3601 (3.1554)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7802 (0.7868)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [178]  [ 800/2502]  eta: 0:16:51  lr: 0.001592  min_lr: 0.001592  loss: 2.9876 (3.1476)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7200 (0.7811)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [178]  [1000/2502]  eta: 0:14:52  lr: 0.001591  min_lr: 0.001591  loss: 3.1701 (3.1518)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8053 (0.7873)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [178]  [1200/2502]  eta: 0:12:53  lr: 0.001589  min_lr: 0.001589  loss: 3.0741 (3.1625)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7848 (0.7889)  time: 0.5961  data: 0.0004  max mem: 53905
Epoch: [178]  [1400/2502]  eta: 0:10:54  lr: 0.001587  min_lr: 0.001587  loss: 3.1701 (3.1551)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7647 (0.7921)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [178]  [1600/2502]  eta: 0:08:55  lr: 0.001585  min_lr: 0.001585  loss: 3.3840 (3.1646)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7396 (0.7885)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [178]  [1800/2502]  eta: 0:06:56  lr: 0.001584  min_lr: 0.001584  loss: 3.2801 (3.1640)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7409 (0.7862)  time: 0.5913  data: 0.0006  max mem: 53905
Epoch: [178]  [2000/2502]  eta: 0:04:57  lr: 0.001582  min_lr: 0.001582  loss: 3.1362 (3.1681)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7997 (0.7840)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [178]  [2200/2502]  eta: 0:02:59  lr: 0.001580  min_lr: 0.001580  loss: 3.1390 (3.1728)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7686 (0.7833)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [178]  [2400/2502]  eta: 0:01:00  lr: 0.001578  min_lr: 0.001578  loss: 3.3307 (3.1764)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7857 (0.7844)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [178]  [2501/2502]  eta: 0:00:00  lr: 0.001578  min_lr: 0.001578  loss: 3.1738 (3.1735)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8478 (0.7864)  time: 0.5329  data: 0.0011  max mem: 53905
Epoch: [178] Total time: 0:24:43 (0.5927 s / it)
Averaged stats: lr: 0.001578  min_lr: 0.001578  loss: 3.1738 (3.1663)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8478 (0.7864)
Test:  [ 0/50]  eta: 0:02:36  loss: 0.4813 (0.4813)  acc1: 92.8000 (92.8000)  acc5: 99.2000 (99.2000)  time: 3.1375  data: 2.8155  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8020 (0.7957)  acc1: 86.8000 (85.8909)  acc5: 97.6000 (97.4909)  time: 0.5867  data: 0.3002  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8314 (0.8345)  acc1: 84.8000 (84.9524)  acc5: 97.2000 (97.4095)  time: 0.3083  data: 0.0246  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9783 (0.9271)  acc1: 80.4000 (83.0581)  acc5: 95.6000 (96.3742)  time: 0.2906  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1692 (0.9874)  acc1: 76.8000 (81.2195)  acc5: 93.6000 (95.8146)  time: 0.2902  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1474 (1.0041)  acc1: 75.6000 (80.6160)  acc5: 94.4000 (95.6560)  time: 0.2842  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3554 s / it)
* Acc@1 80.788 Acc@5 95.730 loss 1.003
Accuracy of the model on the 50000 test images: 80.8%
Max accuracy: 80.79%
Epoch: [179]  [   0/2502]  eta: 1:43:09  lr: 0.001577  min_lr: 0.001577  loss: 2.8548 (2.8548)  weight_decay: 0.0500 (0.0500)  time: 2.4738  data: 1.8779  max mem: 53905
Epoch: [179]  [ 200/2502]  eta: 0:23:03  lr: 0.001576  min_lr: 0.001576  loss: 3.3815 (3.1007)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7249 (0.7426)  time: 0.5901  data: 0.0004  max mem: 53905
Epoch: [179]  [ 400/2502]  eta: 0:20:55  lr: 0.001574  min_lr: 0.001574  loss: 3.2264 (3.1228)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8038 (0.7743)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [179]  [ 600/2502]  eta: 0:18:53  lr: 0.001572  min_lr: 0.001572  loss: 2.8895 (3.1082)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8922 (0.7884)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [179]  [ 800/2502]  eta: 0:16:52  lr: 0.001570  min_lr: 0.001570  loss: 3.2743 (3.1357)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7159 (0.7775)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [179]  [1000/2502]  eta: 0:14:52  lr: 0.001569  min_lr: 0.001569  loss: 3.1662 (3.1363)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7838 (0.7754)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [179]  [1200/2502]  eta: 0:12:53  lr: 0.001567  min_lr: 0.001567  loss: 3.2109 (3.1371)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8119 (0.7820)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [179]  [1400/2502]  eta: 0:10:54  lr: 0.001565  min_lr: 0.001565  loss: 3.4306 (3.1459)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7378 (0.7789)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [179]  [1600/2502]  eta: 0:08:55  lr: 0.001563  min_lr: 0.001563  loss: 3.2861 (3.1484)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7394 (0.7789)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [179]  [1800/2502]  eta: 0:06:56  lr: 0.001562  min_lr: 0.001562  loss: 3.3262 (3.1546)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7843 (0.7815)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [179]  [2000/2502]  eta: 0:04:57  lr: 0.001560  min_lr: 0.001560  loss: 3.3523 (3.1582)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8004 (0.7844)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [179]  [2200/2502]  eta: 0:02:59  lr: 0.001558  min_lr: 0.001558  loss: 3.4322 (3.1598)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7582 (0.7857)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [179]  [2400/2502]  eta: 0:01:00  lr: 0.001556  min_lr: 0.001556  loss: 3.3122 (3.1631)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7599 (0.7846)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [179]  [2501/2502]  eta: 0:00:00  lr: 0.001556  min_lr: 0.001556  loss: 3.2501 (3.1620)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7204 (0.7834)  time: 0.5373  data: 0.0006  max mem: 53905
Epoch: [179] Total time: 0:24:42 (0.5927 s / it)
Averaged stats: lr: 0.001556  min_lr: 0.001556  loss: 3.2501 (3.1588)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7204 (0.7834)
Test:  [ 0/50]  eta: 0:02:49  loss: 0.5191 (0.5191)  acc1: 94.0000 (94.0000)  acc5: 99.6000 (99.6000)  time: 3.3935  data: 3.0636  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8057 (0.8480)  acc1: 88.0000 (86.2182)  acc5: 98.4000 (97.8545)  time: 0.5674  data: 0.2789  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8746 (0.8860)  acc1: 82.8000 (84.9524)  acc5: 97.6000 (97.6381)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0337 (0.9708)  acc1: 79.2000 (82.7742)  acc5: 95.6000 (96.5548)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1954 (1.0269)  acc1: 76.8000 (81.3073)  acc5: 94.0000 (95.9610)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1474 (1.0402)  acc1: 76.0000 (80.7440)  acc5: 94.8000 (95.9200)  time: 0.2845  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3493 s / it)
* Acc@1 80.914 Acc@5 95.852 loss 1.043
Accuracy of the model on the 50000 test images: 80.9%
Max accuracy: 80.91%
Epoch: [180]  [   0/2502]  eta: 1:32:14  lr: 0.001556  min_lr: 0.001556  loss: 3.6655 (3.6655)  weight_decay: 0.0500 (0.0500)  time: 2.2119  data: 1.6008  max mem: 53905
Epoch: [180]  [ 200/2502]  eta: 0:23:01  lr: 0.001554  min_lr: 0.001554  loss: 3.1361 (3.1617)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7750 (0.7734)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [180]  [ 400/2502]  eta: 0:20:54  lr: 0.001552  min_lr: 0.001552  loss: 3.3683 (3.1841)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7808 (0.8045)  time: 0.5919  data: 0.0007  max mem: 53905
Epoch: [180]  [ 600/2502]  eta: 0:18:53  lr: 0.001550  min_lr: 0.001550  loss: 3.3273 (3.1617)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7891 (0.8026)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [180]  [ 800/2502]  eta: 0:16:52  lr: 0.001549  min_lr: 0.001549  loss: 3.0699 (3.1553)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7546 (0.7931)  time: 0.5922  data: 0.0007  max mem: 53905
Epoch: [180]  [1000/2502]  eta: 0:14:52  lr: 0.001547  min_lr: 0.001547  loss: 3.3668 (3.1603)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7227 (0.7894)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [180]  [1200/2502]  eta: 0:12:53  lr: 0.001545  min_lr: 0.001545  loss: 3.0255 (3.1575)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7471 (0.7895)  time: 0.5937  data: 0.0007  max mem: 53905
Epoch: [180]  [1400/2502]  eta: 0:10:54  lr: 0.001543  min_lr: 0.001543  loss: 3.4093 (3.1585)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7497 (0.7862)  time: 0.5978  data: 0.0007  max mem: 53905
Epoch: [180]  [1600/2502]  eta: 0:08:55  lr: 0.001542  min_lr: 0.001542  loss: 3.1003 (3.1576)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7797 (0.7856)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [180]  [1800/2502]  eta: 0:06:56  lr: 0.001540  min_lr: 0.001540  loss: 3.1456 (3.1560)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8011 (0.7867)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [180]  [2000/2502]  eta: 0:04:58  lr: 0.001538  min_lr: 0.001538  loss: 3.2496 (3.1548)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7652 (0.7859)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [180]  [2200/2502]  eta: 0:02:59  lr: 0.001536  min_lr: 0.001536  loss: 3.2028 (3.1548)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7981 (0.7895)  time: 0.5958  data: 0.0006  max mem: 53905
Epoch: [180]  [2400/2502]  eta: 0:01:00  lr: 0.001535  min_lr: 0.001535  loss: 3.2880 (3.1588)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7761 (0.7915)  time: 0.5923  data: 0.0007  max mem: 53905
Epoch: [180]  [2501/2502]  eta: 0:00:00  lr: 0.001534  min_lr: 0.001534  loss: 3.1461 (3.1567)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7769 (0.7919)  time: 0.5335  data: 0.0008  max mem: 53905
Epoch: [180] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.001534  min_lr: 0.001534  loss: 3.1461 (3.1646)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7769 (0.7919)
Test:  [ 0/50]  eta: 0:03:04  loss: 0.5927 (0.5927)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 3.6990  data: 3.3742  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8628 (0.8631)  acc1: 88.0000 (86.2909)  acc5: 98.0000 (97.7455)  time: 0.5954  data: 0.3071  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8942 (0.8896)  acc1: 84.4000 (85.3143)  acc5: 97.6000 (97.6762)  time: 0.2852  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0051 (0.9776)  acc1: 80.4000 (83.3161)  acc5: 96.8000 (96.6452)  time: 0.2853  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2460 (1.0476)  acc1: 76.4000 (81.4439)  acc5: 94.0000 (95.9415)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2460 (1.0594)  acc1: 74.8000 (80.9280)  acc5: 94.4000 (95.8400)  time: 0.2848  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3558 s / it)
* Acc@1 80.850 Acc@5 95.818 loss 1.065
Accuracy of the model on the 50000 test images: 80.9%
Max accuracy: 80.91%
Epoch: [181]  [   0/2502]  eta: 1:52:42  lr: 0.001534  min_lr: 0.001534  loss: 3.6287 (3.6287)  weight_decay: 0.0500 (0.0500)  time: 2.7029  data: 2.0755  max mem: 53905
Epoch: [181]  [ 200/2502]  eta: 0:23:12  lr: 0.001532  min_lr: 0.001532  loss: 3.2718 (3.1339)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7255 (0.7986)  time: 0.5927  data: 0.0007  max mem: 53905
Epoch: [181]  [ 400/2502]  eta: 0:21:00  lr: 0.001530  min_lr: 0.001530  loss: 3.1218 (3.1342)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7620 (0.8089)  time: 0.5943  data: 0.0009  max mem: 53905
Epoch: [181]  [ 600/2502]  eta: 0:18:56  lr: 0.001528  min_lr: 0.001528  loss: 3.1598 (3.1458)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7900 (0.8148)  time: 0.6019  data: 0.0006  max mem: 53905
Epoch: [181]  [ 800/2502]  eta: 0:16:55  lr: 0.001527  min_lr: 0.001527  loss: 3.2695 (3.1585)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7850 (0.8134)  time: 0.5993  data: 0.0006  max mem: 53905
Epoch: [181]  [1000/2502]  eta: 0:14:55  lr: 0.001525  min_lr: 0.001525  loss: 3.3659 (3.1570)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7606 (0.8047)  time: 0.5942  data: 0.0005  max mem: 53905
Epoch: [181]  [1200/2502]  eta: 0:12:55  lr: 0.001523  min_lr: 0.001523  loss: 3.3965 (3.1566)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8172 (0.8111)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [181]  [1400/2502]  eta: 0:10:55  lr: 0.001522  min_lr: 0.001522  loss: 3.2586 (3.1512)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8108 (0.8154)  time: 0.5907  data: 0.0008  max mem: 53905
Epoch: [181]  [1600/2502]  eta: 0:08:56  lr: 0.001520  min_lr: 0.001520  loss: 3.1774 (3.1504)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7811 (0.8118)  time: 0.5905  data: 0.0007  max mem: 53905
Epoch: [181]  [1800/2502]  eta: 0:06:57  lr: 0.001518  min_lr: 0.001518  loss: 3.2526 (3.1524)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7954 (0.8126)  time: 0.5911  data: 0.0007  max mem: 53905
Epoch: [181]  [2000/2502]  eta: 0:04:58  lr: 0.001516  min_lr: 0.001516  loss: 3.1794 (3.1519)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8113 (0.8124)  time: 0.5915  data: 0.0006  max mem: 53905
Epoch: [181]  [2200/2502]  eta: 0:02:59  lr: 0.001515  min_lr: 0.001515  loss: 3.2721 (3.1476)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7754 (0.8115)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [181]  [2400/2502]  eta: 0:01:00  lr: 0.001513  min_lr: 0.001513  loss: 3.0282 (3.1503)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7648 (0.8105)  time: 0.5919  data: 0.0007  max mem: 53905
Epoch: [181]  [2501/2502]  eta: 0:00:00  lr: 0.001512  min_lr: 0.001512  loss: 3.0687 (3.1463)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7718 (0.8097)  time: 0.5329  data: 0.0009  max mem: 53905
Epoch: [181] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.001512  min_lr: 0.001512  loss: 3.0687 (3.1554)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7718 (0.8097)
Test:  [ 0/50]  eta: 0:03:13  loss: 0.5552 (0.5552)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 3.8738  data: 3.5670  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.7866 (0.8293)  acc1: 86.8000 (86.0000)  acc5: 98.0000 (97.7091)  time: 0.6080  data: 0.3247  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8728 (0.8646)  acc1: 84.8000 (85.2381)  acc5: 97.2000 (97.4667)  time: 0.2815  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9858 (0.9455)  acc1: 80.8000 (83.2387)  acc5: 96.0000 (96.4516)  time: 0.2816  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1392 (1.0027)  acc1: 76.8000 (81.4439)  acc5: 94.4000 (95.8927)  time: 0.2814  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1392 (1.0230)  acc1: 76.4000 (80.8400)  acc5: 93.6000 (95.7440)  time: 0.2812  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3556 s / it)
* Acc@1 80.796 Acc@5 95.790 loss 1.030
Accuracy of the model on the 50000 test images: 80.8%
Max accuracy: 80.91%
Epoch: [182]  [   0/2502]  eta: 1:50:40  lr: 0.001512  min_lr: 0.001512  loss: 2.6946 (2.6946)  weight_decay: 0.0500 (0.0500)  time: 2.6542  data: 1.9621  max mem: 53905
Epoch: [182]  [ 200/2502]  eta: 0:23:05  lr: 0.001510  min_lr: 0.001510  loss: 3.0612 (3.1025)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8273 (0.8186)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [182]  [ 400/2502]  eta: 0:20:55  lr: 0.001508  min_lr: 0.001508  loss: 3.1399 (3.1028)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7383 (0.8074)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [182]  [ 600/2502]  eta: 0:18:53  lr: 0.001507  min_lr: 0.001507  loss: 3.4248 (3.1059)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7635 (0.7942)  time: 0.5917  data: 0.0007  max mem: 53905
Epoch: [182]  [ 800/2502]  eta: 0:16:52  lr: 0.001505  min_lr: 0.001505  loss: 3.1509 (3.0935)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8133 (0.7999)  time: 0.5904  data: 0.0005  max mem: 53905
Epoch: [182]  [1000/2502]  eta: 0:14:52  lr: 0.001503  min_lr: 0.001503  loss: 3.3161 (3.1033)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7847 (0.8065)  time: 0.5907  data: 0.0009  max mem: 53905
Epoch: [182]  [1200/2502]  eta: 0:12:53  lr: 0.001502  min_lr: 0.001502  loss: 3.1426 (3.1157)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7692 (0.8092)  time: 0.5911  data: 0.0007  max mem: 53905
Epoch: [182]  [1400/2502]  eta: 0:10:54  lr: 0.001500  min_lr: 0.001500  loss: 3.1984 (3.1262)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8297 (0.8049)  time: 0.5984  data: 0.0006  max mem: 53905
Epoch: [182]  [1600/2502]  eta: 0:08:55  lr: 0.001498  min_lr: 0.001498  loss: 3.4406 (3.1279)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7582 (0.8058)  time: 0.5928  data: 0.0006  max mem: 53905
Epoch: [182]  [1800/2502]  eta: 0:06:56  lr: 0.001496  min_lr: 0.001496  loss: 3.1274 (3.1288)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7951 (0.8097)  time: 0.5915  data: 0.0006  max mem: 53905
Epoch: [182]  [2000/2502]  eta: 0:04:57  lr: 0.001495  min_lr: 0.001495  loss: 3.1932 (3.1323)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8634 (0.8103)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [182]  [2200/2502]  eta: 0:02:59  lr: 0.001493  min_lr: 0.001493  loss: 3.2361 (3.1319)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8494 (0.8122)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [182]  [2400/2502]  eta: 0:01:00  lr: 0.001491  min_lr: 0.001491  loss: 3.2717 (3.1355)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7951 (0.8130)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [182]  [2501/2502]  eta: 0:00:00  lr: 0.001490  min_lr: 0.001490  loss: 3.1228 (3.1328)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7899 (0.8127)  time: 0.5391  data: 0.0006  max mem: 53905
Epoch: [182] Total time: 0:24:43 (0.5928 s / it)
Averaged stats: lr: 0.001490  min_lr: 0.001490  loss: 3.1228 (3.1465)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7899 (0.8127)
Test:  [ 0/50]  eta: 0:03:04  loss: 0.5408 (0.5408)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 3.6963  data: 3.3799  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7884 (0.8079)  acc1: 88.4000 (86.5455)  acc5: 98.0000 (97.6727)  time: 0.5949  data: 0.3076  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8446 (0.8383)  acc1: 83.2000 (85.1619)  acc5: 97.6000 (97.5429)  time: 0.2851  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9510 (0.9197)  acc1: 81.6000 (83.3161)  acc5: 96.0000 (96.5419)  time: 0.2853  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1205 (0.9751)  acc1: 77.2000 (81.7366)  acc5: 94.4000 (95.9902)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1018 (0.9910)  acc1: 75.6000 (81.1280)  acc5: 94.4000 (95.7840)  time: 0.2846  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3555 s / it)
* Acc@1 81.166 Acc@5 95.890 loss 0.990
Accuracy of the model on the 50000 test images: 81.2%
Max accuracy: 81.17%
Epoch: [183]  [   0/2502]  eta: 1:52:37  lr: 0.001490  min_lr: 0.001490  loss: 3.5220 (3.5220)  weight_decay: 0.0500 (0.0500)  time: 2.7007  data: 2.0953  max mem: 53905
Epoch: [183]  [ 200/2502]  eta: 0:23:07  lr: 0.001488  min_lr: 0.001488  loss: 3.1715 (3.1459)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7680 (0.8092)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [183]  [ 400/2502]  eta: 0:20:59  lr: 0.001487  min_lr: 0.001487  loss: 3.0062 (3.1379)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8000 (0.8054)  time: 0.6008  data: 0.0004  max mem: 53905
Epoch: [183]  [ 600/2502]  eta: 0:18:55  lr: 0.001485  min_lr: 0.001485  loss: 3.3849 (3.1526)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7974 (0.8053)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [183]  [ 800/2502]  eta: 0:16:53  lr: 0.001483  min_lr: 0.001483  loss: 3.0142 (3.1467)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7456 (0.8070)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [183]  [1000/2502]  eta: 0:14:53  lr: 0.001482  min_lr: 0.001482  loss: 3.3553 (3.1551)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7871 (0.8067)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [183]  [1200/2502]  eta: 0:12:54  lr: 0.001480  min_lr: 0.001480  loss: 3.1758 (3.1544)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7636 (0.8097)  time: 0.5902  data: 0.0004  max mem: 53905
Epoch: [183]  [1400/2502]  eta: 0:10:54  lr: 0.001478  min_lr: 0.001478  loss: 2.9501 (3.1550)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7603 (0.8067)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [183]  [1600/2502]  eta: 0:08:55  lr: 0.001476  min_lr: 0.001476  loss: 3.2959 (3.1555)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7387 (0.8052)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [183]  [1800/2502]  eta: 0:06:56  lr: 0.001475  min_lr: 0.001475  loss: 3.2455 (3.1548)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7438 (0.8033)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [183]  [2000/2502]  eta: 0:04:57  lr: 0.001473  min_lr: 0.001473  loss: 3.2880 (3.1484)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7529 (0.8007)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [183]  [2200/2502]  eta: 0:02:59  lr: 0.001471  min_lr: 0.001471  loss: 3.2022 (3.1511)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8323 (0.8022)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [183]  [2400/2502]  eta: 0:01:00  lr: 0.001469  min_lr: 0.001469  loss: 3.0956 (3.1471)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8268 (0.8043)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [183]  [2501/2502]  eta: 0:00:00  lr: 0.001469  min_lr: 0.001469  loss: 3.1701 (3.1471)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7867 (0.8033)  time: 0.5326  data: 0.0007  max mem: 53905
Epoch: [183] Total time: 0:24:43 (0.5930 s / it)
Averaged stats: lr: 0.001469  min_lr: 0.001469  loss: 3.1701 (3.1461)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7867 (0.8033)
Test:  [ 0/50]  eta: 0:03:09  loss: 0.6060 (0.6060)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 3.7819  data: 3.4568  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8468 (0.8620)  acc1: 88.0000 (86.0000)  acc5: 98.0000 (97.4545)  time: 0.6010  data: 0.3147  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8623 (0.8879)  acc1: 83.2000 (84.7619)  acc5: 97.2000 (97.3905)  time: 0.2831  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0354 (0.9794)  acc1: 80.4000 (82.5806)  acc5: 95.6000 (96.4516)  time: 0.2833  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1854 (1.0297)  acc1: 76.0000 (81.1415)  acc5: 94.0000 (95.9415)  time: 0.2830  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1854 (1.0446)  acc1: 75.6000 (80.6400)  acc5: 94.4000 (95.7840)  time: 0.2827  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3558 s / it)
* Acc@1 80.768 Acc@5 95.812 loss 1.044
Accuracy of the model on the 50000 test images: 80.8%
Max accuracy: 81.17%
Epoch: [184]  [   0/2502]  eta: 1:36:56  lr: 0.001469  min_lr: 0.001469  loss: 3.5357 (3.5357)  weight_decay: 0.0500 (0.0500)  time: 2.3246  data: 1.6870  max mem: 53905
Epoch: [184]  [ 200/2502]  eta: 0:23:03  lr: 0.001467  min_lr: 0.001467  loss: 3.4028 (3.1081)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8284 (0.8230)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [184]  [ 400/2502]  eta: 0:20:54  lr: 0.001465  min_lr: 0.001465  loss: 3.3811 (3.1185)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7386 (0.8110)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [184]  [ 600/2502]  eta: 0:18:53  lr: 0.001463  min_lr: 0.001463  loss: 3.2671 (3.1206)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7825 (0.8072)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [184]  [ 800/2502]  eta: 0:16:52  lr: 0.001462  min_lr: 0.001462  loss: 3.3279 (3.1395)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7416 (0.8025)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [184]  [1000/2502]  eta: 0:14:52  lr: 0.001460  min_lr: 0.001460  loss: 3.0668 (3.1425)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8309 (0.8065)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [184]  [1200/2502]  eta: 0:12:54  lr: 0.001458  min_lr: 0.001458  loss: 3.0769 (3.1489)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7548 (0.8068)  time: 0.5946  data: 0.0006  max mem: 53905
Epoch: [184]  [1400/2502]  eta: 0:10:54  lr: 0.001456  min_lr: 0.001456  loss: 3.1126 (3.1511)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7873 (0.8058)  time: 0.5926  data: 0.0006  max mem: 53905
Epoch: [184]  [1600/2502]  eta: 0:08:55  lr: 0.001455  min_lr: 0.001455  loss: 3.2955 (3.1520)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8288 (0.8095)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [184]  [1800/2502]  eta: 0:06:56  lr: 0.001453  min_lr: 0.001453  loss: 3.3389 (3.1440)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7550 (inf)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [184]  [2000/2502]  eta: 0:04:57  lr: 0.001451  min_lr: 0.001451  loss: 3.2799 (3.1457)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7788 (inf)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [184]  [2200/2502]  eta: 0:02:59  lr: 0.001450  min_lr: 0.001450  loss: 3.4741 (3.1453)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7503 (inf)  time: 0.5952  data: 0.0006  max mem: 53905
Epoch: [184]  [2400/2502]  eta: 0:01:00  lr: 0.001448  min_lr: 0.001448  loss: 3.1379 (3.1518)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8380 (inf)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [184]  [2501/2502]  eta: 0:00:00  lr: 0.001447  min_lr: 0.001447  loss: 3.2824 (3.1544)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9033 (inf)  time: 0.5323  data: 0.0006  max mem: 53905
Epoch: [184] Total time: 0:24:43 (0.5930 s / it)
Averaged stats: lr: 0.001447  min_lr: 0.001447  loss: 3.2824 (3.1402)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9033 (inf)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.6138 (0.6138)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.0568  data: 2.7399  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.9593 (0.9255)  acc1: 86.8000 (86.0364)  acc5: 98.0000 (97.6364)  time: 0.5486  data: 0.2644  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9593 (0.9471)  acc1: 84.4000 (85.1429)  acc5: 97.6000 (97.6952)  time: 0.2896  data: 0.0087  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0235 (1.0374)  acc1: 81.2000 (83.1226)  acc5: 96.4000 (96.5807)  time: 0.2814  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2476 (1.0945)  acc1: 78.0000 (81.5902)  acc5: 93.6000 (95.9902)  time: 0.2813  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2410 (1.1095)  acc1: 76.0000 (81.0000)  acc5: 94.4000 (95.8080)  time: 0.2811  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3419 s / it)
* Acc@1 81.056 Acc@5 95.862 loss 1.111
Accuracy of the model on the 50000 test images: 81.1%
Max accuracy: 81.17%
Epoch: [185]  [   0/2502]  eta: 2:00:35  lr: 0.001447  min_lr: 0.001447  loss: 3.5299 (3.5299)  weight_decay: 0.0500 (0.0500)  time: 2.8917  data: 1.8676  max mem: 53905
Epoch: [185]  [ 200/2502]  eta: 0:23:08  lr: 0.001445  min_lr: 0.001445  loss: 2.9887 (3.1550)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7622 (0.8006)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [185]  [ 400/2502]  eta: 0:20:56  lr: 0.001444  min_lr: 0.001444  loss: 3.2296 (3.1461)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8139 (0.7998)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [185]  [ 600/2502]  eta: 0:18:53  lr: 0.001442  min_lr: 0.001442  loss: 3.1260 (3.1293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8281 (0.8064)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [185]  [ 800/2502]  eta: 0:16:52  lr: 0.001440  min_lr: 0.001440  loss: 3.1245 (3.1370)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7607 (0.8022)  time: 0.5999  data: 0.0004  max mem: 53905
Epoch: [185]  [1000/2502]  eta: 0:14:52  lr: 0.001438  min_lr: 0.001438  loss: 3.2739 (3.1333)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7721 (0.8050)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [185]  [1200/2502]  eta: 0:12:53  lr: 0.001437  min_lr: 0.001437  loss: 3.2730 (3.1289)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7848 (0.8077)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [185]  [1400/2502]  eta: 0:10:54  lr: 0.001435  min_lr: 0.001435  loss: 2.9963 (3.1284)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8054 (0.8095)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [185]  [1600/2502]  eta: 0:08:55  lr: 0.001433  min_lr: 0.001433  loss: 3.2849 (3.1330)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7407 (0.8061)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [185]  [1800/2502]  eta: 0:06:56  lr: 0.001431  min_lr: 0.001431  loss: 3.3278 (3.1351)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7136 (0.8043)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [185]  [2000/2502]  eta: 0:04:57  lr: 0.001430  min_lr: 0.001430  loss: 3.1905 (3.1354)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8333 (0.8081)  time: 0.5954  data: 0.0005  max mem: 53905
Epoch: [185]  [2200/2502]  eta: 0:02:59  lr: 0.001428  min_lr: 0.001428  loss: 3.2445 (3.1349)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7596 (0.8042)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [185]  [2400/2502]  eta: 0:01:00  lr: 0.001426  min_lr: 0.001426  loss: 3.1590 (3.1330)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7990 (0.8059)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [185]  [2501/2502]  eta: 0:00:00  lr: 0.001425  min_lr: 0.001425  loss: 2.9124 (3.1317)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7624 (0.8054)  time: 0.5317  data: 0.0007  max mem: 53905
Epoch: [185] Total time: 0:24:43 (0.5928 s / it)
Averaged stats: lr: 0.001425  min_lr: 0.001425  loss: 2.9124 (3.1375)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7624 (0.8054)
Test:  [ 0/50]  eta: 0:03:10  loss: 0.5419 (0.5419)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.8100  data: 3.4903  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8584 (0.8092)  acc1: 85.2000 (86.1091)  acc5: 98.0000 (97.5636)  time: 0.6020  data: 0.3177  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8486 (0.8292)  acc1: 84.4000 (85.1619)  acc5: 97.6000 (97.4667)  time: 0.2812  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9384 (0.9208)  acc1: 80.8000 (83.0710)  acc5: 95.6000 (96.3742)  time: 0.2813  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1647 (0.9762)  acc1: 77.2000 (81.5610)  acc5: 94.0000 (95.8634)  time: 0.2811  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1555 (0.9919)  acc1: 76.4000 (81.0000)  acc5: 94.4000 (95.8000)  time: 0.2809  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3540 s / it)
* Acc@1 80.980 Acc@5 95.856 loss 0.991
Accuracy of the model on the 50000 test images: 81.0%
Max accuracy: 81.17%
Epoch: [186]  [   0/2502]  eta: 2:11:22  lr: 0.001425  min_lr: 0.001425  loss: 2.7905 (2.7905)  weight_decay: 0.0500 (0.0500)  time: 3.1506  data: 1.7740  max mem: 53905
Epoch: [186]  [ 200/2502]  eta: 0:23:09  lr: 0.001424  min_lr: 0.001424  loss: 3.1809 (3.0802)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7596 (0.8045)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [186]  [ 400/2502]  eta: 0:20:56  lr: 0.001422  min_lr: 0.001422  loss: 3.2541 (3.1122)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8026 (0.8213)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [186]  [ 600/2502]  eta: 0:18:54  lr: 0.001420  min_lr: 0.001420  loss: 3.1892 (3.1276)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8171 (0.8298)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [186]  [ 800/2502]  eta: 0:16:53  lr: 0.001419  min_lr: 0.001419  loss: 3.3274 (3.1208)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8257 (0.8303)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [186]  [1000/2502]  eta: 0:14:52  lr: 0.001417  min_lr: 0.001417  loss: 3.3248 (3.1194)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9124 (0.8449)  time: 0.5903  data: 0.0005  max mem: 53905
Epoch: [186]  [1200/2502]  eta: 0:12:53  lr: 0.001415  min_lr: 0.001415  loss: 3.2006 (3.1229)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7817 (0.8368)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [186]  [1400/2502]  eta: 0:10:54  lr: 0.001413  min_lr: 0.001413  loss: 3.2749 (3.1216)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8256 (0.8380)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [186]  [1600/2502]  eta: 0:08:55  lr: 0.001412  min_lr: 0.001412  loss: 3.2307 (3.1247)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7686 (0.8324)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [186]  [1800/2502]  eta: 0:06:56  lr: 0.001410  min_lr: 0.001410  loss: 3.2601 (3.1303)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8674 (0.8336)  time: 0.5902  data: 0.0004  max mem: 53905
Epoch: [186]  [2000/2502]  eta: 0:04:57  lr: 0.001408  min_lr: 0.001408  loss: 3.0983 (3.1316)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8060 (0.8320)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [186]  [2200/2502]  eta: 0:02:59  lr: 0.001407  min_lr: 0.001407  loss: 3.3581 (3.1375)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7799 (0.8326)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [186]  [2400/2502]  eta: 0:01:00  lr: 0.001405  min_lr: 0.001405  loss: 3.1540 (3.1328)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9176 (0.8326)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [186]  [2501/2502]  eta: 0:00:00  lr: 0.001404  min_lr: 0.001404  loss: 3.0939 (3.1348)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7999 (0.8318)  time: 0.5317  data: 0.0009  max mem: 53905
Epoch: [186] Total time: 0:24:43 (0.5928 s / it)
Averaged stats: lr: 0.001404  min_lr: 0.001404  loss: 3.0939 (3.1284)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7999 (0.8318)
Test:  [ 0/50]  eta: 0:03:18  loss: 0.5216 (0.5216)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.9614  data: 3.6549  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8308 (0.8367)  acc1: 87.6000 (86.2546)  acc5: 98.0000 (97.6364)  time: 0.6158  data: 0.3327  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8508 (0.8380)  acc1: 82.8000 (85.2762)  acc5: 98.0000 (97.8286)  time: 0.2814  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9462 (0.9336)  acc1: 82.0000 (83.0452)  acc5: 96.8000 (96.6323)  time: 0.2816  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1306 (0.9891)  acc1: 77.6000 (81.6488)  acc5: 94.0000 (96.0488)  time: 0.2813  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1306 (1.0145)  acc1: 77.2000 (80.9920)  acc5: 94.8000 (95.8880)  time: 0.2810  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3581 s / it)
* Acc@1 80.994 Acc@5 95.888 loss 1.019
Accuracy of the model on the 50000 test images: 81.0%
Max accuracy: 81.17%
Epoch: [187]  [   0/2502]  eta: 1:53:09  lr: 0.001404  min_lr: 0.001404  loss: 2.9783 (2.9783)  weight_decay: 0.0500 (0.0500)  time: 2.7135  data: 2.1032  max mem: 53905
Epoch: [187]  [ 200/2502]  eta: 0:23:07  lr: 0.001402  min_lr: 0.001402  loss: 3.3117 (3.0569)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8436 (0.8254)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [187]  [ 400/2502]  eta: 0:20:58  lr: 0.001401  min_lr: 0.001401  loss: 3.2119 (3.1335)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7974 (0.8347)  time: 0.5939  data: 0.0006  max mem: 53905
Epoch: [187]  [ 600/2502]  eta: 0:18:54  lr: 0.001399  min_lr: 0.001399  loss: 3.0227 (3.1300)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8088 (0.8296)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [187]  [ 800/2502]  eta: 0:16:54  lr: 0.001397  min_lr: 0.001397  loss: 3.2587 (3.1323)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8602 (0.8320)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [187]  [1000/2502]  eta: 0:14:53  lr: 0.001395  min_lr: 0.001395  loss: 2.8018 (3.1271)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7836 (0.8249)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [187]  [1200/2502]  eta: 0:12:53  lr: 0.001394  min_lr: 0.001394  loss: 3.1729 (3.1239)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8314 (0.8268)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [187]  [1400/2502]  eta: 0:10:54  lr: 0.001392  min_lr: 0.001392  loss: 3.3048 (3.1235)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8121 (0.8265)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [187]  [1600/2502]  eta: 0:08:55  lr: 0.001390  min_lr: 0.001390  loss: 3.3571 (3.1190)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7997 (0.8255)  time: 0.5979  data: 0.0005  max mem: 53905
Epoch: [187]  [1800/2502]  eta: 0:06:57  lr: 0.001389  min_lr: 0.001389  loss: 3.1971 (3.1192)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7485 (0.8256)  time: 0.5952  data: 0.0007  max mem: 53905
Epoch: [187]  [2000/2502]  eta: 0:04:58  lr: 0.001387  min_lr: 0.001387  loss: 3.2177 (3.1218)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8209 (0.8281)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [187]  [2200/2502]  eta: 0:02:59  lr: 0.001385  min_lr: 0.001385  loss: 3.2926 (3.1200)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7839 (0.8311)  time: 0.5902  data: 0.0005  max mem: 53905
Epoch: [187]  [2400/2502]  eta: 0:01:00  lr: 0.001383  min_lr: 0.001383  loss: 3.3701 (3.1170)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7491 (0.8281)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [187]  [2501/2502]  eta: 0:00:00  lr: 0.001383  min_lr: 0.001383  loss: 3.1448 (3.1166)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8608 (0.8301)  time: 0.5320  data: 0.0008  max mem: 53905
Epoch: [187] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.001383  min_lr: 0.001383  loss: 3.1448 (3.1192)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8608 (0.8301)
Test:  [ 0/50]  eta: 0:03:15  loss: 0.5066 (0.5066)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.9157  data: 3.6148  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.7699 (0.8153)  acc1: 86.8000 (85.8182)  acc5: 97.6000 (97.6000)  time: 0.6132  data: 0.3290  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8272 (0.8290)  acc1: 83.6000 (85.4667)  acc5: 97.6000 (97.6381)  time: 0.2833  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9627 (0.9327)  acc1: 82.4000 (83.3290)  acc5: 96.8000 (96.6194)  time: 0.2835  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1238 (0.9838)  acc1: 78.0000 (81.9220)  acc5: 93.6000 (96.0488)  time: 0.2832  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1238 (1.0022)  acc1: 78.0000 (81.2320)  acc5: 94.4000 (95.9360)  time: 0.2829  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3588 s / it)
* Acc@1 81.292 Acc@5 95.890 loss 1.002
Accuracy of the model on the 50000 test images: 81.3%
Max accuracy: 81.29%
Epoch: [188]  [   0/2502]  eta: 1:41:01  lr: 0.001383  min_lr: 0.001383  loss: 3.1225 (3.1225)  weight_decay: 0.0500 (0.0500)  time: 2.4225  data: 1.8304  max mem: 53905
Epoch: [188]  [ 200/2502]  eta: 0:23:03  lr: 0.001381  min_lr: 0.001381  loss: 3.1397 (3.1032)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7963 (0.8165)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [188]  [ 400/2502]  eta: 0:20:54  lr: 0.001379  min_lr: 0.001379  loss: 3.2929 (3.0952)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8123 (0.8290)  time: 0.6033  data: 0.0005  max mem: 53905
Epoch: [188]  [ 600/2502]  eta: 0:18:51  lr: 0.001378  min_lr: 0.001378  loss: 3.0816 (3.0871)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8550 (0.8440)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [188]  [ 800/2502]  eta: 0:16:51  lr: 0.001376  min_lr: 0.001376  loss: 3.3950 (3.1007)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7847 (0.8348)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [188]  [1000/2502]  eta: 0:14:52  lr: 0.001374  min_lr: 0.001374  loss: 3.1436 (3.0954)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8458 (nan)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [188]  [1200/2502]  eta: 0:12:53  lr: 0.001372  min_lr: 0.001372  loss: 3.2883 (3.1053)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8352 (nan)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [188]  [1400/2502]  eta: 0:10:54  lr: 0.001371  min_lr: 0.001371  loss: 3.2457 (3.0945)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8649 (nan)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [188]  [1600/2502]  eta: 0:08:55  lr: 0.001369  min_lr: 0.001369  loss: 2.9543 (3.0986)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8880 (nan)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [188]  [1800/2502]  eta: 0:06:56  lr: 0.001367  min_lr: 0.001367  loss: 3.1609 (3.1033)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8125 (nan)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [188]  [2000/2502]  eta: 0:04:57  lr: 0.001366  min_lr: 0.001366  loss: 3.0731 (3.1113)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7826 (nan)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [188]  [2200/2502]  eta: 0:02:59  lr: 0.001364  min_lr: 0.001364  loss: 3.2777 (3.1116)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7913 (nan)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [188]  [2400/2502]  eta: 0:01:00  lr: 0.001362  min_lr: 0.001362  loss: 3.3846 (3.1189)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8084 (nan)  time: 0.5975  data: 0.0004  max mem: 53905
Epoch: [188]  [2501/2502]  eta: 0:00:00  lr: 0.001361  min_lr: 0.001361  loss: 3.2169 (3.1201)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7947 (nan)  time: 0.5315  data: 0.0006  max mem: 53905
Epoch: [188] Total time: 0:24:42 (0.5924 s / it)
Averaged stats: lr: 0.001361  min_lr: 0.001361  loss: 3.2169 (3.1219)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7947 (nan)
Test:  [ 0/50]  eta: 0:02:46  loss: 0.5313 (0.5313)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.3200  data: 3.0056  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7547 (0.7772)  acc1: 85.6000 (86.5091)  acc5: 98.0000 (97.8182)  time: 0.5708  data: 0.2853  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8289 (0.8091)  acc1: 84.0000 (85.5238)  acc5: 97.6000 (97.6571)  time: 0.2896  data: 0.0069  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9228 (0.9054)  acc1: 81.6000 (83.5226)  acc5: 97.2000 (96.6581)  time: 0.2836  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1312 (0.9640)  acc1: 78.4000 (82.0000)  acc5: 94.4000 (96.0585)  time: 0.2833  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1193 (0.9842)  acc1: 76.8000 (81.4160)  acc5: 94.8000 (95.9040)  time: 0.2830  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3485 s / it)
* Acc@1 81.144 Acc@5 95.944 loss 0.985
Accuracy of the model on the 50000 test images: 81.1%
Max accuracy: 81.29%
Epoch: [189]  [   0/2502]  eta: 1:53:02  lr: 0.001361  min_lr: 0.001361  loss: 2.2931 (2.2931)  weight_decay: 0.0500 (0.0500)  time: 2.7108  data: 2.1117  max mem: 53905
Epoch: [189]  [ 200/2502]  eta: 0:23:08  lr: 0.001360  min_lr: 0.001360  loss: 3.1170 (3.0956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8869 (0.8617)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [189]  [ 400/2502]  eta: 0:20:55  lr: 0.001358  min_lr: 0.001358  loss: 3.3318 (3.1176)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8651 (0.8730)  time: 0.6021  data: 0.0004  max mem: 53905
Epoch: [189]  [ 600/2502]  eta: 0:18:52  lr: 0.001356  min_lr: 0.001356  loss: 3.3785 (3.1096)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8243 (0.8647)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [189]  [ 800/2502]  eta: 0:16:52  lr: 0.001355  min_lr: 0.001355  loss: 3.3504 (3.1168)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8254 (0.8547)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [189]  [1000/2502]  eta: 0:14:52  lr: 0.001353  min_lr: 0.001353  loss: 3.2851 (3.1191)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8226 (0.8489)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [189]  [1200/2502]  eta: 0:12:52  lr: 0.001351  min_lr: 0.001351  loss: 3.3036 (3.1151)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7642 (0.8424)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [189]  [1400/2502]  eta: 0:10:53  lr: 0.001349  min_lr: 0.001349  loss: 3.3170 (3.1178)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7822 (0.8377)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [189]  [1600/2502]  eta: 0:08:55  lr: 0.001348  min_lr: 0.001348  loss: 3.2353 (3.1183)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8452 (0.8379)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [189]  [1800/2502]  eta: 0:06:56  lr: 0.001346  min_lr: 0.001346  loss: 3.1984 (3.1153)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8006 (0.8409)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [189]  [2000/2502]  eta: 0:04:57  lr: 0.001344  min_lr: 0.001344  loss: 3.1656 (3.1113)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9271 (0.8463)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [189]  [2200/2502]  eta: 0:02:59  lr: 0.001343  min_lr: 0.001343  loss: 3.2431 (3.1072)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7840 (0.8497)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [189]  [2400/2502]  eta: 0:01:00  lr: 0.001341  min_lr: 0.001341  loss: 3.2239 (3.1115)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7688 (0.8466)  time: 0.5945  data: 0.0004  max mem: 53905
Epoch: [189]  [2501/2502]  eta: 0:00:00  lr: 0.001340  min_lr: 0.001340  loss: 3.2574 (3.1100)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8902 (0.8482)  time: 0.5319  data: 0.0007  max mem: 53905
Epoch: [189] Total time: 0:24:42 (0.5925 s / it)
Averaged stats: lr: 0.001340  min_lr: 0.001340  loss: 3.2574 (3.1219)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8902 (0.8482)
Test:  [ 0/50]  eta: 0:02:43  loss: 0.5127 (0.5127)  acc1: 92.4000 (92.4000)  acc5: 99.2000 (99.2000)  time: 3.2783  data: 2.9490  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7963 (0.7909)  acc1: 84.8000 (86.4364)  acc5: 98.8000 (97.6000)  time: 0.5544  data: 0.2685  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8210 (0.8246)  acc1: 84.4000 (85.4667)  acc5: 97.6000 (97.5810)  time: 0.2823  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9701 (0.9189)  acc1: 81.2000 (83.3161)  acc5: 96.4000 (96.5290)  time: 0.2826  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1487 (0.9744)  acc1: 78.4000 (81.7171)  acc5: 94.4000 (95.9902)  time: 0.2824  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1380 (0.9935)  acc1: 76.0000 (81.1280)  acc5: 94.4000 (95.8640)  time: 0.2822  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3450 s / it)
* Acc@1 81.196 Acc@5 95.978 loss 0.995
Accuracy of the model on the 50000 test images: 81.2%
Max accuracy: 81.29%
Epoch: [190]  [   0/2502]  eta: 1:40:21  lr: 0.001340  min_lr: 0.001340  loss: 3.3012 (3.3012)  weight_decay: 0.0500 (0.0500)  time: 2.4067  data: 1.4955  max mem: 53905
Epoch: [190]  [ 200/2502]  eta: 0:23:05  lr: 0.001338  min_lr: 0.001338  loss: 3.1839 (3.0861)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8661 (0.8449)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [190]  [ 400/2502]  eta: 0:20:55  lr: 0.001337  min_lr: 0.001337  loss: 3.2993 (3.0767)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8300 (0.8351)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [190]  [ 600/2502]  eta: 0:18:53  lr: 0.001335  min_lr: 0.001335  loss: 2.9422 (3.0656)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7974 (0.8263)  time: 0.5950  data: 0.0004  max mem: 53905
Epoch: [190]  [ 800/2502]  eta: 0:16:52  lr: 0.001333  min_lr: 0.001333  loss: 3.3702 (3.0707)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7906 (0.8226)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [190]  [1000/2502]  eta: 0:14:52  lr: 0.001332  min_lr: 0.001332  loss: 3.3695 (3.0862)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8625 (0.8268)  time: 0.5947  data: 0.0004  max mem: 53905
Epoch: [190]  [1200/2502]  eta: 0:12:53  lr: 0.001330  min_lr: 0.001330  loss: 3.2705 (3.0879)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8312 (0.8252)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [190]  [1400/2502]  eta: 0:10:54  lr: 0.001328  min_lr: 0.001328  loss: 3.4191 (3.0987)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7836 (0.8293)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [190]  [1600/2502]  eta: 0:08:55  lr: 0.001327  min_lr: 0.001327  loss: 3.0905 (3.0996)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8201 (0.8293)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [190]  [1800/2502]  eta: 0:06:56  lr: 0.001325  min_lr: 0.001325  loss: 3.3654 (3.1050)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8277 (0.8314)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [190]  [2000/2502]  eta: 0:04:57  lr: 0.001323  min_lr: 0.001323  loss: 3.3256 (3.1083)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8209 (0.8322)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [190]  [2200/2502]  eta: 0:02:59  lr: 0.001322  min_lr: 0.001322  loss: 3.2983 (3.1063)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8793 (0.8364)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [190]  [2400/2502]  eta: 0:01:00  lr: 0.001320  min_lr: 0.001320  loss: 3.0544 (3.1072)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8081 (0.8360)  time: 0.5980  data: 0.0004  max mem: 53905
Epoch: [190]  [2501/2502]  eta: 0:00:00  lr: 0.001319  min_lr: 0.001319  loss: 3.1608 (3.1047)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8179 (0.8364)  time: 0.5340  data: 0.0007  max mem: 53905
Epoch: [190] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.001319  min_lr: 0.001319  loss: 3.1608 (3.1120)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8179 (0.8364)
Test:  [ 0/50]  eta: 0:02:47  loss: 0.4566 (0.4566)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.3510  data: 3.0117  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7170 (0.7561)  acc1: 86.0000 (86.8727)  acc5: 98.0000 (97.6727)  time: 0.5979  data: 0.3092  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7733 (0.7807)  acc1: 84.4000 (85.7714)  acc5: 98.0000 (97.6381)  time: 0.3036  data: 0.0197  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.8907 (0.8703)  acc1: 81.6000 (83.7161)  acc5: 96.0000 (96.5419)  time: 0.2846  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0730 (0.9265)  acc1: 78.0000 (82.2342)  acc5: 94.0000 (96.0098)  time: 0.2843  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0688 (0.9414)  acc1: 77.2000 (81.5280)  acc5: 94.4000 (95.8720)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3558 s / it)
* Acc@1 81.550 Acc@5 96.006 loss 0.936
Accuracy of the model on the 50000 test images: 81.6%
Max accuracy: 81.55%
Epoch: [191]  [   0/2502]  eta: 1:49:49  lr: 0.001319  min_lr: 0.001319  loss: 3.4003 (3.4003)  weight_decay: 0.0500 (0.0500)  time: 2.6338  data: 2.0247  max mem: 53905
Epoch: [191]  [ 200/2502]  eta: 0:23:15  lr: 0.001317  min_lr: 0.001317  loss: 2.9375 (3.1039)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7980 (0.8114)  time: 0.5936  data: 0.0005  max mem: 53905
Epoch: [191]  [ 400/2502]  eta: 0:20:58  lr: 0.001316  min_lr: 0.001316  loss: 3.1159 (3.0837)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8732 (0.8434)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [191]  [ 600/2502]  eta: 0:18:54  lr: 0.001314  min_lr: 0.001314  loss: 3.2256 (3.0708)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7759 (0.8340)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [191]  [ 800/2502]  eta: 0:16:54  lr: 0.001312  min_lr: 0.001312  loss: 3.0434 (3.0921)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8040 (0.8444)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [191]  [1000/2502]  eta: 0:14:54  lr: 0.001311  min_lr: 0.001311  loss: 3.2548 (3.0890)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8286 (0.8506)  time: 0.5995  data: 0.0004  max mem: 53905
Epoch: [191]  [1200/2502]  eta: 0:12:54  lr: 0.001309  min_lr: 0.001309  loss: 3.2934 (3.0919)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8506 (0.8487)  time: 0.5987  data: 0.0004  max mem: 53905
Epoch: [191]  [1400/2502]  eta: 0:10:55  lr: 0.001307  min_lr: 0.001307  loss: 3.2369 (3.0972)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7978 (0.8446)  time: 0.5931  data: 0.0006  max mem: 53905
Epoch: [191]  [1600/2502]  eta: 0:08:56  lr: 0.001305  min_lr: 0.001305  loss: 3.2188 (3.0995)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8508 (0.8434)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [191]  [1800/2502]  eta: 0:06:57  lr: 0.001304  min_lr: 0.001304  loss: 3.2129 (3.1010)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8925 (0.8471)  time: 0.5936  data: 0.0004  max mem: 53905
Epoch: [191]  [2000/2502]  eta: 0:04:58  lr: 0.001302  min_lr: 0.001302  loss: 3.0678 (3.1070)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8175 (0.8472)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [191]  [2200/2502]  eta: 0:02:59  lr: 0.001300  min_lr: 0.001300  loss: 3.2284 (3.1100)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8983 (0.8515)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [191]  [2400/2502]  eta: 0:01:00  lr: 0.001299  min_lr: 0.001299  loss: 3.1499 (3.1058)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8240 (inf)  time: 0.5980  data: 0.0004  max mem: 53905
Epoch: [191]  [2501/2502]  eta: 0:00:00  lr: 0.001298  min_lr: 0.001298  loss: 3.0659 (3.1059)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8762 (inf)  time: 0.5333  data: 0.0007  max mem: 53905
Epoch: [191] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.001298  min_lr: 0.001298  loss: 3.0659 (3.1031)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8762 (inf)
Test:  [ 0/50]  eta: 0:02:36  loss: 0.5420 (0.5420)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 3.1289  data: 2.7905  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8176 (0.7973)  acc1: 88.0000 (86.7273)  acc5: 98.4000 (97.8545)  time: 0.5941  data: 0.3062  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8302 (0.8359)  acc1: 84.4000 (85.5810)  acc5: 97.2000 (97.6000)  time: 0.3122  data: 0.0291  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9890 (0.9329)  acc1: 80.0000 (83.1484)  acc5: 96.0000 (96.4774)  time: 0.2838  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1661 (0.9915)  acc1: 78.0000 (81.8146)  acc5: 93.6000 (95.8634)  time: 0.2836  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1661 (1.0055)  acc1: 77.6000 (81.2400)  acc5: 94.4000 (95.6880)  time: 0.2833  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3540 s / it)
* Acc@1 81.384 Acc@5 95.972 loss 1.002
Accuracy of the model on the 50000 test images: 81.4%
Max accuracy: 81.55%
Epoch: [192]  [   0/2502]  eta: 1:44:18  lr: 0.001298  min_lr: 0.001298  loss: 3.3778 (3.3778)  weight_decay: 0.0500 (0.0500)  time: 2.5015  data: 1.8810  max mem: 53905
Epoch: [192]  [ 200/2502]  eta: 0:23:06  lr: 0.001296  min_lr: 0.001296  loss: 3.2479 (3.0852)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7783 (0.8696)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [192]  [ 400/2502]  eta: 0:20:57  lr: 0.001295  min_lr: 0.001295  loss: 3.2005 (3.0690)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8611 (0.8581)  time: 0.6004  data: 0.0005  max mem: 53905
Epoch: [192]  [ 600/2502]  eta: 0:18:54  lr: 0.001293  min_lr: 0.001293  loss: 3.2918 (3.0613)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8305 (0.8557)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [192]  [ 800/2502]  eta: 0:16:53  lr: 0.001291  min_lr: 0.001291  loss: 3.1351 (3.0627)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8587 (0.8666)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [192]  [1000/2502]  eta: 0:14:54  lr: 0.001290  min_lr: 0.001290  loss: 2.9971 (3.0737)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8322 (0.8621)  time: 0.5940  data: 0.0005  max mem: 53905
Epoch: [192]  [1200/2502]  eta: 0:12:54  lr: 0.001288  min_lr: 0.001288  loss: 3.2944 (3.0743)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8335 (0.8548)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [192]  [1400/2502]  eta: 0:10:55  lr: 0.001286  min_lr: 0.001286  loss: 3.2313 (3.0808)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8543 (0.8587)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [192]  [1600/2502]  eta: 0:08:56  lr: 0.001285  min_lr: 0.001285  loss: 3.1537 (3.0806)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8257 (0.8540)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [192]  [1800/2502]  eta: 0:06:57  lr: 0.001283  min_lr: 0.001283  loss: 3.1528 (3.0837)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8568 (0.8541)  time: 0.5950  data: 0.0004  max mem: 53905
Epoch: [192]  [2000/2502]  eta: 0:04:58  lr: 0.001281  min_lr: 0.001281  loss: 3.1102 (3.0830)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9383 (0.8580)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [192]  [2200/2502]  eta: 0:02:59  lr: 0.001279  min_lr: 0.001279  loss: 3.1538 (3.0854)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8254 (0.8576)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [192]  [2400/2502]  eta: 0:01:00  lr: 0.001278  min_lr: 0.001278  loss: 3.2443 (3.0887)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8557 (0.8614)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [192]  [2501/2502]  eta: 0:00:00  lr: 0.001277  min_lr: 0.001277  loss: 3.0075 (3.0872)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8159 (0.8599)  time: 0.5327  data: 0.0009  max mem: 53905
Epoch: [192] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.001277  min_lr: 0.001277  loss: 3.0075 (3.0965)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8159 (0.8599)
Test:  [ 0/50]  eta: 0:03:00  loss: 0.5172 (0.5172)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 3.6041  data: 3.2788  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8125 (0.7894)  acc1: 87.2000 (86.6182)  acc5: 98.4000 (98.0000)  time: 0.5838  data: 0.2985  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8454 (0.8230)  acc1: 83.6000 (85.3714)  acc5: 98.0000 (97.8286)  time: 0.2820  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9406 (0.9151)  acc1: 80.8000 (83.2000)  acc5: 96.0000 (96.6323)  time: 0.2824  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1328 (0.9647)  acc1: 78.4000 (81.8146)  acc5: 93.6000 (96.0781)  time: 0.2821  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0970 (0.9838)  acc1: 78.4000 (81.0800)  acc5: 94.4000 (95.9920)  time: 0.2819  data: 0.0003  max mem: 53905
Test: Total time: 0:00:17 (0.3507 s / it)
* Acc@1 81.374 Acc@5 96.004 loss 0.984
Accuracy of the model on the 50000 test images: 81.4%
Max accuracy: 81.55%
Epoch: [193]  [   0/2502]  eta: 1:47:14  lr: 0.001277  min_lr: 0.001277  loss: 3.5336 (3.5336)  weight_decay: 0.0500 (0.0500)  time: 2.5716  data: 1.8553  max mem: 53905
Epoch: [193]  [ 200/2502]  eta: 0:23:08  lr: 0.001275  min_lr: 0.001275  loss: 3.2155 (3.0788)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8459 (0.8209)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [193]  [ 400/2502]  eta: 0:20:56  lr: 0.001274  min_lr: 0.001274  loss: 3.2502 (3.0478)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8403 (0.8412)  time: 0.6008  data: 0.0004  max mem: 53905
Epoch: [193]  [ 600/2502]  eta: 0:18:53  lr: 0.001272  min_lr: 0.001272  loss: 3.2119 (3.0403)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8234 (0.8482)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [193]  [ 800/2502]  eta: 0:16:53  lr: 0.001270  min_lr: 0.001270  loss: 3.1290 (3.0470)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8386 (0.8473)  time: 0.5944  data: 0.0005  max mem: 53905
Epoch: [193]  [1000/2502]  eta: 0:14:53  lr: 0.001269  min_lr: 0.001269  loss: 3.2620 (3.0545)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8014 (0.8573)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [193]  [1200/2502]  eta: 0:12:54  lr: 0.001267  min_lr: 0.001267  loss: 3.4021 (3.0563)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9143 (0.8644)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [193]  [1400/2502]  eta: 0:10:55  lr: 0.001265  min_lr: 0.001265  loss: 3.0472 (3.0595)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8381 (0.8629)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [193]  [1600/2502]  eta: 0:08:56  lr: 0.001264  min_lr: 0.001264  loss: 3.2079 (3.0652)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9157 (0.8675)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [193]  [1800/2502]  eta: 0:06:57  lr: 0.001262  min_lr: 0.001262  loss: 3.0681 (3.0686)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8060 (0.8667)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [193]  [2000/2502]  eta: 0:04:58  lr: 0.001260  min_lr: 0.001260  loss: 3.0762 (3.0702)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8424 (0.8675)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [193]  [2200/2502]  eta: 0:02:59  lr: 0.001259  min_lr: 0.001259  loss: 3.0601 (3.0783)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8575 (0.8670)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [193]  [2400/2502]  eta: 0:01:00  lr: 0.001257  min_lr: 0.001257  loss: 3.1468 (3.0778)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8089 (nan)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [193]  [2501/2502]  eta: 0:00:00  lr: 0.001256  min_lr: 0.001256  loss: 3.2027 (3.0778)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8792 (nan)  time: 0.5317  data: 0.0007  max mem: 53905
Epoch: [193] Total time: 0:24:44 (0.5935 s / it)
Averaged stats: lr: 0.001256  min_lr: 0.001256  loss: 3.2027 (3.0893)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8792 (nan)
Test:  [ 0/50]  eta: 0:03:23  loss: 0.5668 (0.5668)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 4.0617  data: 3.7412  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 0.8134 (0.8575)  acc1: 86.8000 (86.9455)  acc5: 97.6000 (97.6364)  time: 0.6253  data: 0.3405  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8783 (0.8841)  acc1: 85.2000 (85.9429)  acc5: 97.6000 (97.4857)  time: 0.2818  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.0036 (0.9794)  acc1: 80.8000 (84.1161)  acc5: 96.0000 (96.3613)  time: 0.2821  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1840 (1.0331)  acc1: 79.2000 (82.5951)  acc5: 94.0000 (95.8146)  time: 0.2820  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1840 (1.0552)  acc1: 77.6000 (81.7440)  acc5: 94.8000 (95.7680)  time: 0.2819  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3598 s / it)
* Acc@1 81.666 Acc@5 96.016 loss 1.053
Accuracy of the model on the 50000 test images: 81.7%
Max accuracy: 81.67%
Epoch: [194]  [   0/2502]  eta: 1:30:41  lr: 0.001256  min_lr: 0.001256  loss: 3.2120 (3.2120)  weight_decay: 0.0500 (0.0500)  time: 2.1747  data: 1.5751  max mem: 53905
Epoch: [194]  [ 200/2502]  eta: 0:23:02  lr: 0.001254  min_lr: 0.001254  loss: 3.1757 (3.0666)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8101 (0.8765)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [194]  [ 400/2502]  eta: 0:20:56  lr: 0.001253  min_lr: 0.001253  loss: 3.1871 (3.0972)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8476 (0.8739)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [194]  [ 600/2502]  eta: 0:18:53  lr: 0.001251  min_lr: 0.001251  loss: 3.2021 (3.0872)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8925 (0.8874)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [194]  [ 800/2502]  eta: 0:16:53  lr: 0.001249  min_lr: 0.001249  loss: 3.3826 (3.0954)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8263 (0.8738)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [194]  [1000/2502]  eta: 0:14:53  lr: 0.001248  min_lr: 0.001248  loss: 3.2148 (3.0920)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8283 (0.8675)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [194]  [1200/2502]  eta: 0:12:54  lr: 0.001246  min_lr: 0.001246  loss: 3.1569 (3.0937)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8734 (0.8705)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [194]  [1400/2502]  eta: 0:10:54  lr: 0.001244  min_lr: 0.001244  loss: 3.4084 (3.0966)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8912 (0.8747)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [194]  [1600/2502]  eta: 0:08:55  lr: 0.001243  min_lr: 0.001243  loss: 3.0644 (3.0952)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8517 (0.8765)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [194]  [1800/2502]  eta: 0:06:56  lr: 0.001241  min_lr: 0.001241  loss: 3.2419 (3.0940)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8173 (0.8736)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [194]  [2000/2502]  eta: 0:04:58  lr: 0.001239  min_lr: 0.001239  loss: 2.9790 (3.0934)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8381 (0.8788)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [194]  [2200/2502]  eta: 0:02:59  lr: 0.001238  min_lr: 0.001238  loss: 3.0545 (3.0952)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9259 (0.8756)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [194]  [2400/2502]  eta: 0:01:00  lr: 0.001236  min_lr: 0.001236  loss: 3.1373 (3.0998)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7853 (0.8740)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [194]  [2501/2502]  eta: 0:00:00  lr: 0.001235  min_lr: 0.001235  loss: 3.2844 (3.0973)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9062 (0.8751)  time: 0.5336  data: 0.0007  max mem: 53905
Epoch: [194] Total time: 0:24:44 (0.5935 s / it)
Averaged stats: lr: 0.001235  min_lr: 0.001235  loss: 3.2844 (3.0987)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9062 (0.8751)
Test:  [ 0/50]  eta: 0:02:37  loss: 0.5671 (0.5671)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 3.1424  data: 2.8046  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8238 (0.8513)  acc1: 86.4000 (86.7636)  acc5: 98.4000 (97.7818)  time: 0.5590  data: 0.2701  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8781 (0.8807)  acc1: 84.0000 (85.7333)  acc5: 97.6000 (97.5810)  time: 0.2930  data: 0.0086  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0440 (0.9718)  acc1: 82.0000 (83.4452)  acc5: 96.4000 (96.5419)  time: 0.2866  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1796 (1.0249)  acc1: 78.4000 (82.0098)  acc5: 94.0000 (95.9220)  time: 0.2917  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1796 (1.0415)  acc1: 77.6000 (81.3920)  acc5: 94.0000 (95.8160)  time: 0.2904  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3506 s / it)
* Acc@1 81.456 Acc@5 95.998 loss 1.039
Accuracy of the model on the 50000 test images: 81.5%
Max accuracy: 81.67%
Epoch: [195]  [   0/2502]  eta: 1:38:22  lr: 0.001235  min_lr: 0.001235  loss: 3.2789 (3.2789)  weight_decay: 0.0500 (0.0500)  time: 2.3591  data: 1.7034  max mem: 53905
Epoch: [195]  [ 200/2502]  eta: 0:23:05  lr: 0.001234  min_lr: 0.001234  loss: 3.2713 (3.1165)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8185 (0.8769)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [195]  [ 400/2502]  eta: 0:20:55  lr: 0.001232  min_lr: 0.001232  loss: 3.1721 (3.0952)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8372 (0.8788)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [195]  [ 600/2502]  eta: 0:18:53  lr: 0.001230  min_lr: 0.001230  loss: 3.2591 (3.1002)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8671 (0.8780)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [195]  [ 800/2502]  eta: 0:16:52  lr: 0.001229  min_lr: 0.001229  loss: 3.1100 (3.0871)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9052 (0.8787)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [195]  [1000/2502]  eta: 0:14:53  lr: 0.001227  min_lr: 0.001227  loss: 3.2490 (3.0908)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8541 (0.8816)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [195]  [1200/2502]  eta: 0:12:54  lr: 0.001225  min_lr: 0.001225  loss: 2.9647 (3.0879)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9149 (0.8823)  time: 0.5956  data: 0.0004  max mem: 53905
Epoch: [195]  [1400/2502]  eta: 0:10:54  lr: 0.001224  min_lr: 0.001224  loss: 3.3692 (3.0903)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8404 (0.8775)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [195]  [1600/2502]  eta: 0:08:55  lr: 0.001222  min_lr: 0.001222  loss: 3.3373 (3.0899)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8265 (0.8738)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [195]  [1800/2502]  eta: 0:06:56  lr: 0.001220  min_lr: 0.001220  loss: 3.2941 (3.0985)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8512 (0.8710)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [195]  [2000/2502]  eta: 0:04:58  lr: 0.001219  min_lr: 0.001219  loss: 2.8166 (3.0964)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9883 (0.8762)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [195]  [2200/2502]  eta: 0:02:59  lr: 0.001217  min_lr: 0.001217  loss: 3.0404 (3.0958)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7802 (0.8764)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [195]  [2400/2502]  eta: 0:01:00  lr: 0.001215  min_lr: 0.001215  loss: 3.1810 (3.0925)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8499 (0.8760)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [195]  [2501/2502]  eta: 0:00:00  lr: 0.001215  min_lr: 0.001215  loss: 3.2234 (3.0936)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8529 (0.8757)  time: 0.5321  data: 0.0009  max mem: 53905
Epoch: [195] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.001215  min_lr: 0.001215  loss: 3.2234 (3.0890)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8529 (0.8757)
Test:  [ 0/50]  eta: 0:02:34  loss: 0.5976 (0.5976)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.0902  data: 2.7735  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8605 (0.8558)  acc1: 86.0000 (86.4000)  acc5: 98.0000 (97.6727)  time: 0.5927  data: 0.3063  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8650 (0.8914)  acc1: 84.8000 (85.6762)  acc5: 98.0000 (97.6381)  time: 0.3137  data: 0.0300  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0264 (0.9744)  acc1: 81.6000 (83.8452)  acc5: 95.2000 (96.6065)  time: 0.2845  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1721 (1.0233)  acc1: 78.0000 (82.2439)  acc5: 94.4000 (96.1756)  time: 0.2842  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1441 (1.0413)  acc1: 76.8000 (81.5280)  acc5: 95.2000 (96.0960)  time: 0.2839  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3545 s / it)
* Acc@1 81.654 Acc@5 96.056 loss 1.043
Accuracy of the model on the 50000 test images: 81.7%
Max accuracy: 81.67%
Epoch: [196]  [   0/2502]  eta: 1:50:38  lr: 0.001215  min_lr: 0.001215  loss: 3.0550 (3.0550)  weight_decay: 0.0500 (0.0500)  time: 2.6534  data: 2.0299  max mem: 53905
Epoch: [196]  [ 200/2502]  eta: 0:23:07  lr: 0.001213  min_lr: 0.001213  loss: 3.0576 (3.0419)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8119 (0.8383)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [196]  [ 400/2502]  eta: 0:20:57  lr: 0.001211  min_lr: 0.001211  loss: 2.8170 (3.0447)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8967 (0.8724)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [196]  [ 600/2502]  eta: 0:18:54  lr: 0.001210  min_lr: 0.001210  loss: 3.1203 (3.0671)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8259 (0.8734)  time: 0.5923  data: 0.0009  max mem: 53905
Epoch: [196]  [ 800/2502]  eta: 0:16:53  lr: 0.001208  min_lr: 0.001208  loss: 3.1516 (3.0703)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8654 (0.8808)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [196]  [1000/2502]  eta: 0:14:53  lr: 0.001206  min_lr: 0.001206  loss: 3.0586 (3.0806)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8254 (0.8789)  time: 0.5922  data: 0.0007  max mem: 53905
Epoch: [196]  [1200/2502]  eta: 0:12:54  lr: 0.001205  min_lr: 0.001205  loss: 3.4145 (3.0784)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8583 (0.8818)  time: 0.5955  data: 0.0006  max mem: 53905
Epoch: [196]  [1400/2502]  eta: 0:10:55  lr: 0.001203  min_lr: 0.001203  loss: 3.2333 (3.0770)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8686 (0.8845)  time: 0.5924  data: 0.0006  max mem: 53905
Epoch: [196]  [1600/2502]  eta: 0:08:55  lr: 0.001201  min_lr: 0.001201  loss: 3.3089 (3.0782)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2270 (inf)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [196]  [1800/2502]  eta: 0:06:57  lr: 0.001200  min_lr: 0.001200  loss: 3.1040 (3.0872)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7781 (inf)  time: 0.5934  data: 0.0006  max mem: 53905
Epoch: [196]  [2000/2502]  eta: 0:04:58  lr: 0.001198  min_lr: 0.001198  loss: 3.1853 (3.0915)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8900 (inf)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [196]  [2200/2502]  eta: 0:02:59  lr: 0.001197  min_lr: 0.001197  loss: 3.2654 (3.0913)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8617 (inf)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [196]  [2400/2502]  eta: 0:01:00  lr: 0.001195  min_lr: 0.001195  loss: 3.1822 (3.0897)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8559 (inf)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [196]  [2501/2502]  eta: 0:00:00  lr: 0.001194  min_lr: 0.001194  loss: 3.0708 (3.0889)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8321 (inf)  time: 0.5357  data: 0.0007  max mem: 53905
Epoch: [196] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.001194  min_lr: 0.001194  loss: 3.0708 (3.0875)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8321 (inf)
Test:  [ 0/50]  eta: 0:02:42  loss: 0.5120 (0.5120)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 3.2510  data: 2.9227  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8366 (0.8118)  acc1: 86.4000 (86.8727)  acc5: 98.0000 (97.7818)  time: 0.5679  data: 0.2799  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8366 (0.8396)  acc1: 84.0000 (85.9238)  acc5: 98.0000 (97.7143)  time: 0.2924  data: 0.0081  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9796 (0.9321)  acc1: 82.0000 (84.0000)  acc5: 95.6000 (96.5161)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1664 (0.9906)  acc1: 78.4000 (82.4098)  acc5: 94.0000 (95.9415)  time: 0.2902  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1408 (1.0069)  acc1: 76.8000 (81.8560)  acc5: 94.4000 (95.8560)  time: 0.2898  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3513 s / it)
* Acc@1 81.600 Acc@5 96.042 loss 1.010
Accuracy of the model on the 50000 test images: 81.6%
Max accuracy: 81.67%
Epoch: [197]  [   0/2502]  eta: 1:59:03  lr: 0.001194  min_lr: 0.001194  loss: 3.5155 (3.5155)  weight_decay: 0.0500 (0.0500)  time: 2.8551  data: 2.2504  max mem: 53905
Epoch: [197]  [ 200/2502]  eta: 0:23:08  lr: 0.001192  min_lr: 0.001192  loss: 2.7754 (3.0233)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8852 (0.9037)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [197]  [ 400/2502]  eta: 0:20:57  lr: 0.001191  min_lr: 0.001191  loss: 3.3991 (3.0372)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8385 (0.8770)  time: 0.5920  data: 0.0007  max mem: 53905
Epoch: [197]  [ 600/2502]  eta: 0:18:54  lr: 0.001189  min_lr: 0.001189  loss: 3.1293 (3.0501)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9408 (0.8792)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [197]  [ 800/2502]  eta: 0:16:53  lr: 0.001188  min_lr: 0.001188  loss: 3.1605 (3.0500)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8120 (0.8773)  time: 0.5924  data: 0.0008  max mem: 53905
Epoch: [197]  [1000/2502]  eta: 0:14:53  lr: 0.001186  min_lr: 0.001186  loss: 3.1377 (3.0480)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8730 (0.8767)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [197]  [1200/2502]  eta: 0:12:54  lr: 0.001184  min_lr: 0.001184  loss: 3.2890 (3.0563)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9445 (0.8911)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [197]  [1400/2502]  eta: 0:10:55  lr: 0.001183  min_lr: 0.001183  loss: 3.1125 (3.0552)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8512 (0.8933)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [197]  [1600/2502]  eta: 0:08:55  lr: 0.001181  min_lr: 0.001181  loss: 3.1434 (3.0669)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8328 (0.8873)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [197]  [1800/2502]  eta: 0:06:57  lr: 0.001179  min_lr: 0.001179  loss: 3.2260 (3.0664)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9084 (0.8885)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [197]  [2000/2502]  eta: 0:04:58  lr: 0.001178  min_lr: 0.001178  loss: 3.1971 (3.0676)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9318 (0.8904)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [197]  [2200/2502]  eta: 0:02:59  lr: 0.001176  min_lr: 0.001176  loss: 3.0370 (3.0662)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8070 (0.8896)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [197]  [2400/2502]  eta: 0:01:00  lr: 0.001174  min_lr: 0.001174  loss: 3.2867 (3.0716)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8451 (0.8878)  time: 0.5915  data: 0.0006  max mem: 53905
Epoch: [197]  [2501/2502]  eta: 0:00:00  lr: 0.001174  min_lr: 0.001174  loss: 3.3376 (3.0715)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8488 (0.8877)  time: 0.5327  data: 0.0008  max mem: 53905
Epoch: [197] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.001174  min_lr: 0.001174  loss: 3.3376 (3.0726)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8488 (0.8877)
Test:  [ 0/50]  eta: 0:02:58  loss: 0.6449 (0.6449)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.5633  data: 3.2462  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.9343 (0.9422)  acc1: 86.8000 (86.8364)  acc5: 98.4000 (97.6727)  time: 0.6081  data: 0.3212  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9351 (0.9560)  acc1: 85.6000 (86.0000)  acc5: 97.6000 (97.6571)  time: 0.2987  data: 0.0146  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0131 (1.0359)  acc1: 82.0000 (83.8581)  acc5: 96.8000 (96.8129)  time: 0.2849  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2381 (1.0916)  acc1: 78.4000 (82.3512)  acc5: 94.0000 (96.1366)  time: 0.2845  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2409 (1.1103)  acc1: 77.6000 (81.7040)  acc5: 94.4000 (96.0960)  time: 0.2841  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3579 s / it)
* Acc@1 81.652 Acc@5 96.138 loss 1.110
Accuracy of the model on the 50000 test images: 81.7%
Max accuracy: 81.67%
Epoch: [198]  [   0/2502]  eta: 1:43:06  lr: 0.001174  min_lr: 0.001174  loss: 2.1595 (2.1595)  weight_decay: 0.0500 (0.0500)  time: 2.4725  data: 1.5695  max mem: 53905
Epoch: [198]  [ 200/2502]  eta: 0:23:06  lr: 0.001172  min_lr: 0.001172  loss: 2.8221 (3.0557)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9265 (0.9430)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [198]  [ 400/2502]  eta: 0:20:56  lr: 0.001170  min_lr: 0.001170  loss: 3.1316 (3.0566)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8768 (0.9177)  time: 0.5917  data: 0.0007  max mem: 53905
Epoch: [198]  [ 600/2502]  eta: 0:18:54  lr: 0.001169  min_lr: 0.001169  loss: 3.2887 (3.0753)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8752 (0.9019)  time: 0.5916  data: 0.0007  max mem: 53905
Epoch: [198]  [ 800/2502]  eta: 0:16:53  lr: 0.001167  min_lr: 0.001167  loss: 3.2281 (3.0849)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8480 (0.8981)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [198]  [1000/2502]  eta: 0:14:54  lr: 0.001165  min_lr: 0.001165  loss: 3.0492 (3.0678)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9024 (0.8951)  time: 0.5941  data: 0.0010  max mem: 53905
Epoch: [198]  [1200/2502]  eta: 0:12:54  lr: 0.001164  min_lr: 0.001164  loss: 3.1653 (3.0741)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8850 (0.9012)  time: 0.5932  data: 0.0006  max mem: 53905
Epoch: [198]  [1400/2502]  eta: 0:10:55  lr: 0.001162  min_lr: 0.001162  loss: 3.2575 (3.0787)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8647 (0.9009)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [198]  [1600/2502]  eta: 0:08:56  lr: 0.001161  min_lr: 0.001161  loss: 3.1827 (3.0842)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8929 (0.8999)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [198]  [1800/2502]  eta: 0:06:57  lr: 0.001159  min_lr: 0.001159  loss: 3.0583 (3.0844)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9109 (0.9011)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [198]  [2000/2502]  eta: 0:04:58  lr: 0.001157  min_lr: 0.001157  loss: 3.0968 (3.0841)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8759 (0.8971)  time: 0.5965  data: 0.0005  max mem: 53905
Epoch: [198]  [2200/2502]  eta: 0:02:59  lr: 0.001156  min_lr: 0.001156  loss: 3.0268 (3.0811)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8343 (0.8948)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [198]  [2400/2502]  eta: 0:01:00  lr: 0.001154  min_lr: 0.001154  loss: 3.2417 (3.0819)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9572 (0.8959)  time: 0.5928  data: 0.0006  max mem: 53905
Epoch: [198]  [2501/2502]  eta: 0:00:00  lr: 0.001153  min_lr: 0.001153  loss: 3.1217 (3.0802)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8809 (0.8964)  time: 0.5403  data: 0.0007  max mem: 53905
Epoch: [198] Total time: 0:24:45 (0.5935 s / it)
Averaged stats: lr: 0.001153  min_lr: 0.001153  loss: 3.1217 (3.0697)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8809 (0.8964)
Test:  [ 0/50]  eta: 0:03:00  loss: 0.6174 (0.6174)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.6149  data: 3.2993  max mem: 53905
Test:  [10/50]  eta: 0:00:29  loss: 0.8767 (0.8442)  acc1: 86.4000 (86.6909)  acc5: 98.0000 (97.9636)  time: 0.7441  data: 0.4531  max mem: 53905
Test:  [20/50]  eta: 0:00:15  loss: 0.9072 (0.8763)  acc1: 84.0000 (85.7714)  acc5: 97.6000 (97.8095)  time: 0.3702  data: 0.0845  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9945 (0.9495)  acc1: 81.6000 (83.8323)  acc5: 96.4000 (96.8258)  time: 0.2834  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:04  loss: 1.1214 (1.0008)  acc1: 78.8000 (82.3317)  acc5: 94.4000 (96.2927)  time: 0.2832  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1243 (1.0233)  acc1: 76.4000 (81.5040)  acc5: 94.8000 (96.1440)  time: 0.2829  data: 0.0002  max mem: 53905
Test: Total time: 0:00:19 (0.3869 s / it)
* Acc@1 81.702 Acc@5 96.136 loss 1.020
Accuracy of the model on the 50000 test images: 81.7%
Max accuracy: 81.70%
Epoch: [199]  [   0/2502]  eta: 1:35:47  lr: 0.001153  min_lr: 0.001153  loss: 3.0911 (3.0911)  weight_decay: 0.0500 (0.0500)  time: 2.2972  data: 1.7011  max mem: 53905
Epoch: [199]  [ 200/2502]  eta: 0:23:03  lr: 0.001152  min_lr: 0.001152  loss: 3.1266 (3.0476)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8298 (0.8824)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [199]  [ 400/2502]  eta: 0:20:54  lr: 0.001150  min_lr: 0.001150  loss: 2.9648 (3.0488)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8809 (0.8892)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [199]  [ 600/2502]  eta: 0:18:53  lr: 0.001148  min_lr: 0.001148  loss: 3.1656 (3.0414)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8602 (0.8956)  time: 0.5981  data: 0.0004  max mem: 53905
Epoch: [199]  [ 800/2502]  eta: 0:16:52  lr: 0.001147  min_lr: 0.001147  loss: 3.1397 (3.0509)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8533 (0.8942)  time: 0.5929  data: 0.0007  max mem: 53905
Epoch: [199]  [1000/2502]  eta: 0:14:53  lr: 0.001145  min_lr: 0.001145  loss: 2.9959 (3.0579)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9030 (0.8971)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [199]  [1200/2502]  eta: 0:12:53  lr: 0.001143  min_lr: 0.001143  loss: 3.1957 (3.0702)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8912 (0.9049)  time: 0.5906  data: 0.0006  max mem: 53905
Epoch: [199]  [1400/2502]  eta: 0:10:54  lr: 0.001142  min_lr: 0.001142  loss: 2.9201 (3.0670)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9533 (0.9076)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [199]  [1600/2502]  eta: 0:08:55  lr: 0.001140  min_lr: 0.001140  loss: 3.1830 (3.0672)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8735 (0.9051)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [199]  [1800/2502]  eta: 0:06:56  lr: 0.001139  min_lr: 0.001139  loss: 3.2915 (3.0721)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8726 (0.9042)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [199]  [2000/2502]  eta: 0:04:57  lr: 0.001137  min_lr: 0.001137  loss: 3.1584 (3.0736)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9057 (0.9041)  time: 0.5964  data: 0.0006  max mem: 53905
Epoch: [199]  [2200/2502]  eta: 0:02:59  lr: 0.001135  min_lr: 0.001135  loss: 3.2342 (3.0686)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8960 (0.9041)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [199]  [2400/2502]  eta: 0:01:00  lr: 0.001134  min_lr: 0.001134  loss: 3.2920 (3.0658)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9492 (0.9061)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [199]  [2501/2502]  eta: 0:00:00  lr: 0.001133  min_lr: 0.001133  loss: 3.0853 (3.0650)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8383 (0.9061)  time: 0.5331  data: 0.0007  max mem: 53905
Epoch: [199] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.001133  min_lr: 0.001133  loss: 3.0853 (3.0664)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8383 (0.9061)
Test:  [ 0/50]  eta: 0:03:16  loss: 0.6011 (0.6011)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.9209  data: 3.6076  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8413 (0.8803)  acc1: 89.2000 (86.8727)  acc5: 98.4000 (97.8909)  time: 0.6154  data: 0.3283  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8859 (0.9011)  acc1: 83.6000 (85.8286)  acc5: 98.0000 (97.8476)  time: 0.2851  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.0598 (0.9910)  acc1: 81.6000 (83.6774)  acc5: 96.0000 (96.7484)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2174 (1.0467)  acc1: 78.0000 (82.1951)  acc5: 94.0000 (96.0878)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2132 (1.0636)  acc1: 77.6000 (81.5840)  acc5: 94.4000 (96.0000)  time: 0.2846  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3600 s / it)
* Acc@1 81.690 Acc@5 96.100 loss 1.060
Accuracy of the model on the 50000 test images: 81.7%
Max accuracy: 81.70%
Epoch: [200]  [   0/2502]  eta: 1:42:45  lr: 0.001133  min_lr: 0.001133  loss: 3.1717 (3.1717)  weight_decay: 0.0500 (0.0500)  time: 2.4640  data: 1.4637  max mem: 53905
Epoch: [200]  [ 200/2502]  eta: 0:23:04  lr: 0.001131  min_lr: 0.001131  loss: 3.3248 (3.0327)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8659 (0.8801)  time: 0.5929  data: 0.0006  max mem: 53905
Epoch: [200]  [ 400/2502]  eta: 0:20:57  lr: 0.001130  min_lr: 0.001130  loss: 3.3259 (3.0424)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9228 (0.9108)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [200]  [ 600/2502]  eta: 0:18:54  lr: 0.001128  min_lr: 0.001128  loss: 3.3056 (3.0525)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8399 (0.8974)  time: 0.5993  data: 0.0004  max mem: 53905
Epoch: [200]  [ 800/2502]  eta: 0:16:53  lr: 0.001126  min_lr: 0.001126  loss: 2.9721 (3.0612)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8689 (0.8970)  time: 0.5968  data: 0.0006  max mem: 53905
Epoch: [200]  [1000/2502]  eta: 0:14:53  lr: 0.001125  min_lr: 0.001125  loss: 3.1590 (3.0524)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8746 (0.8924)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [200]  [1200/2502]  eta: 0:12:53  lr: 0.001123  min_lr: 0.001123  loss: 3.1633 (3.0460)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8441 (0.8917)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [200]  [1400/2502]  eta: 0:10:54  lr: 0.001122  min_lr: 0.001122  loss: 3.3546 (3.0562)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9376 (0.8950)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [200]  [1600/2502]  eta: 0:08:55  lr: 0.001120  min_lr: 0.001120  loss: 3.1533 (3.0586)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8227 (0.9004)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [200]  [1800/2502]  eta: 0:06:56  lr: 0.001118  min_lr: 0.001118  loss: 3.1856 (3.0596)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8998 (0.9020)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [200]  [2000/2502]  eta: 0:04:58  lr: 0.001117  min_lr: 0.001117  loss: 3.1562 (3.0606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9428 (0.9043)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [200]  [2200/2502]  eta: 0:02:59  lr: 0.001115  min_lr: 0.001115  loss: 3.1924 (3.0588)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9139 (0.9043)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [200]  [2400/2502]  eta: 0:01:00  lr: 0.001114  min_lr: 0.001114  loss: 3.0495 (3.0590)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8899 (0.9047)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [200]  [2501/2502]  eta: 0:00:00  lr: 0.001113  min_lr: 0.001113  loss: 2.8152 (3.0585)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8794 (0.9028)  time: 0.5318  data: 0.0007  max mem: 53905
Epoch: [200] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.001113  min_lr: 0.001113  loss: 2.8152 (3.0610)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8794 (0.9028)
Test:  [ 0/50]  eta: 0:03:05  loss: 0.5531 (0.5531)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 3.7156  data: 3.4149  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8481 (0.8202)  acc1: 87.2000 (86.4000)  acc5: 98.4000 (97.7455)  time: 0.5940  data: 0.3109  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8494 (0.8426)  acc1: 84.0000 (85.0476)  acc5: 97.6000 (97.6381)  time: 0.2821  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9949 (0.9336)  acc1: 80.0000 (83.1613)  acc5: 95.6000 (96.6194)  time: 0.2825  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1245 (0.9886)  acc1: 78.8000 (81.7951)  acc5: 94.8000 (96.1756)  time: 0.2822  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1245 (1.0038)  acc1: 78.4000 (81.2640)  acc5: 95.2000 (96.2000)  time: 0.2820  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3531 s / it)
* Acc@1 81.650 Acc@5 96.178 loss 1.005
Accuracy of the model on the 50000 test images: 81.7%
Max accuracy: 81.70%
Epoch: [201]  [   0/2502]  eta: 1:54:45  lr: 0.001113  min_lr: 0.001113  loss: 2.6880 (2.6880)  weight_decay: 0.0500 (0.0500)  time: 2.7518  data: 1.6204  max mem: 53905
Epoch: [201]  [ 200/2502]  eta: 0:23:06  lr: 0.001111  min_lr: 0.001111  loss: 3.1564 (3.0812)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8750 (0.9007)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [201]  [ 400/2502]  eta: 0:20:54  lr: 0.001110  min_lr: 0.001110  loss: 3.0899 (3.0539)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9027 (0.9175)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [201]  [ 600/2502]  eta: 0:18:53  lr: 0.001108  min_lr: 0.001108  loss: 3.0959 (3.0510)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8640 (0.9166)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [201]  [ 800/2502]  eta: 0:16:53  lr: 0.001106  min_lr: 0.001106  loss: 3.0163 (3.0423)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8787 (0.9164)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [201]  [1000/2502]  eta: 0:14:53  lr: 0.001105  min_lr: 0.001105  loss: 3.2933 (3.0464)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8845 (0.9178)  time: 0.5959  data: 0.0004  max mem: 53905
Epoch: [201]  [1200/2502]  eta: 0:12:53  lr: 0.001103  min_lr: 0.001103  loss: 3.0677 (3.0475)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9355 (0.9285)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [201]  [1400/2502]  eta: 0:10:54  lr: 0.001102  min_lr: 0.001102  loss: 3.0448 (3.0513)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8809 (0.9291)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [201]  [1600/2502]  eta: 0:08:55  lr: 0.001100  min_lr: 0.001100  loss: 3.2335 (3.0479)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8842 (0.9279)  time: 0.6038  data: 0.0005  max mem: 53905
Epoch: [201]  [1800/2502]  eta: 0:06:56  lr: 0.001098  min_lr: 0.001098  loss: 3.0572 (3.0440)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8602 (0.9277)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [201]  [2000/2502]  eta: 0:04:58  lr: 0.001097  min_lr: 0.001097  loss: 2.9040 (3.0447)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9086 (0.9285)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [201]  [2200/2502]  eta: 0:02:59  lr: 0.001095  min_lr: 0.001095  loss: 3.2768 (3.0475)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8608 (0.9259)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [201]  [2400/2502]  eta: 0:01:00  lr: 0.001094  min_lr: 0.001094  loss: 3.2490 (3.0529)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8683 (0.9236)  time: 0.5937  data: 0.0004  max mem: 53905
Epoch: [201]  [2501/2502]  eta: 0:00:00  lr: 0.001093  min_lr: 0.001093  loss: 3.1661 (3.0534)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8945 (0.9222)  time: 0.5343  data: 0.0008  max mem: 53905
Epoch: [201] Total time: 0:24:44 (0.5935 s / it)
Averaged stats: lr: 0.001093  min_lr: 0.001093  loss: 3.1661 (3.0445)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8945 (0.9222)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.5343 (0.5343)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 3.0533  data: 2.7230  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8345 (0.8350)  acc1: 86.8000 (86.5455)  acc5: 98.0000 (97.4182)  time: 0.5731  data: 0.2851  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8604 (0.8526)  acc1: 84.8000 (85.8476)  acc5: 97.6000 (97.4286)  time: 0.3049  data: 0.0209  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9755 (0.9324)  acc1: 81.6000 (83.8968)  acc5: 96.4000 (96.5548)  time: 0.2848  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1206 (0.9780)  acc1: 78.0000 (82.2732)  acc5: 94.8000 (96.0976)  time: 0.2845  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1208 (0.9917)  acc1: 76.8000 (81.6720)  acc5: 94.8000 (96.0400)  time: 0.2843  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3504 s / it)
* Acc@1 81.824 Acc@5 96.150 loss 0.992
Accuracy of the model on the 50000 test images: 81.8%
Max accuracy: 81.82%
Epoch: [202]  [   0/2502]  eta: 1:40:26  lr: 0.001093  min_lr: 0.001093  loss: 3.0982 (3.0982)  weight_decay: 0.0500 (0.0500)  time: 2.4087  data: 1.8024  max mem: 53905
Epoch: [202]  [ 200/2502]  eta: 0:23:06  lr: 0.001091  min_lr: 0.001091  loss: 3.1669 (3.0479)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8803 (0.8954)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [202]  [ 400/2502]  eta: 0:20:57  lr: 0.001090  min_lr: 0.001090  loss: 3.0154 (3.0340)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9036 (0.9098)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [202]  [ 600/2502]  eta: 0:18:53  lr: 0.001088  min_lr: 0.001088  loss: 2.8506 (3.0355)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9177 (0.9105)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [202]  [ 800/2502]  eta: 0:16:52  lr: 0.001086  min_lr: 0.001086  loss: 3.2903 (3.0421)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9851 (0.9273)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [202]  [1000/2502]  eta: 0:14:53  lr: 0.001085  min_lr: 0.001085  loss: 3.0299 (3.0302)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9207 (0.9264)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [202]  [1200/2502]  eta: 0:12:53  lr: 0.001083  min_lr: 0.001083  loss: 3.1770 (3.0285)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8435 (0.9229)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [202]  [1400/2502]  eta: 0:10:55  lr: 0.001082  min_lr: 0.001082  loss: 3.1237 (3.0302)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9777 (0.9261)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [202]  [1600/2502]  eta: 0:08:56  lr: 0.001080  min_lr: 0.001080  loss: 3.0087 (3.0320)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8251 (0.9218)  time: 0.5936  data: 0.0004  max mem: 53905
Epoch: [202]  [1800/2502]  eta: 0:06:57  lr: 0.001078  min_lr: 0.001078  loss: 2.8319 (3.0344)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8701 (0.9173)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [202]  [2000/2502]  eta: 0:04:58  lr: 0.001077  min_lr: 0.001077  loss: 3.2543 (3.0396)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9280 (0.9230)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [202]  [2200/2502]  eta: 0:02:59  lr: 0.001075  min_lr: 0.001075  loss: 3.1299 (3.0418)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8484 (0.9209)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [202]  [2400/2502]  eta: 0:01:00  lr: 0.001074  min_lr: 0.001074  loss: 3.2706 (3.0428)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8613 (0.9184)  time: 0.6011  data: 0.0006  max mem: 53905
Epoch: [202]  [2501/2502]  eta: 0:00:00  lr: 0.001073  min_lr: 0.001073  loss: 3.2074 (3.0461)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8618 (0.9193)  time: 0.5342  data: 0.0008  max mem: 53905
Epoch: [202] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.001073  min_lr: 0.001073  loss: 3.2074 (3.0414)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8618 (0.9193)
Test:  [ 0/50]  eta: 0:03:31  loss: 0.5416 (0.5416)  acc1: 93.6000 (93.6000)  acc5: 99.2000 (99.2000)  time: 4.2345  data: 3.9121  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 0.8055 (0.7920)  acc1: 88.8000 (87.5636)  acc5: 98.8000 (98.0000)  time: 0.6438  data: 0.3561  max mem: 53905
Test:  [20/50]  eta: 0:00:14  loss: 0.8322 (0.8294)  acc1: 84.4000 (86.1333)  acc5: 98.0000 (97.9429)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9608 (0.9241)  acc1: 82.0000 (84.0258)  acc5: 96.0000 (96.7484)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1290 (0.9834)  acc1: 77.6000 (82.4488)  acc5: 94.4000 (96.2439)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1290 (1.0033)  acc1: 76.8000 (81.7680)  acc5: 94.8000 (96.0880)  time: 0.2847  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3672 s / it)
* Acc@1 81.950 Acc@5 96.200 loss 0.999
Accuracy of the model on the 50000 test images: 82.0%
Max accuracy: 81.95%
Epoch: [203]  [   0/2502]  eta: 1:34:18  lr: 0.001073  min_lr: 0.001073  loss: 2.7056 (2.7056)  weight_decay: 0.0500 (0.0500)  time: 2.2615  data: 1.6677  max mem: 53905
Epoch: [203]  [ 200/2502]  eta: 0:23:05  lr: 0.001071  min_lr: 0.001071  loss: 2.9400 (2.9458)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8674 (0.9070)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [203]  [ 400/2502]  eta: 0:20:55  lr: 0.001070  min_lr: 0.001070  loss: 3.2196 (2.9647)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8429 (0.9112)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [203]  [ 600/2502]  eta: 0:18:53  lr: 0.001068  min_lr: 0.001068  loss: 3.0816 (2.9979)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9540 (0.9076)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [203]  [ 800/2502]  eta: 0:16:53  lr: 0.001066  min_lr: 0.001066  loss: 3.1692 (3.0065)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9031 (0.9109)  time: 0.5996  data: 0.0006  max mem: 53905
Epoch: [203]  [1000/2502]  eta: 0:14:53  lr: 0.001065  min_lr: 0.001065  loss: 3.0282 (3.0125)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9336 (0.9073)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [203]  [1200/2502]  eta: 0:12:54  lr: 0.001063  min_lr: 0.001063  loss: 3.2482 (3.0097)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9416 (0.9068)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [203]  [1400/2502]  eta: 0:10:55  lr: 0.001062  min_lr: 0.001062  loss: 3.2011 (3.0164)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9931 (0.9155)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [203]  [1600/2502]  eta: 0:08:55  lr: 0.001060  min_lr: 0.001060  loss: 3.2583 (3.0196)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9555 (0.9186)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [203]  [1800/2502]  eta: 0:06:57  lr: 0.001059  min_lr: 0.001059  loss: 3.0368 (3.0220)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8770 (0.9148)  time: 0.6007  data: 0.0005  max mem: 53905
Epoch: [203]  [2000/2502]  eta: 0:04:58  lr: 0.001057  min_lr: 0.001057  loss: 3.1290 (3.0285)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8795 (0.9183)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [203]  [2200/2502]  eta: 0:02:59  lr: 0.001055  min_lr: 0.001055  loss: 3.1511 (3.0253)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8886 (0.9157)  time: 0.5958  data: 0.0004  max mem: 53905
Epoch: [203]  [2400/2502]  eta: 0:01:00  lr: 0.001054  min_lr: 0.001054  loss: 3.2620 (3.0310)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9180 (0.9183)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [203]  [2501/2502]  eta: 0:00:00  lr: 0.001053  min_lr: 0.001053  loss: 3.0958 (3.0325)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8511 (0.9169)  time: 0.5339  data: 0.0009  max mem: 53905
Epoch: [203] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.001053  min_lr: 0.001053  loss: 3.0958 (3.0419)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8511 (0.9169)
Test:  [ 0/50]  eta: 0:03:01  loss: 0.5434 (0.5434)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.6310  data: 3.3033  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8008 (0.8333)  acc1: 88.4000 (87.2000)  acc5: 97.6000 (97.5273)  time: 0.5889  data: 0.3007  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8739 (0.8733)  acc1: 84.4000 (85.9810)  acc5: 97.6000 (97.5619)  time: 0.2851  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0132 (0.9630)  acc1: 81.2000 (83.9613)  acc5: 96.0000 (96.6452)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1678 (1.0142)  acc1: 78.8000 (82.5366)  acc5: 94.0000 (96.1854)  time: 0.2851  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1861 (1.0353)  acc1: 78.4000 (81.7440)  acc5: 95.2000 (96.0720)  time: 0.2849  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3540 s / it)
* Acc@1 81.936 Acc@5 96.254 loss 1.029
Accuracy of the model on the 50000 test images: 81.9%
Max accuracy: 81.95%
Epoch: [204]  [   0/2502]  eta: 1:59:11  lr: 0.001053  min_lr: 0.001053  loss: 2.8977 (2.8977)  weight_decay: 0.0500 (0.0500)  time: 2.8584  data: 1.5257  max mem: 53905
Epoch: [204]  [ 200/2502]  eta: 0:23:10  lr: 0.001051  min_lr: 0.001051  loss: 3.1129 (2.9968)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8346 (0.8609)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [204]  [ 400/2502]  eta: 0:20:59  lr: 0.001050  min_lr: 0.001050  loss: 3.1660 (3.0123)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9074 (0.9243)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [204]  [ 600/2502]  eta: 0:18:55  lr: 0.001048  min_lr: 0.001048  loss: 3.0479 (3.0165)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9426 (0.9330)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [204]  [ 800/2502]  eta: 0:16:55  lr: 0.001047  min_lr: 0.001047  loss: 3.1924 (3.0150)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8715 (0.9249)  time: 0.5983  data: 0.0004  max mem: 53905
Epoch: [204]  [1000/2502]  eta: 0:14:55  lr: 0.001045  min_lr: 0.001045  loss: 3.2590 (3.0277)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9693 (0.9329)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [204]  [1200/2502]  eta: 0:12:55  lr: 0.001044  min_lr: 0.001044  loss: 3.1932 (3.0251)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9038 (0.9322)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [204]  [1400/2502]  eta: 0:10:56  lr: 0.001042  min_lr: 0.001042  loss: 2.9028 (3.0234)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9013 (0.9333)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [204]  [1600/2502]  eta: 0:08:56  lr: 0.001040  min_lr: 0.001040  loss: 3.1230 (3.0214)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9015 (0.9330)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [204]  [1800/2502]  eta: 0:06:57  lr: 0.001039  min_lr: 0.001039  loss: 2.9386 (3.0266)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9053 (0.9309)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [204]  [2000/2502]  eta: 0:04:58  lr: 0.001037  min_lr: 0.001037  loss: 3.2037 (3.0293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9040 (0.9331)  time: 0.5993  data: 0.0004  max mem: 53905
Epoch: [204]  [2200/2502]  eta: 0:02:59  lr: 0.001036  min_lr: 0.001036  loss: 3.2195 (3.0264)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8691 (0.9310)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [204]  [2400/2502]  eta: 0:01:00  lr: 0.001034  min_lr: 0.001034  loss: 3.0160 (3.0280)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9042 (0.9319)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [204]  [2501/2502]  eta: 0:00:00  lr: 0.001033  min_lr: 0.001033  loss: 3.0323 (3.0308)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9740 (0.9352)  time: 0.5327  data: 0.0007  max mem: 53905
Epoch: [204] Total time: 0:24:46 (0.5942 s / it)
Averaged stats: lr: 0.001033  min_lr: 0.001033  loss: 3.0323 (3.0409)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9740 (0.9352)
Test:  [ 0/50]  eta: 0:02:26  loss: 0.5914 (0.5914)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 2.9258  data: 2.6040  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.8445 (0.8255)  acc1: 90.4000 (86.7636)  acc5: 98.0000 (97.7455)  time: 0.5318  data: 0.2462  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8503 (0.8559)  acc1: 84.4000 (86.0191)  acc5: 97.6000 (97.6000)  time: 0.2876  data: 0.0054  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9556 (0.9329)  acc1: 81.6000 (83.8839)  acc5: 96.0000 (96.8000)  time: 0.2829  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1136 (0.9829)  acc1: 78.4000 (82.3707)  acc5: 94.4000 (96.2829)  time: 0.2829  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1136 (1.0029)  acc1: 78.0000 (81.7120)  acc5: 94.8000 (96.1440)  time: 0.2827  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3405 s / it)
* Acc@1 81.888 Acc@5 96.216 loss 0.997
Accuracy of the model on the 50000 test images: 81.9%
Max accuracy: 81.95%
Epoch: [205]  [   0/2502]  eta: 1:49:25  lr: 0.001033  min_lr: 0.001033  loss: 3.1419 (3.1419)  weight_decay: 0.0500 (0.0500)  time: 2.6240  data: 1.9115  max mem: 53905
Epoch: [205]  [ 200/2502]  eta: 0:23:05  lr: 0.001032  min_lr: 0.001032  loss: 3.1809 (3.0703)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8831 (0.8981)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [205]  [ 400/2502]  eta: 0:20:55  lr: 0.001030  min_lr: 0.001030  loss: 2.9942 (3.0438)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0036 (0.9605)  time: 0.5933  data: 0.0004  max mem: 53905
Epoch: [205]  [ 600/2502]  eta: 0:18:54  lr: 0.001029  min_lr: 0.001029  loss: 3.0180 (3.0302)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9066 (0.9453)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [205]  [ 800/2502]  eta: 0:16:53  lr: 0.001027  min_lr: 0.001027  loss: 3.1837 (3.0304)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9671 (0.9411)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [205]  [1000/2502]  eta: 0:14:53  lr: 0.001025  min_lr: 0.001025  loss: 3.0815 (3.0355)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9380 (0.9405)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [205]  [1200/2502]  eta: 0:12:54  lr: 0.001024  min_lr: 0.001024  loss: 3.2061 (3.0259)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8879 (0.9333)  time: 0.5938  data: 0.0004  max mem: 53905
Epoch: [205]  [1400/2502]  eta: 0:10:55  lr: 0.001022  min_lr: 0.001022  loss: 3.1143 (3.0290)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9294 (0.9302)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [205]  [1600/2502]  eta: 0:08:56  lr: 0.001021  min_lr: 0.001021  loss: 3.1676 (3.0310)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8865 (0.9265)  time: 0.5958  data: 0.0005  max mem: 53905
Epoch: [205]  [1800/2502]  eta: 0:06:57  lr: 0.001019  min_lr: 0.001019  loss: 2.8893 (3.0258)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8776 (0.9274)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [205]  [2000/2502]  eta: 0:04:58  lr: 0.001018  min_lr: 0.001018  loss: 3.0671 (3.0314)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9025 (0.9245)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [205]  [2200/2502]  eta: 0:02:59  lr: 0.001016  min_lr: 0.001016  loss: 3.1800 (3.0355)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8966 (0.9253)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [205]  [2400/2502]  eta: 0:01:00  lr: 0.001015  min_lr: 0.001015  loss: 3.2720 (3.0361)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9882 (0.9262)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [205]  [2501/2502]  eta: 0:00:00  lr: 0.001014  min_lr: 0.001014  loss: 3.2665 (3.0366)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9098 (0.9267)  time: 0.5339  data: 0.0009  max mem: 53905
Epoch: [205] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.001014  min_lr: 0.001014  loss: 3.2665 (3.0226)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9098 (0.9267)
Test:  [ 0/50]  eta: 0:03:12  loss: 0.5858 (0.5858)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.8594  data: 3.5264  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.9592 (0.9157)  acc1: 87.2000 (87.3091)  acc5: 97.6000 (97.7455)  time: 0.6097  data: 0.3209  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9761 (0.9464)  acc1: 85.2000 (86.4952)  acc5: 97.6000 (97.6191)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.0444 (1.0319)  acc1: 82.4000 (84.3742)  acc5: 96.4000 (96.6710)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2679 (1.0877)  acc1: 78.4000 (82.7122)  acc5: 94.4000 (96.0878)  time: 0.2851  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2564 (1.1069)  acc1: 76.4000 (82.1200)  acc5: 94.4000 (95.9520)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3590 s / it)
* Acc@1 82.114 Acc@5 96.196 loss 1.105
Accuracy of the model on the 50000 test images: 82.1%
Max accuracy: 82.11%
Epoch: [206]  [   0/2502]  eta: 1:42:11  lr: 0.001014  min_lr: 0.001014  loss: 3.4220 (3.4220)  weight_decay: 0.0500 (0.0500)  time: 2.4506  data: 1.8575  max mem: 53905
Epoch: [206]  [ 200/2502]  eta: 0:23:13  lr: 0.001012  min_lr: 0.001012  loss: 3.2570 (3.0288)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9047 (0.9168)  time: 0.6042  data: 0.0004  max mem: 53905
Epoch: [206]  [ 400/2502]  eta: 0:20:58  lr: 0.001011  min_lr: 0.001011  loss: 3.2094 (3.0141)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8826 (0.9212)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [206]  [ 600/2502]  eta: 0:18:55  lr: 0.001009  min_lr: 0.001009  loss: 3.2610 (3.0235)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9167 (0.9183)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [206]  [ 800/2502]  eta: 0:16:54  lr: 0.001007  min_lr: 0.001007  loss: 2.8861 (3.0185)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9692 (0.9281)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [206]  [1000/2502]  eta: 0:14:54  lr: 0.001006  min_lr: 0.001006  loss: 2.9123 (3.0177)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9821 (0.9305)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [206]  [1200/2502]  eta: 0:12:54  lr: 0.001004  min_lr: 0.001004  loss: 3.1752 (3.0287)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9179 (0.9340)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [206]  [1400/2502]  eta: 0:10:55  lr: 0.001003  min_lr: 0.001003  loss: 3.0921 (3.0307)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8772 (0.9291)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [206]  [1600/2502]  eta: 0:08:56  lr: 0.001001  min_lr: 0.001001  loss: 3.2161 (3.0293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9015 (0.9267)  time: 0.5935  data: 0.0004  max mem: 53905
Epoch: [206]  [1800/2502]  eta: 0:06:57  lr: 0.001000  min_lr: 0.001000  loss: 3.0338 (3.0282)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9702 (0.9324)  time: 0.6045  data: 0.0005  max mem: 53905
Epoch: [206]  [2000/2502]  eta: 0:04:58  lr: 0.000998  min_lr: 0.000998  loss: 2.7240 (3.0263)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9133 (0.9355)  time: 0.5941  data: 0.0005  max mem: 53905
Epoch: [206]  [2200/2502]  eta: 0:02:59  lr: 0.000997  min_lr: 0.000997  loss: 3.1298 (3.0255)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9055 (0.9336)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [206]  [2400/2502]  eta: 0:01:00  lr: 0.000995  min_lr: 0.000995  loss: 2.9807 (3.0240)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9438 (0.9363)  time: 0.5936  data: 0.0006  max mem: 53905
Epoch: [206]  [2501/2502]  eta: 0:00:00  lr: 0.000994  min_lr: 0.000994  loss: 3.2309 (3.0254)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9204 (0.9369)  time: 0.5339  data: 0.0009  max mem: 53905
Epoch: [206] Total time: 0:24:46 (0.5942 s / it)
Averaged stats: lr: 0.000994  min_lr: 0.000994  loss: 3.2309 (3.0238)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9204 (0.9369)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.5432 (0.5432)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.0070  data: 2.6907  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8486 (0.8368)  acc1: 86.8000 (87.2727)  acc5: 97.6000 (97.8546)  time: 0.5594  data: 0.2719  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8968 (0.8766)  acc1: 84.0000 (86.0762)  acc5: 98.0000 (97.8667)  time: 0.3000  data: 0.0153  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9830 (0.9585)  acc1: 82.4000 (84.0774)  acc5: 96.0000 (96.9677)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1480 (1.0098)  acc1: 78.0000 (82.6342)  acc5: 94.8000 (96.3805)  time: 0.2851  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1480 (1.0273)  acc1: 77.6000 (82.0080)  acc5: 94.8000 (96.2960)  time: 0.2849  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3473 s / it)
* Acc@1 82.074 Acc@5 96.294 loss 1.027
Accuracy of the model on the 50000 test images: 82.1%
Max accuracy: 82.11%
Epoch: [207]  [   0/2502]  eta: 1:54:48  lr: 0.000994  min_lr: 0.000994  loss: 2.8467 (2.8467)  weight_decay: 0.0500 (0.0500)  time: 2.7533  data: 1.9268  max mem: 53905
Epoch: [207]  [ 200/2502]  eta: 0:23:10  lr: 0.000993  min_lr: 0.000993  loss: 3.1794 (3.0490)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9079 (0.9404)  time: 0.6036  data: 0.0004  max mem: 53905
Epoch: [207]  [ 400/2502]  eta: 0:20:57  lr: 0.000991  min_lr: 0.000991  loss: 3.1100 (3.0239)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9219 (0.9549)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [207]  [ 600/2502]  eta: 0:18:53  lr: 0.000990  min_lr: 0.000990  loss: 3.1097 (3.0086)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8886 (0.9366)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [207]  [ 800/2502]  eta: 0:16:52  lr: 0.000988  min_lr: 0.000988  loss: 2.9712 (3.0160)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0323 (0.9523)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [207]  [1000/2502]  eta: 0:14:53  lr: 0.000987  min_lr: 0.000987  loss: 3.0778 (3.0094)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9300 (0.9536)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [207]  [1200/2502]  eta: 0:12:54  lr: 0.000985  min_lr: 0.000985  loss: 3.1727 (3.0138)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8473 (0.9564)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [207]  [1400/2502]  eta: 0:10:55  lr: 0.000983  min_lr: 0.000983  loss: 3.2385 (3.0198)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9413 (0.9576)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [207]  [1600/2502]  eta: 0:08:56  lr: 0.000982  min_lr: 0.000982  loss: 3.2496 (3.0253)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8431 (0.9516)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [207]  [1800/2502]  eta: 0:06:57  lr: 0.000980  min_lr: 0.000980  loss: 3.2096 (3.0300)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9074 (0.9532)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [207]  [2000/2502]  eta: 0:04:58  lr: 0.000979  min_lr: 0.000979  loss: 3.1965 (3.0266)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8866 (0.9528)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [207]  [2200/2502]  eta: 0:02:59  lr: 0.000977  min_lr: 0.000977  loss: 3.1467 (3.0252)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9165 (inf)  time: 0.5935  data: 0.0005  max mem: 53905
Epoch: [207]  [2400/2502]  eta: 0:01:00  lr: 0.000976  min_lr: 0.000976  loss: 3.2096 (3.0245)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9721 (inf)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [207]  [2501/2502]  eta: 0:00:00  lr: 0.000975  min_lr: 0.000975  loss: 3.3863 (3.0237)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9048 (inf)  time: 0.5327  data: 0.0007  max mem: 53905
Epoch: [207] Total time: 0:24:45 (0.5938 s / it)
Averaged stats: lr: 0.000975  min_lr: 0.000975  loss: 3.3863 (3.0245)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9048 (inf)
Test:  [ 0/50]  eta: 0:03:00  loss: 0.6273 (0.6273)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 3.6182  data: 3.3100  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8841 (0.8891)  acc1: 89.2000 (87.2000)  acc5: 97.6000 (97.8182)  time: 0.5853  data: 0.3013  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9339 (0.9308)  acc1: 84.4000 (85.9619)  acc5: 97.6000 (97.6762)  time: 0.2822  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0557 (1.0173)  acc1: 81.2000 (83.8065)  acc5: 96.0000 (96.7226)  time: 0.2825  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2159 (1.0724)  acc1: 79.2000 (82.1854)  acc5: 94.4000 (96.1463)  time: 0.2823  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2159 (1.0951)  acc1: 78.4000 (81.5920)  acc5: 94.8000 (96.0960)  time: 0.2821  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3510 s / it)
* Acc@1 81.918 Acc@5 96.248 loss 1.090
Accuracy of the model on the 50000 test images: 81.9%
Max accuracy: 82.11%
Epoch: [208]  [   0/2502]  eta: 1:46:47  lr: 0.000975  min_lr: 0.000975  loss: 2.0990 (2.0990)  weight_decay: 0.0500 (0.0500)  time: 2.5609  data: 1.9222  max mem: 53905
Epoch: [208]  [ 200/2502]  eta: 0:23:07  lr: 0.000973  min_lr: 0.000973  loss: 3.1046 (2.9615)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9360 (0.9426)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [208]  [ 400/2502]  eta: 0:20:55  lr: 0.000972  min_lr: 0.000972  loss: 2.8857 (2.9988)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0042 (0.9578)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [208]  [ 600/2502]  eta: 0:18:54  lr: 0.000970  min_lr: 0.000970  loss: 3.1817 (2.9985)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5950  data: 0.0005  max mem: 53905
Epoch: [208]  [ 800/2502]  eta: 0:16:54  lr: 0.000969  min_lr: 0.000969  loss: 2.9309 (2.9999)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9202 (nan)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [208]  [1000/2502]  eta: 0:14:53  lr: 0.000967  min_lr: 0.000967  loss: 3.2038 (2.9922)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9409 (nan)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [208]  [1200/2502]  eta: 0:12:54  lr: 0.000966  min_lr: 0.000966  loss: 3.3867 (2.9934)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9355 (nan)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [208]  [1400/2502]  eta: 0:10:55  lr: 0.000964  min_lr: 0.000964  loss: 3.1192 (2.9901)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9168 (nan)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [208]  [1600/2502]  eta: 0:08:56  lr: 0.000963  min_lr: 0.000963  loss: 2.9105 (2.9974)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8994 (nan)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [208]  [1800/2502]  eta: 0:06:57  lr: 0.000961  min_lr: 0.000961  loss: 2.9167 (3.0002)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9196 (nan)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [208]  [2000/2502]  eta: 0:04:58  lr: 0.000960  min_lr: 0.000960  loss: 2.9259 (2.9975)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9235 (nan)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [208]  [2200/2502]  eta: 0:02:59  lr: 0.000958  min_lr: 0.000958  loss: 3.1349 (3.0006)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9583 (nan)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [208]  [2400/2502]  eta: 0:01:00  lr: 0.000957  min_lr: 0.000957  loss: 3.1606 (3.0024)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9432 (nan)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [208]  [2501/2502]  eta: 0:00:00  lr: 0.000956  min_lr: 0.000956  loss: 3.0838 (3.0063)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8717 (nan)  time: 0.5335  data: 0.0007  max mem: 53905
Epoch: [208] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.000956  min_lr: 0.000956  loss: 3.0838 (3.0112)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8717 (nan)
Test:  [ 0/50]  eta: 0:03:06  loss: 0.5056 (0.5056)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.7308  data: 3.4142  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7925 (0.7956)  acc1: 88.0000 (87.4546)  acc5: 98.0000 (97.6727)  time: 0.5983  data: 0.3109  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8065 (0.8231)  acc1: 84.4000 (86.4571)  acc5: 98.0000 (97.7333)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9563 (0.9140)  acc1: 82.8000 (84.2065)  acc5: 96.0000 (96.7484)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0883 (0.9650)  acc1: 80.0000 (82.8781)  acc5: 94.4000 (96.1951)  time: 0.2851  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0883 (0.9826)  acc1: 77.2000 (82.1680)  acc5: 95.2000 (96.2240)  time: 0.2847  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3565 s / it)
* Acc@1 82.172 Acc@5 96.262 loss 0.982
Accuracy of the model on the 50000 test images: 82.2%
Max accuracy: 82.17%
Epoch: [209]  [   0/2502]  eta: 1:51:20  lr: 0.000956  min_lr: 0.000956  loss: 2.1034 (2.1034)  weight_decay: 0.0500 (0.0500)  time: 2.6700  data: 2.0623  max mem: 53905
Epoch: [209]  [ 200/2502]  eta: 0:23:11  lr: 0.000954  min_lr: 0.000954  loss: 2.8589 (3.0022)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9398 (0.9480)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [209]  [ 400/2502]  eta: 0:20:58  lr: 0.000953  min_lr: 0.000953  loss: 3.1865 (3.0182)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9349 (0.9467)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [209]  [ 600/2502]  eta: 0:18:55  lr: 0.000951  min_lr: 0.000951  loss: 3.1235 (3.0077)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8898 (0.9450)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [209]  [ 800/2502]  eta: 0:16:53  lr: 0.000950  min_lr: 0.000950  loss: 2.8470 (2.9974)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9709 (0.9482)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [209]  [1000/2502]  eta: 0:14:54  lr: 0.000948  min_lr: 0.000948  loss: 3.0963 (3.0089)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9446 (0.9471)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [209]  [1200/2502]  eta: 0:12:54  lr: 0.000947  min_lr: 0.000947  loss: 3.1718 (3.0091)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9028 (0.9509)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [209]  [1400/2502]  eta: 0:10:55  lr: 0.000945  min_lr: 0.000945  loss: 3.1675 (3.0195)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0078 (0.9516)  time: 0.5955  data: 0.0005  max mem: 53905
Epoch: [209]  [1600/2502]  eta: 0:08:56  lr: 0.000944  min_lr: 0.000944  loss: 3.2058 (3.0172)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9275 (0.9523)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [209]  [1800/2502]  eta: 0:06:57  lr: 0.000942  min_lr: 0.000942  loss: 3.2367 (3.0221)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0314 (0.9522)  time: 0.5953  data: 0.0005  max mem: 53905
Epoch: [209]  [2000/2502]  eta: 0:04:58  lr: 0.000941  min_lr: 0.000941  loss: 3.3067 (3.0294)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9329 (0.9502)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [209]  [2200/2502]  eta: 0:02:59  lr: 0.000939  min_lr: 0.000939  loss: 3.2050 (3.0300)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8888 (0.9487)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [209]  [2400/2502]  eta: 0:01:00  lr: 0.000937  min_lr: 0.000937  loss: 3.0966 (3.0292)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9763 (0.9586)  time: 0.5961  data: 0.0004  max mem: 53905
Epoch: [209]  [2501/2502]  eta: 0:00:00  lr: 0.000937  min_lr: 0.000937  loss: 3.1392 (3.0288)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9592 (0.9580)  time: 0.5320  data: 0.0007  max mem: 53905
Epoch: [209] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.000937  min_lr: 0.000937  loss: 3.1392 (3.0100)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9592 (0.9580)
Test:  [ 0/50]  eta: 0:03:01  loss: 0.5023 (0.5023)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 3.6271  data: 3.3041  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8076 (0.8020)  acc1: 88.0000 (87.2364)  acc5: 98.4000 (97.7455)  time: 0.5867  data: 0.3008  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8274 (0.8306)  acc1: 86.0000 (86.1524)  acc5: 97.6000 (97.6191)  time: 0.2828  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9686 (0.9227)  acc1: 82.0000 (84.1548)  acc5: 96.0000 (96.6452)  time: 0.2831  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1067 (0.9759)  acc1: 78.8000 (82.7512)  acc5: 94.4000 (96.1366)  time: 0.2829  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0862 (0.9956)  acc1: 78.4000 (82.0480)  acc5: 95.2000 (96.0800)  time: 0.2827  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3519 s / it)
* Acc@1 82.294 Acc@5 96.260 loss 1.000
Accuracy of the model on the 50000 test images: 82.3%
Max accuracy: 82.29%
Epoch: [210]  [   0/2502]  eta: 1:35:55  lr: 0.000937  min_lr: 0.000937  loss: 3.3892 (3.3892)  weight_decay: 0.0500 (0.0500)  time: 2.3003  data: 1.6905  max mem: 53905
Epoch: [210]  [ 200/2502]  eta: 0:23:06  lr: 0.000935  min_lr: 0.000935  loss: 3.1465 (3.0082)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9961 (0.9587)  time: 0.5998  data: 0.0004  max mem: 53905
Epoch: [210]  [ 400/2502]  eta: 0:20:54  lr: 0.000934  min_lr: 0.000934  loss: 2.9381 (2.9928)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9868 (0.9671)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [210]  [ 600/2502]  eta: 0:18:53  lr: 0.000932  min_lr: 0.000932  loss: 2.8307 (3.0035)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9513 (0.9699)  time: 0.5988  data: 0.0004  max mem: 53905
Epoch: [210]  [ 800/2502]  eta: 0:16:53  lr: 0.000931  min_lr: 0.000931  loss: 3.1404 (2.9993)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0520 (0.9730)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [210]  [1000/2502]  eta: 0:14:53  lr: 0.000929  min_lr: 0.000929  loss: 2.9282 (3.0039)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9498 (0.9788)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [210]  [1200/2502]  eta: 0:12:54  lr: 0.000928  min_lr: 0.000928  loss: 3.3730 (3.0080)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9075 (0.9706)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [210]  [1400/2502]  eta: 0:10:55  lr: 0.000926  min_lr: 0.000926  loss: 3.0152 (3.0077)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9287 (0.9697)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [210]  [1600/2502]  eta: 0:08:55  lr: 0.000925  min_lr: 0.000925  loss: 3.2597 (3.0062)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9660 (0.9723)  time: 0.5944  data: 0.0005  max mem: 53905
Epoch: [210]  [1800/2502]  eta: 0:06:57  lr: 0.000923  min_lr: 0.000923  loss: 2.9524 (3.0050)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0062 (0.9733)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [210]  [2000/2502]  eta: 0:04:58  lr: 0.000922  min_lr: 0.000922  loss: 3.3472 (3.0113)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9103 (0.9695)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [210]  [2200/2502]  eta: 0:02:59  lr: 0.000920  min_lr: 0.000920  loss: 2.9447 (3.0088)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9929 (0.9723)  time: 0.5916  data: 0.0003  max mem: 53905
Epoch: [210]  [2400/2502]  eta: 0:01:00  lr: 0.000919  min_lr: 0.000919  loss: 3.1429 (3.0094)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0536 (0.9723)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [210]  [2501/2502]  eta: 0:00:00  lr: 0.000918  min_lr: 0.000918  loss: 2.8298 (3.0085)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9881 (0.9739)  time: 0.5331  data: 0.0007  max mem: 53905
Epoch: [210] Total time: 0:24:45 (0.5935 s / it)
Averaged stats: lr: 0.000918  min_lr: 0.000918  loss: 2.8298 (3.0090)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9881 (0.9739)
Test:  [ 0/50]  eta: 0:03:20  loss: 0.4966 (0.4966)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 4.0117  data: 3.6798  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.7722 (0.7832)  acc1: 89.6000 (87.2364)  acc5: 98.4000 (97.9636)  time: 0.6236  data: 0.3350  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7859 (0.8134)  acc1: 85.6000 (86.3238)  acc5: 98.0000 (97.8667)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9276 (0.8931)  acc1: 83.2000 (84.5548)  acc5: 96.4000 (96.9548)  time: 0.2855  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1033 (0.9509)  acc1: 80.0000 (82.9561)  acc5: 94.8000 (96.4585)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1033 (0.9670)  acc1: 77.6000 (82.2400)  acc5: 95.6000 (96.3600)  time: 0.2850  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3621 s / it)
* Acc@1 82.418 Acc@5 96.378 loss 0.966
Accuracy of the model on the 50000 test images: 82.4%
Max accuracy: 82.42%
Epoch: [211]  [   0/2502]  eta: 1:56:10  lr: 0.000918  min_lr: 0.000918  loss: 3.3342 (3.3342)  weight_decay: 0.0500 (0.0500)  time: 2.7862  data: 2.1759  max mem: 53905
Epoch: [211]  [ 200/2502]  eta: 0:23:11  lr: 0.000916  min_lr: 0.000916  loss: 2.9634 (2.9651)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9708 (0.9891)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [211]  [ 400/2502]  eta: 0:21:00  lr: 0.000915  min_lr: 0.000915  loss: 3.1236 (2.9853)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9120 (0.9781)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [211]  [ 600/2502]  eta: 0:18:55  lr: 0.000913  min_lr: 0.000913  loss: 3.3064 (3.0040)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9434 (0.9778)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [211]  [ 800/2502]  eta: 0:16:54  lr: 0.000912  min_lr: 0.000912  loss: 3.1861 (2.9887)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8729 (0.9567)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [211]  [1000/2502]  eta: 0:14:54  lr: 0.000910  min_lr: 0.000910  loss: 2.8360 (2.9959)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0089 (0.9581)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [211]  [1200/2502]  eta: 0:12:55  lr: 0.000909  min_lr: 0.000909  loss: 3.0257 (2.9958)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9704 (0.9635)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [211]  [1400/2502]  eta: 0:10:55  lr: 0.000907  min_lr: 0.000907  loss: 3.0051 (2.9895)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9546 (0.9661)  time: 0.5967  data: 0.0005  max mem: 53905
Epoch: [211]  [1600/2502]  eta: 0:08:56  lr: 0.000906  min_lr: 0.000906  loss: 3.0695 (2.9858)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9733 (0.9681)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [211]  [1800/2502]  eta: 0:06:57  lr: 0.000904  min_lr: 0.000904  loss: 3.0760 (2.9866)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0565 (0.9730)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [211]  [2000/2502]  eta: 0:04:58  lr: 0.000903  min_lr: 0.000903  loss: 2.8994 (2.9873)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9827 (0.9770)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [211]  [2200/2502]  eta: 0:02:59  lr: 0.000901  min_lr: 0.000901  loss: 2.9261 (2.9859)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0850 (0.9819)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [211]  [2400/2502]  eta: 0:01:00  lr: 0.000900  min_lr: 0.000900  loss: 3.2882 (2.9888)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0252 (0.9809)  time: 0.5989  data: 0.0004  max mem: 53905
Epoch: [211]  [2501/2502]  eta: 0:00:00  lr: 0.000899  min_lr: 0.000899  loss: 3.1328 (2.9888)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9363 (0.9799)  time: 0.5335  data: 0.0009  max mem: 53905
Epoch: [211] Total time: 0:24:47 (0.5943 s / it)
Averaged stats: lr: 0.000899  min_lr: 0.000899  loss: 3.1328 (2.9895)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9363 (0.9799)
Test:  [ 0/50]  eta: 0:02:58  loss: 0.5253 (0.5253)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 3.5726  data: 3.2463  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8205 (0.8092)  acc1: 89.2000 (87.3455)  acc5: 98.0000 (97.8182)  time: 0.6090  data: 0.3215  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8205 (0.8345)  acc1: 83.6000 (86.0762)  acc5: 98.0000 (97.7524)  time: 0.2987  data: 0.0147  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9833 (0.9139)  acc1: 81.6000 (84.4000)  acc5: 96.4000 (96.8516)  time: 0.2849  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1014 (0.9675)  acc1: 79.2000 (82.7317)  acc5: 94.4000 (96.3707)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1014 (0.9909)  acc1: 78.8000 (82.0400)  acc5: 94.8000 (96.2640)  time: 0.2846  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3588 s / it)
* Acc@1 82.306 Acc@5 96.336 loss 0.985
Accuracy of the model on the 50000 test images: 82.3%
Max accuracy: 82.42%
Epoch: [212]  [   0/2502]  eta: 1:55:26  lr: 0.000899  min_lr: 0.000899  loss: 3.3139 (3.3139)  weight_decay: 0.0500 (0.0500)  time: 2.7683  data: 2.1734  max mem: 53905
Epoch: [212]  [ 200/2502]  eta: 0:23:14  lr: 0.000897  min_lr: 0.000897  loss: 3.1207 (2.9691)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9821 (1.0249)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [212]  [ 400/2502]  eta: 0:20:59  lr: 0.000896  min_lr: 0.000896  loss: 3.0862 (2.9946)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9565 (0.9945)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [212]  [ 600/2502]  eta: 0:18:57  lr: 0.000894  min_lr: 0.000894  loss: 3.2271 (2.9633)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9222 (0.9881)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [212]  [ 800/2502]  eta: 0:16:55  lr: 0.000893  min_lr: 0.000893  loss: 3.0671 (2.9513)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0474 (0.9953)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [212]  [1000/2502]  eta: 0:14:55  lr: 0.000892  min_lr: 0.000892  loss: 3.2712 (2.9640)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9684 (0.9962)  time: 0.5970  data: 0.0005  max mem: 53905
Epoch: [212]  [1200/2502]  eta: 0:12:55  lr: 0.000890  min_lr: 0.000890  loss: 2.9410 (2.9604)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9977 (0.9975)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [212]  [1400/2502]  eta: 0:10:55  lr: 0.000889  min_lr: 0.000889  loss: 2.9546 (2.9679)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9126 (0.9972)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [212]  [1600/2502]  eta: 0:08:56  lr: 0.000887  min_lr: 0.000887  loss: 3.2144 (2.9665)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9845 (0.9972)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [212]  [1800/2502]  eta: 0:06:57  lr: 0.000886  min_lr: 0.000886  loss: 2.9979 (2.9729)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9368 (0.9910)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [212]  [2000/2502]  eta: 0:04:58  lr: 0.000884  min_lr: 0.000884  loss: 3.1039 (2.9758)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9876 (nan)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [212]  [2200/2502]  eta: 0:02:59  lr: 0.000883  min_lr: 0.000883  loss: 2.9238 (2.9763)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0437 (nan)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [212]  [2400/2502]  eta: 0:01:00  lr: 0.000881  min_lr: 0.000881  loss: 3.0105 (2.9798)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9180 (nan)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [212]  [2501/2502]  eta: 0:00:00  lr: 0.000880  min_lr: 0.000880  loss: 3.2691 (2.9823)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9898 (nan)  time: 0.5334  data: 0.0007  max mem: 53905
Epoch: [212] Total time: 0:24:47 (0.5946 s / it)
Averaged stats: lr: 0.000880  min_lr: 0.000880  loss: 3.2691 (2.9902)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9898 (nan)
Test:  [ 0/50]  eta: 0:02:45  loss: 0.6367 (0.6367)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 3.3094  data: 2.9857  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.9140 (0.9041)  acc1: 87.6000 (87.3091)  acc5: 98.0000 (97.8182)  time: 0.5646  data: 0.2767  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9217 (0.9304)  acc1: 85.6000 (86.5714)  acc5: 97.6000 (97.7333)  time: 0.2878  data: 0.0031  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0387 (1.0222)  acc1: 82.4000 (84.5548)  acc5: 96.8000 (96.9032)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2363 (1.0791)  acc1: 79.6000 (83.0927)  acc5: 94.8000 (96.4488)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2284 (1.1004)  acc1: 78.8000 (82.2240)  acc5: 95.2000 (96.2800)  time: 0.2850  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3487 s / it)
* Acc@1 82.242 Acc@5 96.258 loss 1.101
Accuracy of the model on the 50000 test images: 82.2%
Max accuracy: 82.42%
Epoch: [213]  [   0/2502]  eta: 1:50:36  lr: 0.000880  min_lr: 0.000880  loss: 3.4161 (3.4161)  weight_decay: 0.0500 (0.0500)  time: 2.6525  data: 1.9350  max mem: 53905
Epoch: [213]  [ 200/2502]  eta: 0:23:09  lr: 0.000879  min_lr: 0.000879  loss: 3.0291 (3.0295)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9630 (0.9562)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [213]  [ 400/2502]  eta: 0:20:59  lr: 0.000877  min_lr: 0.000877  loss: 2.7903 (3.0107)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0069 (0.9794)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [213]  [ 600/2502]  eta: 0:18:55  lr: 0.000876  min_lr: 0.000876  loss: 3.1755 (3.0088)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9108 (0.9607)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [213]  [ 800/2502]  eta: 0:16:54  lr: 0.000874  min_lr: 0.000874  loss: 2.8566 (2.9980)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9177 (0.9648)  time: 0.5996  data: 0.0004  max mem: 53905
Epoch: [213]  [1000/2502]  eta: 0:14:54  lr: 0.000873  min_lr: 0.000873  loss: 3.0298 (3.0044)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9546 (0.9685)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [213]  [1200/2502]  eta: 0:12:54  lr: 0.000871  min_lr: 0.000871  loss: 3.1418 (3.0023)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0506 (0.9754)  time: 0.5926  data: 0.0006  max mem: 53905
Epoch: [213]  [1400/2502]  eta: 0:10:55  lr: 0.000870  min_lr: 0.000870  loss: 3.1820 (2.9999)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9704 (0.9738)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [213]  [1600/2502]  eta: 0:08:56  lr: 0.000868  min_lr: 0.000868  loss: 3.2891 (3.0002)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9576 (0.9737)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [213]  [1800/2502]  eta: 0:06:57  lr: 0.000867  min_lr: 0.000867  loss: 2.5061 (2.9990)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9365 (0.9759)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [213]  [2000/2502]  eta: 0:04:58  lr: 0.000866  min_lr: 0.000866  loss: 3.1353 (2.9986)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9761 (0.9747)  time: 0.5961  data: 0.0004  max mem: 53905
Epoch: [213]  [2200/2502]  eta: 0:02:59  lr: 0.000864  min_lr: 0.000864  loss: 3.1068 (2.9988)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0485 (0.9753)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [213]  [2400/2502]  eta: 0:01:00  lr: 0.000863  min_lr: 0.000863  loss: 3.2202 (2.9993)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0227 (0.9835)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [213]  [2501/2502]  eta: 0:00:00  lr: 0.000862  min_lr: 0.000862  loss: 3.1042 (2.9989)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9300 (0.9826)  time: 0.5321  data: 0.0009  max mem: 53905
Epoch: [213] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.000862  min_lr: 0.000862  loss: 3.1042 (2.9829)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9300 (0.9826)
Test:  [ 0/50]  eta: 0:03:11  loss: 0.5995 (0.5995)  acc1: 93.6000 (93.6000)  acc5: 99.2000 (99.2000)  time: 3.8388  data: 3.5186  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8835 (0.8664)  acc1: 88.8000 (87.2727)  acc5: 98.8000 (98.1455)  time: 0.6058  data: 0.3203  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8835 (0.8863)  acc1: 86.0000 (86.1905)  acc5: 98.0000 (97.8857)  time: 0.2827  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0032 (0.9600)  acc1: 81.6000 (84.3871)  acc5: 96.4000 (96.9548)  time: 0.2829  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1578 (1.0101)  acc1: 80.0000 (82.9463)  acc5: 95.2000 (96.4390)  time: 0.2827  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1578 (1.0273)  acc1: 79.2000 (82.2880)  acc5: 95.2000 (96.3520)  time: 0.2826  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3563 s / it)
* Acc@1 82.502 Acc@5 96.458 loss 1.021
Accuracy of the model on the 50000 test images: 82.5%
Max accuracy: 82.50%
Epoch: [214]  [   0/2502]  eta: 1:48:08  lr: 0.000862  min_lr: 0.000862  loss: 2.4372 (2.4372)  weight_decay: 0.0500 (0.0500)  time: 2.5932  data: 1.9850  max mem: 53905
Epoch: [214]  [ 200/2502]  eta: 0:23:07  lr: 0.000860  min_lr: 0.000860  loss: 3.1308 (2.9681)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0009 (0.9613)  time: 0.5929  data: 0.0007  max mem: 53905
Epoch: [214]  [ 400/2502]  eta: 0:20:56  lr: 0.000859  min_lr: 0.000859  loss: 3.1676 (2.9801)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1178 (0.9951)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [214]  [ 600/2502]  eta: 0:18:54  lr: 0.000857  min_lr: 0.000857  loss: 2.9681 (2.9794)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0185 (1.0028)  time: 0.5977  data: 0.0006  max mem: 53905
Epoch: [214]  [ 800/2502]  eta: 0:16:52  lr: 0.000856  min_lr: 0.000856  loss: 3.0160 (2.9610)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0213 (1.0088)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [214]  [1000/2502]  eta: 0:14:53  lr: 0.000854  min_lr: 0.000854  loss: 2.8297 (2.9539)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9570 (1.0091)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [214]  [1200/2502]  eta: 0:12:53  lr: 0.000853  min_lr: 0.000853  loss: 3.0508 (2.9579)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0766 (1.0106)  time: 0.5929  data: 0.0007  max mem: 53905
Epoch: [214]  [1400/2502]  eta: 0:10:54  lr: 0.000852  min_lr: 0.000852  loss: 3.1063 (2.9618)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9932 (1.0083)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [214]  [1600/2502]  eta: 0:08:55  lr: 0.000850  min_lr: 0.000850  loss: 3.1129 (2.9638)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0234 (1.0127)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [214]  [1800/2502]  eta: 0:06:56  lr: 0.000849  min_lr: 0.000849  loss: 2.9107 (2.9650)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9396 (1.0100)  time: 0.5922  data: 0.0008  max mem: 53905
Epoch: [214]  [2000/2502]  eta: 0:04:57  lr: 0.000847  min_lr: 0.000847  loss: 3.0180 (2.9632)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9375 (1.0066)  time: 0.5916  data: 0.0007  max mem: 53905
Epoch: [214]  [2200/2502]  eta: 0:02:59  lr: 0.000846  min_lr: 0.000846  loss: 3.1587 (2.9644)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9806 (1.0035)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [214]  [2400/2502]  eta: 0:01:00  lr: 0.000844  min_lr: 0.000844  loss: 3.0265 (2.9680)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0269 (1.0088)  time: 0.5924  data: 0.0007  max mem: 53905
Epoch: [214]  [2501/2502]  eta: 0:00:00  lr: 0.000843  min_lr: 0.000843  loss: 2.9755 (2.9679)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9673 (1.0082)  time: 0.5337  data: 0.0008  max mem: 53905
Epoch: [214] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.000843  min_lr: 0.000843  loss: 2.9755 (2.9693)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9673 (1.0082)
Test:  [ 0/50]  eta: 0:03:12  loss: 0.5166 (0.5166)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.8535  data: 3.5435  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8374 (0.8060)  acc1: 88.8000 (87.0909)  acc5: 98.4000 (98.1091)  time: 0.6094  data: 0.3225  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8375 (0.8342)  acc1: 84.4000 (86.1524)  acc5: 98.4000 (97.9048)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9461 (0.9211)  acc1: 82.8000 (84.1161)  acc5: 96.8000 (96.8258)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0891 (0.9701)  acc1: 79.2000 (82.8878)  acc5: 94.4000 (96.3512)  time: 0.2851  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0700 (0.9912)  acc1: 79.2000 (82.3040)  acc5: 95.2000 (96.2320)  time: 0.2849  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3589 s / it)
* Acc@1 82.388 Acc@5 96.318 loss 0.989
Accuracy of the model on the 50000 test images: 82.4%
Max accuracy: 82.50%
Epoch: [215]  [   0/2502]  eta: 1:44:25  lr: 0.000843  min_lr: 0.000843  loss: 3.0397 (3.0397)  weight_decay: 0.0500 (0.0500)  time: 2.5042  data: 1.8444  max mem: 53905
Epoch: [215]  [ 200/2502]  eta: 0:23:07  lr: 0.000842  min_lr: 0.000842  loss: 3.1905 (2.9781)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9277 (0.9982)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [215]  [ 400/2502]  eta: 0:20:57  lr: 0.000841  min_lr: 0.000841  loss: 3.2261 (2.9807)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9414 (0.9910)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [215]  [ 600/2502]  eta: 0:18:54  lr: 0.000839  min_lr: 0.000839  loss: 2.9416 (2.9759)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9463 (0.9976)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [215]  [ 800/2502]  eta: 0:16:53  lr: 0.000838  min_lr: 0.000838  loss: 3.1342 (2.9640)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9383 (0.9913)  time: 0.5923  data: 0.0006  max mem: 53905
Epoch: [215]  [1000/2502]  eta: 0:14:53  lr: 0.000836  min_lr: 0.000836  loss: 3.0834 (2.9785)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9611 (0.9909)  time: 0.5927  data: 0.0006  max mem: 53905
Epoch: [215]  [1200/2502]  eta: 0:12:54  lr: 0.000835  min_lr: 0.000835  loss: 3.1413 (2.9819)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8888 (0.9852)  time: 0.5915  data: 0.0006  max mem: 53905
Epoch: [215]  [1400/2502]  eta: 0:10:55  lr: 0.000833  min_lr: 0.000833  loss: 2.9911 (2.9724)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8857 (0.9762)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [215]  [1600/2502]  eta: 0:08:55  lr: 0.000832  min_lr: 0.000832  loss: 2.8545 (2.9704)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0275 (0.9858)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [215]  [1800/2502]  eta: 0:06:56  lr: 0.000830  min_lr: 0.000830  loss: 2.9789 (2.9702)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9537 (0.9923)  time: 0.5950  data: 0.0004  max mem: 53905
Epoch: [215]  [2000/2502]  eta: 0:04:58  lr: 0.000829  min_lr: 0.000829  loss: 2.8738 (2.9644)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0064 (0.9944)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [215]  [2200/2502]  eta: 0:02:59  lr: 0.000827  min_lr: 0.000827  loss: 3.0594 (2.9610)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0022 (0.9978)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [215]  [2400/2502]  eta: 0:01:00  lr: 0.000826  min_lr: 0.000826  loss: 3.2345 (2.9656)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9664 (0.9991)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [215]  [2501/2502]  eta: 0:00:00  lr: 0.000825  min_lr: 0.000825  loss: 3.0401 (2.9642)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9222 (0.9980)  time: 0.5330  data: 0.0008  max mem: 53905
Epoch: [215] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.000825  min_lr: 0.000825  loss: 3.0401 (2.9705)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9222 (0.9980)
Test:  [ 0/50]  eta: 0:02:43  loss: 0.5546 (0.5546)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.2672  data: 2.9453  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8028 (0.7985)  acc1: 87.6000 (87.8909)  acc5: 98.4000 (98.0000)  time: 0.5561  data: 0.2683  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8330 (0.8327)  acc1: 86.0000 (86.6667)  acc5: 97.6000 (97.8476)  time: 0.2852  data: 0.0006  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9425 (0.9117)  acc1: 81.6000 (84.6968)  acc5: 96.4000 (96.9936)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0943 (0.9679)  acc1: 80.0000 (83.2976)  acc5: 94.8000 (96.4000)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0645 (0.9885)  acc1: 79.2000 (82.5360)  acc5: 94.8000 (96.3360)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3467 s / it)
* Acc@1 82.544 Acc@5 96.376 loss 0.987
Accuracy of the model on the 50000 test images: 82.5%
Max accuracy: 82.54%
Epoch: [216]  [   0/2502]  eta: 1:32:45  lr: 0.000825  min_lr: 0.000825  loss: 3.2633 (3.2633)  weight_decay: 0.0500 (0.0500)  time: 2.2245  data: 1.6249  max mem: 53905
Epoch: [216]  [ 200/2502]  eta: 0:23:06  lr: 0.000824  min_lr: 0.000824  loss: 3.0905 (2.9725)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0064 (0.9846)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [216]  [ 400/2502]  eta: 0:20:57  lr: 0.000822  min_lr: 0.000822  loss: 3.0475 (2.9483)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9860 (0.9915)  time: 0.6097  data: 0.0006  max mem: 53905
Epoch: [216]  [ 600/2502]  eta: 0:18:55  lr: 0.000821  min_lr: 0.000821  loss: 2.8960 (2.9679)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0384 (1.0068)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [216]  [ 800/2502]  eta: 0:16:54  lr: 0.000819  min_lr: 0.000819  loss: 3.1333 (2.9656)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0739 (1.0111)  time: 0.5922  data: 0.0007  max mem: 53905
Epoch: [216]  [1000/2502]  eta: 0:14:54  lr: 0.000818  min_lr: 0.000818  loss: 2.7028 (2.9675)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0188 (1.0178)  time: 0.5910  data: 0.0007  max mem: 53905
Epoch: [216]  [1200/2502]  eta: 0:12:54  lr: 0.000817  min_lr: 0.000817  loss: 3.0148 (2.9594)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9093 (1.0120)  time: 0.5911  data: 0.0008  max mem: 53905
Epoch: [216]  [1400/2502]  eta: 0:10:55  lr: 0.000815  min_lr: 0.000815  loss: 3.1478 (2.9671)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1089 (1.0187)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [216]  [1600/2502]  eta: 0:08:56  lr: 0.000814  min_lr: 0.000814  loss: 2.8361 (2.9660)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0156 (1.0242)  time: 0.5995  data: 0.0008  max mem: 53905
Epoch: [216]  [1800/2502]  eta: 0:06:57  lr: 0.000812  min_lr: 0.000812  loss: 3.1541 (2.9626)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9619 (1.0216)  time: 0.5923  data: 0.0007  max mem: 53905
Epoch: [216]  [2000/2502]  eta: 0:04:58  lr: 0.000811  min_lr: 0.000811  loss: 2.9338 (2.9591)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0658 (1.0257)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [216]  [2200/2502]  eta: 0:02:59  lr: 0.000809  min_lr: 0.000809  loss: 3.1813 (2.9615)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0160 (1.0271)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [216]  [2400/2502]  eta: 0:01:00  lr: 0.000808  min_lr: 0.000808  loss: 3.0664 (2.9618)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0313 (nan)  time: 0.5977  data: 0.0006  max mem: 53905
Epoch: [216]  [2501/2502]  eta: 0:00:00  lr: 0.000807  min_lr: 0.000807  loss: 3.1514 (2.9654)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9878 (nan)  time: 0.5334  data: 0.0007  max mem: 53905
Epoch: [216] Total time: 0:24:45 (0.5936 s / it)
Averaged stats: lr: 0.000807  min_lr: 0.000807  loss: 3.1514 (2.9591)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9878 (nan)
Test:  [ 0/50]  eta: 0:02:57  loss: 0.6009 (0.6009)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.5442  data: 3.2399  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8901 (0.8691)  acc1: 87.6000 (88.0364)  acc5: 97.6000 (97.7818)  time: 0.5811  data: 0.2950  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8901 (0.8892)  acc1: 86.0000 (86.8191)  acc5: 97.6000 (97.7905)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0089 (0.9732)  acc1: 82.8000 (84.6323)  acc5: 96.4000 (96.8000)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1271 (1.0197)  acc1: 79.2000 (83.3366)  acc5: 94.8000 (96.2927)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1271 (1.0385)  acc1: 79.2000 (82.4800)  acc5: 95.2000 (96.2320)  time: 0.2847  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3530 s / it)
* Acc@1 82.734 Acc@5 96.354 loss 1.034
Accuracy of the model on the 50000 test images: 82.7%
Max accuracy: 82.73%
Epoch: [217]  [   0/2502]  eta: 1:33:40  lr: 0.000807  min_lr: 0.000807  loss: 2.8234 (2.8234)  weight_decay: 0.0500 (0.0500)  time: 2.2465  data: 1.6417  max mem: 53905
Epoch: [217]  [ 200/2502]  eta: 0:23:04  lr: 0.000806  min_lr: 0.000806  loss: 3.1020 (2.9400)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9604 (1.0406)  time: 0.5933  data: 0.0006  max mem: 53905
Epoch: [217]  [ 400/2502]  eta: 0:20:57  lr: 0.000804  min_lr: 0.000804  loss: 2.9831 (2.9506)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0155 (1.0170)  time: 0.5928  data: 0.0007  max mem: 53905
Epoch: [217]  [ 600/2502]  eta: 0:18:54  lr: 0.000803  min_lr: 0.000803  loss: 3.0339 (2.9630)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9848 (1.0174)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [217]  [ 800/2502]  eta: 0:16:53  lr: 0.000801  min_lr: 0.000801  loss: 3.1621 (2.9577)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9509 (1.0242)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [217]  [1000/2502]  eta: 0:14:53  lr: 0.000800  min_lr: 0.000800  loss: 3.0442 (2.9608)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9613 (1.0157)  time: 0.5905  data: 0.0005  max mem: 53905
Epoch: [217]  [1200/2502]  eta: 0:12:53  lr: 0.000799  min_lr: 0.000799  loss: 2.9837 (2.9686)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0434 (1.0245)  time: 0.5910  data: 0.0008  max mem: 53905
Epoch: [217]  [1400/2502]  eta: 0:10:55  lr: 0.000797  min_lr: 0.000797  loss: 3.1338 (2.9563)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9451 (1.0255)  time: 0.6006  data: 0.0008  max mem: 53905
Epoch: [217]  [1600/2502]  eta: 0:08:56  lr: 0.000796  min_lr: 0.000796  loss: 3.0755 (2.9519)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9980 (1.0243)  time: 0.5935  data: 0.0006  max mem: 53905
Epoch: [217]  [1800/2502]  eta: 0:06:57  lr: 0.000794  min_lr: 0.000794  loss: 2.9340 (2.9493)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0136 (1.0254)  time: 0.5953  data: 0.0008  max mem: 53905
Epoch: [217]  [2000/2502]  eta: 0:04:58  lr: 0.000793  min_lr: 0.000793  loss: 3.0288 (2.9548)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0073 (1.0255)  time: 0.5924  data: 0.0007  max mem: 53905
Epoch: [217]  [2200/2502]  eta: 0:02:59  lr: 0.000791  min_lr: 0.000791  loss: 3.1718 (2.9568)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9982 (1.0250)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [217]  [2400/2502]  eta: 0:01:00  lr: 0.000790  min_lr: 0.000790  loss: 3.0106 (2.9591)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0729 (1.0298)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [217]  [2501/2502]  eta: 0:00:00  lr: 0.000789  min_lr: 0.000789  loss: 3.0396 (2.9583)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9469 (1.0278)  time: 0.5335  data: 0.0007  max mem: 53905
Epoch: [217] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.000789  min_lr: 0.000789  loss: 3.0396 (2.9544)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9469 (1.0278)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.5401 (0.5401)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 3.0136  data: 2.7094  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8201 (0.8278)  acc1: 87.2000 (87.7091)  acc5: 98.4000 (97.8182)  time: 0.5764  data: 0.2907  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8375 (0.8465)  acc1: 86.4000 (86.6667)  acc5: 97.6000 (97.7524)  time: 0.3089  data: 0.0247  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9528 (0.9279)  acc1: 82.0000 (84.6581)  acc5: 96.4000 (96.9032)  time: 0.2906  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1223 (0.9782)  acc1: 78.8000 (83.1415)  acc5: 94.8000 (96.4683)  time: 0.2904  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0868 (0.9922)  acc1: 77.6000 (82.5280)  acc5: 95.2000 (96.3600)  time: 0.2850  data: 0.0003  max mem: 53905
Test: Total time: 0:00:17 (0.3535 s / it)
* Acc@1 82.536 Acc@5 96.414 loss 0.986
Accuracy of the model on the 50000 test images: 82.5%
Max accuracy: 82.73%
Epoch: [218]  [   0/2502]  eta: 1:43:30  lr: 0.000789  min_lr: 0.000789  loss: 3.0856 (3.0856)  weight_decay: 0.0500 (0.0500)  time: 2.4821  data: 1.8762  max mem: 53905
Epoch: [218]  [ 200/2502]  eta: 0:23:05  lr: 0.000788  min_lr: 0.000788  loss: 3.0232 (2.9612)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9669 (1.0428)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [218]  [ 400/2502]  eta: 0:20:56  lr: 0.000786  min_lr: 0.000786  loss: 3.1562 (2.9400)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0032 (1.0300)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [218]  [ 600/2502]  eta: 0:18:55  lr: 0.000785  min_lr: 0.000785  loss: 3.0759 (2.9324)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0617 (1.0450)  time: 0.5929  data: 0.0006  max mem: 53905
Epoch: [218]  [ 800/2502]  eta: 0:16:54  lr: 0.000784  min_lr: 0.000784  loss: 3.0787 (2.9304)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9956 (1.0466)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [218]  [1000/2502]  eta: 0:14:54  lr: 0.000782  min_lr: 0.000782  loss: 3.0707 (2.9331)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0131 (1.0349)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [218]  [1200/2502]  eta: 0:12:54  lr: 0.000781  min_lr: 0.000781  loss: 3.1335 (2.9335)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9724 (1.0345)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [218]  [1400/2502]  eta: 0:10:55  lr: 0.000779  min_lr: 0.000779  loss: 3.0898 (2.9401)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9415 (1.0322)  time: 0.5932  data: 0.0006  max mem: 53905
Epoch: [218]  [1600/2502]  eta: 0:08:56  lr: 0.000778  min_lr: 0.000778  loss: 2.8435 (2.9474)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9879 (1.0277)  time: 0.6073  data: 0.0004  max mem: 53905
Epoch: [218]  [1800/2502]  eta: 0:06:57  lr: 0.000776  min_lr: 0.000776  loss: 3.1557 (2.9486)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9502 (1.0249)  time: 0.5935  data: 0.0004  max mem: 53905
Epoch: [218]  [2000/2502]  eta: 0:04:58  lr: 0.000775  min_lr: 0.000775  loss: 3.0725 (2.9528)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0118 (1.0274)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [218]  [2200/2502]  eta: 0:02:59  lr: 0.000774  min_lr: 0.000774  loss: 3.2223 (2.9559)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0139 (1.0314)  time: 0.5988  data: 0.0004  max mem: 53905
Epoch: [218]  [2400/2502]  eta: 0:01:00  lr: 0.000772  min_lr: 0.000772  loss: 3.0813 (2.9560)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0432 (1.0317)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [218]  [2501/2502]  eta: 0:00:00  lr: 0.000771  min_lr: 0.000771  loss: 3.0494 (2.9558)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1435 (1.0344)  time: 0.5345  data: 0.0012  max mem: 53905
Epoch: [218] Total time: 0:24:46 (0.5941 s / it)
Averaged stats: lr: 0.000771  min_lr: 0.000771  loss: 3.0494 (2.9498)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1435 (1.0344)
Test:  [ 0/50]  eta: 0:02:59  loss: 0.5751 (0.5751)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.5819  data: 3.2653  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8564 (0.8577)  acc1: 86.4000 (87.2000)  acc5: 98.8000 (98.2546)  time: 0.6007  data: 0.3138  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.9099 (0.8922)  acc1: 85.6000 (86.4762)  acc5: 98.0000 (98.0000)  time: 0.2938  data: 0.0095  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0227 (0.9734)  acc1: 82.4000 (84.7742)  acc5: 96.4000 (97.1097)  time: 0.2891  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1709 (1.0233)  acc1: 80.4000 (83.5122)  acc5: 94.8000 (96.5854)  time: 0.2914  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1472 (1.0376)  acc1: 80.0000 (82.8240)  acc5: 95.2000 (96.5040)  time: 0.2878  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3589 s / it)
* Acc@1 82.758 Acc@5 96.488 loss 1.037
Accuracy of the model on the 50000 test images: 82.8%
Max accuracy: 82.76%
Epoch: [219]  [   0/2502]  eta: 1:49:44  lr: 0.000771  min_lr: 0.000771  loss: 2.0178 (2.0178)  weight_decay: 0.0500 (0.0500)  time: 2.6319  data: 2.0411  max mem: 53905
Epoch: [219]  [ 200/2502]  eta: 0:23:11  lr: 0.000770  min_lr: 0.000770  loss: 3.0833 (2.9917)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9746 (1.0345)  time: 0.6007  data: 0.0004  max mem: 53905
Epoch: [219]  [ 400/2502]  eta: 0:20:58  lr: 0.000769  min_lr: 0.000769  loss: 3.0452 (2.9767)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0058 (1.0284)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [219]  [ 600/2502]  eta: 0:18:55  lr: 0.000767  min_lr: 0.000767  loss: 2.8951 (2.9577)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9792 (1.0242)  time: 0.5980  data: 0.0004  max mem: 53905
Epoch: [219]  [ 800/2502]  eta: 0:16:53  lr: 0.000766  min_lr: 0.000766  loss: 3.0293 (2.9656)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0105 (1.0230)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [219]  [1000/2502]  eta: 0:14:53  lr: 0.000764  min_lr: 0.000764  loss: 2.9176 (2.9558)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0681 (1.0391)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [219]  [1200/2502]  eta: 0:12:53  lr: 0.000763  min_lr: 0.000763  loss: 3.0985 (2.9606)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0085 (1.0382)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [219]  [1400/2502]  eta: 0:10:54  lr: 0.000762  min_lr: 0.000762  loss: 3.1628 (2.9576)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9567 (1.0411)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [219]  [1600/2502]  eta: 0:08:55  lr: 0.000760  min_lr: 0.000760  loss: 3.0622 (2.9554)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0775 (1.0497)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [219]  [1800/2502]  eta: 0:06:56  lr: 0.000759  min_lr: 0.000759  loss: 3.2649 (2.9602)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0406 (1.0485)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [219]  [2000/2502]  eta: 0:04:58  lr: 0.000757  min_lr: 0.000757  loss: 2.7296 (2.9603)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0290 (1.0524)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [219]  [2200/2502]  eta: 0:02:59  lr: 0.000756  min_lr: 0.000756  loss: 3.0892 (2.9578)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0081 (1.0557)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [219]  [2400/2502]  eta: 0:01:00  lr: 0.000755  min_lr: 0.000755  loss: 3.0333 (2.9564)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9725 (1.0527)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [219]  [2501/2502]  eta: 0:00:00  lr: 0.000754  min_lr: 0.000754  loss: 3.2067 (2.9576)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9727 (1.0503)  time: 0.5323  data: 0.0006  max mem: 53905
Epoch: [219] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.000754  min_lr: 0.000754  loss: 3.2067 (2.9534)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9727 (1.0503)
Test:  [ 0/50]  eta: 0:02:42  loss: 0.5781 (0.5781)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.2416  data: 2.9143  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8360 (0.8148)  acc1: 87.2000 (87.3091)  acc5: 98.4000 (97.8909)  time: 0.5509  data: 0.2654  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8339 (0.8420)  acc1: 85.2000 (86.1524)  acc5: 97.6000 (97.8095)  time: 0.2820  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9302 (0.9304)  acc1: 81.6000 (84.2323)  acc5: 96.8000 (96.7742)  time: 0.2822  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1266 (0.9819)  acc1: 78.8000 (82.7415)  acc5: 94.0000 (96.1171)  time: 0.2821  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1167 (0.9970)  acc1: 77.6000 (82.2240)  acc5: 95.2000 (96.1200)  time: 0.2819  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3440 s / it)
* Acc@1 82.402 Acc@5 96.366 loss 0.993
Accuracy of the model on the 50000 test images: 82.4%
Max accuracy: 82.76%
Epoch: [220]  [   0/2502]  eta: 1:54:37  lr: 0.000754  min_lr: 0.000754  loss: 3.2963 (3.2963)  weight_decay: 0.0500 (0.0500)  time: 2.7488  data: 2.1401  max mem: 53905
Epoch: [220]  [ 200/2502]  eta: 0:23:06  lr: 0.000752  min_lr: 0.000752  loss: 3.1614 (2.9577)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9944 (0.9892)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [220]  [ 400/2502]  eta: 0:20:57  lr: 0.000751  min_lr: 0.000751  loss: 2.7960 (2.9503)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0427 (1.0028)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [220]  [ 600/2502]  eta: 0:18:54  lr: 0.000750  min_lr: 0.000750  loss: 3.1842 (2.9600)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0290 (1.0267)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [220]  [ 800/2502]  eta: 0:16:53  lr: 0.000748  min_lr: 0.000748  loss: 2.8194 (2.9395)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9723 (1.0292)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [220]  [1000/2502]  eta: 0:14:53  lr: 0.000747  min_lr: 0.000747  loss: 3.2237 (2.9362)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9950 (1.0286)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [220]  [1200/2502]  eta: 0:12:54  lr: 0.000745  min_lr: 0.000745  loss: 3.1067 (2.9351)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0352 (1.0345)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [220]  [1400/2502]  eta: 0:10:55  lr: 0.000744  min_lr: 0.000744  loss: 3.2808 (2.9368)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1141 (1.0365)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [220]  [1600/2502]  eta: 0:08:56  lr: 0.000743  min_lr: 0.000743  loss: 3.1552 (2.9274)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9942 (1.0389)  time: 0.5967  data: 0.0004  max mem: 53905
Epoch: [220]  [1800/2502]  eta: 0:06:56  lr: 0.000741  min_lr: 0.000741  loss: 3.2137 (2.9262)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0084 (1.0354)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [220]  [2000/2502]  eta: 0:04:58  lr: 0.000740  min_lr: 0.000740  loss: 3.2387 (2.9312)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0568 (1.0390)  time: 0.5977  data: 0.0004  max mem: 53905
Epoch: [220]  [2200/2502]  eta: 0:02:59  lr: 0.000738  min_lr: 0.000738  loss: 2.9716 (2.9332)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0928 (1.0421)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [220]  [2400/2502]  eta: 0:01:00  lr: 0.000737  min_lr: 0.000737  loss: 2.9627 (2.9312)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9946 (1.0411)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [220]  [2501/2502]  eta: 0:00:00  lr: 0.000736  min_lr: 0.000736  loss: 3.1343 (2.9314)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0838 (1.0440)  time: 0.5330  data: 0.0007  max mem: 53905
Epoch: [220] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.000736  min_lr: 0.000736  loss: 3.1343 (2.9356)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0838 (1.0440)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.5519 (0.5519)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.0567  data: 2.7351  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8599 (0.8279)  acc1: 87.2000 (87.2364)  acc5: 98.4000 (97.9273)  time: 0.5814  data: 0.2943  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8606 (0.8636)  acc1: 84.8000 (86.4571)  acc5: 97.2000 (97.5429)  time: 0.3148  data: 0.0253  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0140 (0.9394)  acc1: 82.8000 (84.8516)  acc5: 96.0000 (96.7613)  time: 0.2905  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1158 (0.9885)  acc1: 80.4000 (83.4927)  acc5: 94.4000 (96.3415)  time: 0.2847  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1158 (1.0094)  acc1: 79.2000 (82.8400)  acc5: 95.6000 (96.2880)  time: 0.2841  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3546 s / it)
* Acc@1 82.754 Acc@5 96.468 loss 1.009
Accuracy of the model on the 50000 test images: 82.8%
Max accuracy: 82.76%
Epoch: [221]  [   0/2502]  eta: 1:59:33  lr: 0.000736  min_lr: 0.000736  loss: 3.4908 (3.4908)  weight_decay: 0.0500 (0.0500)  time: 2.8672  data: 1.7177  max mem: 53905
Epoch: [221]  [ 200/2502]  eta: 0:23:15  lr: 0.000735  min_lr: 0.000735  loss: 3.0964 (2.9313)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0186 (1.0995)  time: 0.6007  data: 0.0006  max mem: 53905
Epoch: [221]  [ 400/2502]  eta: 0:21:00  lr: 0.000734  min_lr: 0.000734  loss: 3.1786 (2.9398)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9678 (1.0527)  time: 0.6013  data: 0.0005  max mem: 53905
Epoch: [221]  [ 600/2502]  eta: 0:18:56  lr: 0.000732  min_lr: 0.000732  loss: 2.9535 (2.9322)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0647 (1.0597)  time: 0.5984  data: 0.0007  max mem: 53905
Epoch: [221]  [ 800/2502]  eta: 0:16:55  lr: 0.000731  min_lr: 0.000731  loss: 2.9967 (2.9417)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0376 (1.0652)  time: 0.5935  data: 0.0005  max mem: 53905
Epoch: [221]  [1000/2502]  eta: 0:14:54  lr: 0.000729  min_lr: 0.000729  loss: 3.0710 (2.9406)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9669 (1.0551)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [221]  [1200/2502]  eta: 0:12:55  lr: 0.000728  min_lr: 0.000728  loss: 3.0129 (2.9412)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9953 (1.0525)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [221]  [1400/2502]  eta: 0:10:55  lr: 0.000727  min_lr: 0.000727  loss: 2.9945 (2.9395)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0451 (1.0469)  time: 0.5904  data: 0.0005  max mem: 53905
Epoch: [221]  [1600/2502]  eta: 0:08:56  lr: 0.000725  min_lr: 0.000725  loss: 3.0870 (2.9387)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0892 (1.0434)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [221]  [1800/2502]  eta: 0:06:57  lr: 0.000724  min_lr: 0.000724  loss: 3.1718 (2.9432)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0288 (1.0486)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [221]  [2000/2502]  eta: 0:04:58  lr: 0.000723  min_lr: 0.000723  loss: 2.9209 (2.9413)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0448 (1.0509)  time: 0.5933  data: 0.0006  max mem: 53905
Epoch: [221]  [2200/2502]  eta: 0:02:59  lr: 0.000721  min_lr: 0.000721  loss: 3.1092 (2.9450)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0632 (1.0511)  time: 0.5936  data: 0.0009  max mem: 53905
Epoch: [221]  [2400/2502]  eta: 0:01:00  lr: 0.000720  min_lr: 0.000720  loss: 2.8809 (2.9469)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1266 (1.0550)  time: 0.5968  data: 0.0007  max mem: 53905
Epoch: [221]  [2501/2502]  eta: 0:00:00  lr: 0.000719  min_lr: 0.000719  loss: 2.9380 (2.9447)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0368 (1.0552)  time: 0.5338  data: 0.0007  max mem: 53905
Epoch: [221] Total time: 0:24:45 (0.5939 s / it)
Averaged stats: lr: 0.000719  min_lr: 0.000719  loss: 2.9380 (2.9372)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0368 (1.0552)
Test:  [ 0/50]  eta: 0:02:35  loss: 0.5470 (0.5470)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.1081  data: 2.7938  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.8332 (0.8309)  acc1: 88.4000 (87.5273)  acc5: 98.8000 (97.9636)  time: 0.5415  data: 0.2544  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8384 (0.8684)  acc1: 85.2000 (86.4191)  acc5: 98.0000 (97.6762)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0043 (0.9462)  acc1: 82.0000 (84.5419)  acc5: 96.0000 (96.7484)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1020 (0.9915)  acc1: 80.8000 (83.2683)  acc5: 94.4000 (96.2634)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0850 (1.0055)  acc1: 79.6000 (82.5920)  acc5: 95.2000 (96.2560)  time: 0.2844  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3443 s / it)
* Acc@1 82.754 Acc@5 96.396 loss 0.999
Accuracy of the model on the 50000 test images: 82.8%
Max accuracy: 82.76%
Epoch: [222]  [   0/2502]  eta: 1:59:09  lr: 0.000719  min_lr: 0.000719  loss: 2.8392 (2.8392)  weight_decay: 0.0500 (0.0500)  time: 2.8576  data: 2.1145  max mem: 53905
Epoch: [222]  [ 200/2502]  eta: 0:23:08  lr: 0.000718  min_lr: 0.000718  loss: 3.0907 (2.8880)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0026 (1.0143)  time: 0.5917  data: 0.0007  max mem: 53905
Epoch: [222]  [ 400/2502]  eta: 0:20:57  lr: 0.000716  min_lr: 0.000716  loss: 2.9574 (2.9156)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9575 (1.0271)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [222]  [ 600/2502]  eta: 0:18:54  lr: 0.000715  min_lr: 0.000715  loss: 3.0655 (2.9256)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0539 (1.0483)  time: 0.5915  data: 0.0008  max mem: 53905
Epoch: [222]  [ 800/2502]  eta: 0:16:53  lr: 0.000714  min_lr: 0.000714  loss: 2.9107 (2.9224)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0550 (1.0479)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [222]  [1000/2502]  eta: 0:14:54  lr: 0.000712  min_lr: 0.000712  loss: 2.8636 (2.9221)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0623 (1.0478)  time: 0.5959  data: 0.0006  max mem: 53905
Epoch: [222]  [1200/2502]  eta: 0:12:54  lr: 0.000711  min_lr: 0.000711  loss: 3.0346 (2.9229)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0026 (1.0519)  time: 0.5971  data: 0.0006  max mem: 53905
Epoch: [222]  [1400/2502]  eta: 0:10:54  lr: 0.000709  min_lr: 0.000709  loss: 2.9391 (2.9297)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0252 (1.0522)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [222]  [1600/2502]  eta: 0:08:55  lr: 0.000708  min_lr: 0.000708  loss: 2.9943 (2.9255)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9919 (1.0479)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [222]  [1800/2502]  eta: 0:06:57  lr: 0.000707  min_lr: 0.000707  loss: 3.1900 (2.9313)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0837 (1.0550)  time: 0.5936  data: 0.0004  max mem: 53905
Epoch: [222]  [2000/2502]  eta: 0:04:58  lr: 0.000705  min_lr: 0.000705  loss: 3.1091 (2.9334)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0381 (1.0516)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [222]  [2200/2502]  eta: 0:02:59  lr: 0.000704  min_lr: 0.000704  loss: 3.2036 (2.9347)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0085 (1.0551)  time: 0.5932  data: 0.0006  max mem: 53905
Epoch: [222]  [2400/2502]  eta: 0:01:00  lr: 0.000703  min_lr: 0.000703  loss: 3.1906 (2.9370)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0650 (1.0572)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [222]  [2501/2502]  eta: 0:00:00  lr: 0.000702  min_lr: 0.000702  loss: 2.8784 (2.9337)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0504 (1.0578)  time: 0.5320  data: 0.0007  max mem: 53905
Epoch: [222] Total time: 0:24:45 (0.5936 s / it)
Averaged stats: lr: 0.000702  min_lr: 0.000702  loss: 2.8784 (2.9207)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0504 (1.0578)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.5119 (0.5119)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.0056  data: 2.6946  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7895 (0.7747)  acc1: 87.2000 (87.8182)  acc5: 98.0000 (98.0000)  time: 0.5565  data: 0.2727  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8116 (0.8106)  acc1: 86.0000 (86.8571)  acc5: 98.0000 (97.9619)  time: 0.2968  data: 0.0155  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9274 (0.8942)  acc1: 84.4000 (85.0581)  acc5: 96.4000 (96.9677)  time: 0.2821  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0949 (0.9537)  acc1: 78.8000 (83.4537)  acc5: 94.8000 (96.4390)  time: 0.2820  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1220 (0.9737)  acc1: 78.4000 (82.7760)  acc5: 95.2000 (96.3200)  time: 0.2818  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3448 s / it)
* Acc@1 82.930 Acc@5 96.440 loss 0.970
Accuracy of the model on the 50000 test images: 82.9%
Max accuracy: 82.93%
Epoch: [223]  [   0/2502]  eta: 1:30:03  lr: 0.000702  min_lr: 0.000702  loss: 2.6312 (2.6312)  weight_decay: 0.0500 (0.0500)  time: 2.1596  data: 1.5593  max mem: 53905
Epoch: [223]  [ 200/2502]  eta: 0:23:07  lr: 0.000701  min_lr: 0.000701  loss: 2.9510 (2.9036)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0040 (1.0347)  time: 0.6007  data: 0.0006  max mem: 53905
Epoch: [223]  [ 400/2502]  eta: 0:20:55  lr: 0.000699  min_lr: 0.000699  loss: 2.9618 (2.8818)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9657 (1.0392)  time: 0.5923  data: 0.0007  max mem: 53905
Epoch: [223]  [ 600/2502]  eta: 0:18:53  lr: 0.000698  min_lr: 0.000698  loss: 3.0555 (2.8928)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5923  data: 0.0006  max mem: 53905
Epoch: [223]  [ 800/2502]  eta: 0:16:53  lr: 0.000696  min_lr: 0.000696  loss: 3.0141 (2.9247)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0897 (nan)  time: 0.5936  data: 0.0006  max mem: 53905
Epoch: [223]  [1000/2502]  eta: 0:14:53  lr: 0.000695  min_lr: 0.000695  loss: 2.9750 (2.9204)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9633 (nan)  time: 0.5920  data: 0.0007  max mem: 53905
Epoch: [223]  [1200/2502]  eta: 0:12:54  lr: 0.000694  min_lr: 0.000694  loss: 2.8632 (2.9173)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1129 (nan)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [223]  [1400/2502]  eta: 0:10:55  lr: 0.000692  min_lr: 0.000692  loss: 2.9846 (2.9187)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9350 (nan)  time: 0.5935  data: 0.0007  max mem: 53905
Epoch: [223]  [1600/2502]  eta: 0:08:56  lr: 0.000691  min_lr: 0.000691  loss: 2.7856 (2.9189)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0339 (nan)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [223]  [1800/2502]  eta: 0:06:57  lr: 0.000690  min_lr: 0.000690  loss: 3.2183 (2.9222)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9696 (nan)  time: 0.6067  data: 0.0009  max mem: 53905
Epoch: [223]  [2000/2502]  eta: 0:04:58  lr: 0.000688  min_lr: 0.000688  loss: 2.9169 (2.9208)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9963 (nan)  time: 0.5925  data: 0.0007  max mem: 53905
Epoch: [223]  [2200/2502]  eta: 0:02:59  lr: 0.000687  min_lr: 0.000687  loss: 2.9695 (2.9243)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0456 (nan)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [223]  [2400/2502]  eta: 0:01:00  lr: 0.000686  min_lr: 0.000686  loss: 3.0314 (2.9236)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1070 (nan)  time: 0.5924  data: 0.0007  max mem: 53905
Epoch: [223]  [2501/2502]  eta: 0:00:00  lr: 0.000685  min_lr: 0.000685  loss: 3.0285 (2.9224)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0149 (nan)  time: 0.5331  data: 0.0007  max mem: 53905
Epoch: [223] Total time: 0:24:45 (0.5939 s / it)
Averaged stats: lr: 0.000685  min_lr: 0.000685  loss: 3.0285 (2.9235)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0149 (nan)
Test:  [ 0/50]  eta: 0:02:48  loss: 0.5228 (0.5228)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.3676  data: 3.0419  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7715 (0.7830)  acc1: 88.8000 (88.3636)  acc5: 98.4000 (98.2182)  time: 0.5649  data: 0.2770  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8367 (0.8256)  acc1: 85.2000 (86.8000)  acc5: 98.0000 (98.1143)  time: 0.2849  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9690 (0.9111)  acc1: 83.6000 (85.0839)  acc5: 96.8000 (97.0581)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1362 (0.9680)  acc1: 79.6000 (83.5805)  acc5: 94.4000 (96.5073)  time: 0.2847  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0886 (0.9855)  acc1: 78.4000 (82.8720)  acc5: 95.2000 (96.3840)  time: 0.2844  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3489 s / it)
* Acc@1 82.838 Acc@5 96.510 loss 0.981
Accuracy of the model on the 50000 test images: 82.8%
Max accuracy: 82.93%
Epoch: [224]  [   0/2502]  eta: 1:52:09  lr: 0.000685  min_lr: 0.000685  loss: 2.1791 (2.1791)  weight_decay: 0.0500 (0.0500)  time: 2.6898  data: 2.0749  max mem: 53905
Epoch: [224]  [ 200/2502]  eta: 0:23:07  lr: 0.000684  min_lr: 0.000684  loss: 2.9183 (2.8875)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0027 (1.0132)  time: 0.5932  data: 0.0006  max mem: 53905
Epoch: [224]  [ 400/2502]  eta: 0:20:59  lr: 0.000682  min_lr: 0.000682  loss: 2.9438 (2.8750)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1139 (1.0552)  time: 0.6100  data: 0.0005  max mem: 53905
Epoch: [224]  [ 600/2502]  eta: 0:18:54  lr: 0.000681  min_lr: 0.000681  loss: 2.9879 (2.8802)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9649 (1.0432)  time: 0.5908  data: 0.0006  max mem: 53905
Epoch: [224]  [ 800/2502]  eta: 0:16:53  lr: 0.000680  min_lr: 0.000680  loss: 3.0575 (2.8888)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0915 (1.0569)  time: 0.5911  data: 0.0007  max mem: 53905
Epoch: [224]  [1000/2502]  eta: 0:14:53  lr: 0.000678  min_lr: 0.000678  loss: 3.0426 (2.8976)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0521 (1.0488)  time: 0.5912  data: 0.0007  max mem: 53905
Epoch: [224]  [1200/2502]  eta: 0:12:53  lr: 0.000677  min_lr: 0.000677  loss: 2.9691 (2.9038)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1188 (1.0561)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [224]  [1400/2502]  eta: 0:10:54  lr: 0.000676  min_lr: 0.000676  loss: 2.9763 (2.8987)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1142 (1.0614)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [224]  [1600/2502]  eta: 0:08:55  lr: 0.000674  min_lr: 0.000674  loss: 2.8994 (2.9002)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0744 (1.0655)  time: 0.5923  data: 0.0006  max mem: 53905
Epoch: [224]  [1800/2502]  eta: 0:06:56  lr: 0.000673  min_lr: 0.000673  loss: 2.6846 (2.8992)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0406 (1.0642)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [224]  [2000/2502]  eta: 0:04:58  lr: 0.000671  min_lr: 0.000671  loss: 3.1325 (2.9026)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0637 (1.0670)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [224]  [2200/2502]  eta: 0:02:59  lr: 0.000670  min_lr: 0.000670  loss: 3.0887 (2.8989)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1290 (1.0678)  time: 0.5917  data: 0.0008  max mem: 53905
Epoch: [224]  [2400/2502]  eta: 0:01:00  lr: 0.000669  min_lr: 0.000669  loss: 3.1694 (2.9036)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1555 (1.0728)  time: 0.5914  data: 0.0007  max mem: 53905
Epoch: [224]  [2501/2502]  eta: 0:00:00  lr: 0.000668  min_lr: 0.000668  loss: 2.9496 (2.9042)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0315 (1.0715)  time: 0.5324  data: 0.0007  max mem: 53905
Epoch: [224] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.000668  min_lr: 0.000668  loss: 2.9496 (2.9072)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0315 (1.0715)
Test:  [ 0/50]  eta: 0:02:39  loss: 0.5112 (0.5112)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.1953  data: 2.8749  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7699 (0.7873)  acc1: 90.0000 (87.7091)  acc5: 98.4000 (98.0000)  time: 0.5834  data: 0.2973  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8309 (0.8283)  acc1: 85.6000 (86.8000)  acc5: 98.0000 (97.7905)  time: 0.3030  data: 0.0200  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9738 (0.9077)  acc1: 83.6000 (85.0839)  acc5: 96.4000 (97.0194)  time: 0.2839  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1120 (0.9669)  acc1: 79.6000 (83.5317)  acc5: 94.8000 (96.5366)  time: 0.2837  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1372 (0.9839)  acc1: 78.8000 (82.9440)  acc5: 95.2000 (96.4880)  time: 0.2835  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3518 s / it)
* Acc@1 83.118 Acc@5 96.546 loss 0.976
Accuracy of the model on the 50000 test images: 83.1%
Max accuracy: 83.12%
Epoch: [225]  [   0/2502]  eta: 1:45:06  lr: 0.000668  min_lr: 0.000668  loss: 3.1252 (3.1252)  weight_decay: 0.0500 (0.0500)  time: 2.5204  data: 1.9109  max mem: 53905
Epoch: [225]  [ 200/2502]  eta: 0:23:07  lr: 0.000667  min_lr: 0.000667  loss: 2.8223 (2.8936)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9825 (inf)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [225]  [ 400/2502]  eta: 0:20:55  lr: 0.000665  min_lr: 0.000665  loss: 3.1993 (2.8991)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0421 (inf)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [225]  [ 600/2502]  eta: 0:18:53  lr: 0.000664  min_lr: 0.000664  loss: 2.8578 (2.8984)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0374 (inf)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [225]  [ 800/2502]  eta: 0:16:53  lr: 0.000663  min_lr: 0.000663  loss: 2.8938 (2.8890)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1076 (inf)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [225]  [1000/2502]  eta: 0:14:52  lr: 0.000661  min_lr: 0.000661  loss: 3.1570 (2.8925)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0957 (inf)  time: 0.5900  data: 0.0004  max mem: 53905
Epoch: [225]  [1200/2502]  eta: 0:12:53  lr: 0.000660  min_lr: 0.000660  loss: 2.7846 (2.8991)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0119 (inf)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [225]  [1400/2502]  eta: 0:10:54  lr: 0.000659  min_lr: 0.000659  loss: 3.0891 (2.9025)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0141 (inf)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [225]  [1600/2502]  eta: 0:08:55  lr: 0.000657  min_lr: 0.000657  loss: 2.9959 (2.9009)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0444 (inf)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [225]  [1800/2502]  eta: 0:06:56  lr: 0.000656  min_lr: 0.000656  loss: 2.9930 (2.9043)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0501 (inf)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [225]  [2000/2502]  eta: 0:04:57  lr: 0.000655  min_lr: 0.000655  loss: 3.0289 (2.9055)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0249 (inf)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [225]  [2200/2502]  eta: 0:02:59  lr: 0.000653  min_lr: 0.000653  loss: 2.9009 (2.9059)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1063 (inf)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [225]  [2400/2502]  eta: 0:01:00  lr: 0.000652  min_lr: 0.000652  loss: 2.8090 (2.9037)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0375 (inf)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [225]  [2501/2502]  eta: 0:00:00  lr: 0.000652  min_lr: 0.000652  loss: 3.1837 (2.9031)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0484 (inf)  time: 0.5318  data: 0.0009  max mem: 53905
Epoch: [225] Total time: 0:24:42 (0.5927 s / it)
Averaged stats: lr: 0.000652  min_lr: 0.000652  loss: 3.1837 (2.9031)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0484 (inf)
Test:  [ 0/50]  eta: 0:03:13  loss: 0.5684 (0.5684)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.8714  data: 3.5586  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8084 (0.7982)  acc1: 88.4000 (87.7455)  acc5: 98.4000 (97.8909)  time: 0.6074  data: 0.3239  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8165 (0.8312)  acc1: 85.6000 (86.7429)  acc5: 98.0000 (97.8476)  time: 0.2811  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9561 (0.9082)  acc1: 82.8000 (84.9806)  acc5: 96.8000 (97.0194)  time: 0.2813  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1324 (0.9616)  acc1: 79.6000 (83.3951)  acc5: 94.8000 (96.5268)  time: 0.2810  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1338 (0.9787)  acc1: 78.8000 (82.6960)  acc5: 95.2000 (96.4640)  time: 0.2808  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3551 s / it)
* Acc@1 82.946 Acc@5 96.546 loss 0.975
Accuracy of the model on the 50000 test images: 82.9%
Max accuracy: 83.12%
Epoch: [226]  [   0/2502]  eta: 2:01:55  lr: 0.000651  min_lr: 0.000651  loss: 3.1608 (3.1608)  weight_decay: 0.0500 (0.0500)  time: 2.9239  data: 1.5311  max mem: 53905
Epoch: [226]  [ 200/2502]  eta: 0:23:09  lr: 0.000650  min_lr: 0.000650  loss: 3.0632 (2.8973)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0142 (1.0824)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [226]  [ 400/2502]  eta: 0:20:58  lr: 0.000649  min_lr: 0.000649  loss: 3.1743 (2.9210)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0179 (1.0849)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [226]  [ 600/2502]  eta: 0:18:54  lr: 0.000648  min_lr: 0.000648  loss: 2.8911 (2.9233)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0087 (1.0825)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [226]  [ 800/2502]  eta: 0:16:53  lr: 0.000646  min_lr: 0.000646  loss: 3.0277 (2.9199)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0897 (1.0863)  time: 0.5935  data: 0.0010  max mem: 53905
Epoch: [226]  [1000/2502]  eta: 0:14:53  lr: 0.000645  min_lr: 0.000645  loss: 2.9289 (2.9104)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0232 (1.0893)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [226]  [1200/2502]  eta: 0:12:53  lr: 0.000644  min_lr: 0.000644  loss: 2.8487 (2.9073)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0330 (1.0844)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [226]  [1400/2502]  eta: 0:10:54  lr: 0.000642  min_lr: 0.000642  loss: 3.0175 (2.9059)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0599 (1.0849)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [226]  [1600/2502]  eta: 0:08:55  lr: 0.000641  min_lr: 0.000641  loss: 3.0499 (2.9104)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0733 (1.0874)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [226]  [1800/2502]  eta: 0:06:56  lr: 0.000640  min_lr: 0.000640  loss: 3.0591 (2.9145)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0392 (1.0856)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [226]  [2000/2502]  eta: 0:04:58  lr: 0.000638  min_lr: 0.000638  loss: 3.0529 (2.9104)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0946 (1.0861)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [226]  [2200/2502]  eta: 0:02:59  lr: 0.000637  min_lr: 0.000637  loss: 2.9232 (2.9046)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1079 (1.0889)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [226]  [2400/2502]  eta: 0:01:00  lr: 0.000636  min_lr: 0.000636  loss: 2.5970 (2.9031)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0498 (1.0904)  time: 0.6005  data: 0.0005  max mem: 53905
Epoch: [226]  [2501/2502]  eta: 0:00:00  lr: 0.000635  min_lr: 0.000635  loss: 3.0460 (2.9024)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1586 (1.0933)  time: 0.5331  data: 0.0008  max mem: 53905
Epoch: [226] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.000635  min_lr: 0.000635  loss: 3.0460 (2.8976)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1586 (1.0933)
Test:  [ 0/50]  eta: 0:03:04  loss: 0.5241 (0.5241)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.6869  data: 3.3631  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8299 (0.8169)  acc1: 87.6000 (87.4909)  acc5: 98.0000 (97.8182)  time: 0.5994  data: 0.3122  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8299 (0.8437)  acc1: 85.6000 (86.5714)  acc5: 97.6000 (97.8286)  time: 0.2875  data: 0.0038  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9981 (0.9254)  acc1: 82.8000 (84.9548)  acc5: 96.4000 (96.8387)  time: 0.2844  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1097 (0.9824)  acc1: 80.0000 (83.3463)  acc5: 94.8000 (96.4390)  time: 0.2842  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1097 (0.9967)  acc1: 78.4000 (82.7200)  acc5: 95.6000 (96.3920)  time: 0.2839  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3562 s / it)
* Acc@1 82.884 Acc@5 96.502 loss 0.990
Accuracy of the model on the 50000 test images: 82.9%
Max accuracy: 83.12%
Epoch: [227]  [   0/2502]  eta: 1:52:05  lr: 0.000635  min_lr: 0.000635  loss: 1.6531 (1.6531)  weight_decay: 0.0500 (0.0500)  time: 2.6880  data: 1.9633  max mem: 53905
Epoch: [227]  [ 200/2502]  eta: 0:23:09  lr: 0.000634  min_lr: 0.000634  loss: 2.7510 (2.9140)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1063 (1.1113)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [227]  [ 400/2502]  eta: 0:20:55  lr: 0.000632  min_lr: 0.000632  loss: 3.1055 (2.9288)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0662 (1.0951)  time: 0.5902  data: 0.0005  max mem: 53905
Epoch: [227]  [ 600/2502]  eta: 0:18:52  lr: 0.000631  min_lr: 0.000631  loss: 2.9619 (2.9185)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0248 (1.0816)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [227]  [ 800/2502]  eta: 0:16:51  lr: 0.000630  min_lr: 0.000630  loss: 3.0169 (2.9122)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1203 (1.0976)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [227]  [1000/2502]  eta: 0:14:52  lr: 0.000628  min_lr: 0.000628  loss: 2.9543 (2.9192)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0168 (1.0947)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [227]  [1200/2502]  eta: 0:12:52  lr: 0.000627  min_lr: 0.000627  loss: 2.8549 (2.9163)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0341 (1.0966)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [227]  [1400/2502]  eta: 0:10:53  lr: 0.000626  min_lr: 0.000626  loss: 3.2149 (2.9168)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1437 (1.1080)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [227]  [1600/2502]  eta: 0:08:55  lr: 0.000625  min_lr: 0.000625  loss: 3.0410 (2.9093)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0985 (1.1070)  time: 0.5956  data: 0.0005  max mem: 53905
Epoch: [227]  [1800/2502]  eta: 0:06:56  lr: 0.000623  min_lr: 0.000623  loss: 2.8646 (2.9078)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1296 (1.1045)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [227]  [2000/2502]  eta: 0:04:57  lr: 0.000622  min_lr: 0.000622  loss: 2.9989 (2.9036)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0312 (1.1009)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [227]  [2200/2502]  eta: 0:02:59  lr: 0.000621  min_lr: 0.000621  loss: 2.7575 (2.9049)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0604 (1.1024)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [227]  [2400/2502]  eta: 0:01:00  lr: 0.000619  min_lr: 0.000619  loss: 2.7223 (2.9058)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0808 (1.1050)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [227]  [2501/2502]  eta: 0:00:00  lr: 0.000619  min_lr: 0.000619  loss: 2.9681 (2.9071)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0691 (1.1058)  time: 0.5326  data: 0.0007  max mem: 53905
Epoch: [227] Total time: 0:24:42 (0.5925 s / it)
Averaged stats: lr: 0.000619  min_lr: 0.000619  loss: 2.9681 (2.8972)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0691 (1.1058)
Test:  [ 0/50]  eta: 0:02:40  loss: 0.5455 (0.5455)  acc1: 93.6000 (93.6000)  acc5: 99.2000 (99.2000)  time: 3.2172  data: 2.8883  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8135 (0.7951)  acc1: 88.0000 (88.2909)  acc5: 98.4000 (98.1091)  time: 0.5554  data: 0.2675  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8340 (0.8365)  acc1: 86.4000 (86.8191)  acc5: 98.0000 (97.9619)  time: 0.2869  data: 0.0029  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9599 (0.9186)  acc1: 82.8000 (85.1097)  acc5: 96.8000 (97.0194)  time: 0.2846  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0914 (0.9746)  acc1: 78.4000 (83.5512)  acc5: 94.8000 (96.6537)  time: 0.2843  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0914 (0.9922)  acc1: 78.0000 (82.9680)  acc5: 95.6000 (96.5520)  time: 0.2840  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3463 s / it)
* Acc@1 82.950 Acc@5 96.638 loss 0.988
Accuracy of the model on the 50000 test images: 83.0%
Max accuracy: 83.12%
Epoch: [228]  [   0/2502]  eta: 1:55:16  lr: 0.000619  min_lr: 0.000619  loss: 2.8654 (2.8654)  weight_decay: 0.0500 (0.0500)  time: 2.7643  data: 1.8765  max mem: 53905
Epoch: [228]  [ 200/2502]  eta: 0:23:10  lr: 0.000617  min_lr: 0.000617  loss: 3.0021 (2.8215)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0475 (1.1295)  time: 0.5983  data: 0.0004  max mem: 53905
Epoch: [228]  [ 400/2502]  eta: 0:20:57  lr: 0.000616  min_lr: 0.000616  loss: 2.9064 (2.8389)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0951 (1.1238)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [228]  [ 600/2502]  eta: 0:18:53  lr: 0.000615  min_lr: 0.000615  loss: 3.0959 (2.8580)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0210 (1.1225)  time: 0.5909  data: 0.0006  max mem: 53905
Epoch: [228]  [ 800/2502]  eta: 0:16:52  lr: 0.000614  min_lr: 0.000614  loss: 2.6242 (2.8669)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1520 (1.1380)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [228]  [1000/2502]  eta: 0:14:52  lr: 0.000612  min_lr: 0.000612  loss: 2.9386 (2.8784)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2052 (1.1407)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [228]  [1200/2502]  eta: 0:12:53  lr: 0.000611  min_lr: 0.000611  loss: 2.8948 (2.8775)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9651 (1.1253)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [228]  [1400/2502]  eta: 0:10:54  lr: 0.000610  min_lr: 0.000610  loss: 3.1029 (2.8846)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9966 (1.1242)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [228]  [1600/2502]  eta: 0:08:55  lr: 0.000608  min_lr: 0.000608  loss: 2.9110 (2.8810)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0506 (1.1241)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [228]  [1800/2502]  eta: 0:06:56  lr: 0.000607  min_lr: 0.000607  loss: 2.8330 (2.8796)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1489 (1.1258)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [228]  [2000/2502]  eta: 0:04:57  lr: 0.000606  min_lr: 0.000606  loss: 2.8767 (2.8798)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0763 (1.1237)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [228]  [2200/2502]  eta: 0:02:59  lr: 0.000605  min_lr: 0.000605  loss: 2.9860 (2.8842)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1231 (1.1214)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [228]  [2400/2502]  eta: 0:01:00  lr: 0.000603  min_lr: 0.000603  loss: 3.1469 (2.8831)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0482 (1.1245)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [228]  [2501/2502]  eta: 0:00:00  lr: 0.000603  min_lr: 0.000603  loss: 2.8375 (2.8857)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1062 (1.1233)  time: 0.5329  data: 0.0009  max mem: 53905
Epoch: [228] Total time: 0:24:43 (0.5930 s / it)
Averaged stats: lr: 0.000603  min_lr: 0.000603  loss: 2.8375 (2.8840)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1062 (1.1233)
Test:  [ 0/50]  eta: 0:03:06  loss: 0.5413 (0.5413)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 3.7381  data: 3.4171  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8067 (0.8051)  acc1: 87.2000 (87.8909)  acc5: 98.4000 (97.9636)  time: 0.5975  data: 0.3111  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8202 (0.8338)  acc1: 86.8000 (86.8381)  acc5: 97.6000 (97.8286)  time: 0.2837  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9473 (0.9126)  acc1: 82.4000 (85.2000)  acc5: 96.8000 (96.9936)  time: 0.2840  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0890 (0.9602)  acc1: 79.2000 (83.5805)  acc5: 95.2000 (96.5463)  time: 0.2838  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0890 (0.9787)  acc1: 78.4000 (83.0160)  acc5: 95.6000 (96.4720)  time: 0.2836  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3562 s / it)
* Acc@1 83.128 Acc@5 96.576 loss 0.974
Accuracy of the model on the 50000 test images: 83.1%
Max accuracy: 83.13%
Epoch: [229]  [   0/2502]  eta: 1:41:44  lr: 0.000603  min_lr: 0.000603  loss: 2.3974 (2.3974)  weight_decay: 0.0500 (0.0500)  time: 2.4400  data: 1.8435  max mem: 53905
Epoch: [229]  [ 200/2502]  eta: 0:23:08  lr: 0.000601  min_lr: 0.000601  loss: 2.9760 (2.8153)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0236 (1.0908)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [229]  [ 400/2502]  eta: 0:20:55  lr: 0.000600  min_lr: 0.000600  loss: 2.9607 (2.8646)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0510 (1.0890)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [229]  [ 600/2502]  eta: 0:18:53  lr: 0.000599  min_lr: 0.000599  loss: 2.4422 (2.8492)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0588 (1.0949)  time: 0.5970  data: 0.0004  max mem: 53905
Epoch: [229]  [ 800/2502]  eta: 0:16:53  lr: 0.000597  min_lr: 0.000597  loss: 3.1594 (2.8502)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0599 (1.0972)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [229]  [1000/2502]  eta: 0:14:53  lr: 0.000596  min_lr: 0.000596  loss: 3.0775 (2.8548)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0843 (1.1063)  time: 0.5973  data: 0.0004  max mem: 53905
Epoch: [229]  [1200/2502]  eta: 0:12:53  lr: 0.000595  min_lr: 0.000595  loss: 2.8827 (2.8668)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1325 (1.1186)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [229]  [1400/2502]  eta: 0:10:54  lr: 0.000594  min_lr: 0.000594  loss: 2.9276 (2.8718)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1329 (1.1298)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [229]  [1600/2502]  eta: 0:08:55  lr: 0.000592  min_lr: 0.000592  loss: 2.8950 (2.8750)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0224 (1.1280)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [229]  [1800/2502]  eta: 0:06:56  lr: 0.000591  min_lr: 0.000591  loss: 2.9617 (2.8826)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1696 (1.1346)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [229]  [2000/2502]  eta: 0:04:57  lr: 0.000590  min_lr: 0.000590  loss: 2.8564 (2.8828)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1506 (1.1344)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [229]  [2200/2502]  eta: 0:02:59  lr: 0.000589  min_lr: 0.000589  loss: 2.9373 (2.8828)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2219 (1.1361)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [229]  [2400/2502]  eta: 0:01:00  lr: 0.000587  min_lr: 0.000587  loss: 2.8598 (2.8832)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0534 (1.1335)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [229]  [2501/2502]  eta: 0:00:00  lr: 0.000587  min_lr: 0.000587  loss: 2.8437 (2.8812)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1155 (1.1334)  time: 0.5331  data: 0.0007  max mem: 53905
Epoch: [229] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.000587  min_lr: 0.000587  loss: 2.8437 (2.8866)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1155 (1.1334)
Test:  [ 0/50]  eta: 0:02:45  loss: 0.4989 (0.4989)  acc1: 95.2000 (95.2000)  acc5: 98.8000 (98.8000)  time: 3.3135  data: 2.9892  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.6992 (0.7415)  acc1: 90.4000 (88.2182)  acc5: 98.8000 (97.7818)  time: 0.5930  data: 0.3059  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7673 (0.7763)  acc1: 86.8000 (87.0667)  acc5: 97.6000 (97.7714)  time: 0.3026  data: 0.0190  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9034 (0.8605)  acc1: 83.2000 (85.2774)  acc5: 96.8000 (96.9936)  time: 0.2843  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0429 (0.9168)  acc1: 79.2000 (83.7463)  acc5: 95.2000 (96.5854)  time: 0.2840  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0429 (0.9351)  acc1: 78.8000 (83.0480)  acc5: 96.0000 (96.5280)  time: 0.2836  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3542 s / it)
* Acc@1 83.108 Acc@5 96.668 loss 0.931
Accuracy of the model on the 50000 test images: 83.1%
Max accuracy: 83.13%
Epoch: [230]  [   0/2502]  eta: 2:02:08  lr: 0.000587  min_lr: 0.000587  loss: 3.5175 (3.5175)  weight_decay: 0.0500 (0.0500)  time: 2.9289  data: 1.6347  max mem: 53905
Epoch: [230]  [ 200/2502]  eta: 0:23:10  lr: 0.000585  min_lr: 0.000585  loss: 2.9747 (2.8520)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1224 (1.1747)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [230]  [ 400/2502]  eta: 0:20:58  lr: 0.000584  min_lr: 0.000584  loss: 2.9605 (2.8577)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1078 (1.1395)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [230]  [ 600/2502]  eta: 0:18:54  lr: 0.000583  min_lr: 0.000583  loss: 2.8550 (2.8619)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0968 (1.1358)  time: 0.5917  data: 0.0008  max mem: 53905
Epoch: [230]  [ 800/2502]  eta: 0:16:53  lr: 0.000582  min_lr: 0.000582  loss: 2.9704 (2.8632)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0887 (1.1351)  time: 0.5949  data: 0.0007  max mem: 53905
Epoch: [230]  [1000/2502]  eta: 0:14:54  lr: 0.000580  min_lr: 0.000580  loss: 3.0672 (2.8752)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1562 (1.1373)  time: 0.5925  data: 0.0006  max mem: 53905
Epoch: [230]  [1200/2502]  eta: 0:12:54  lr: 0.000579  min_lr: 0.000579  loss: 3.0664 (2.8802)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1099 (1.1438)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [230]  [1400/2502]  eta: 0:10:54  lr: 0.000578  min_lr: 0.000578  loss: 2.7193 (2.8723)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0671 (1.1436)  time: 0.5912  data: 0.0006  max mem: 53905
Epoch: [230]  [1600/2502]  eta: 0:08:55  lr: 0.000577  min_lr: 0.000577  loss: 3.0346 (2.8782)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1171 (1.1402)  time: 0.5912  data: 0.0007  max mem: 53905
Epoch: [230]  [1800/2502]  eta: 0:06:56  lr: 0.000575  min_lr: 0.000575  loss: 3.0499 (2.8830)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1751 (1.1418)  time: 0.5917  data: 0.0007  max mem: 53905
Epoch: [230]  [2000/2502]  eta: 0:04:58  lr: 0.000574  min_lr: 0.000574  loss: 3.1110 (2.8877)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1656 (1.1417)  time: 0.5982  data: 0.0006  max mem: 53905
Epoch: [230]  [2200/2502]  eta: 0:02:59  lr: 0.000573  min_lr: 0.000573  loss: 3.0722 (2.8897)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1148 (1.1476)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [230]  [2400/2502]  eta: 0:01:00  lr: 0.000571  min_lr: 0.000571  loss: 2.9139 (2.8850)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0799 (1.1474)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [230]  [2501/2502]  eta: 0:00:00  lr: 0.000571  min_lr: 0.000571  loss: 2.9949 (2.8874)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0953 (1.1470)  time: 0.5328  data: 0.0007  max mem: 53905
Epoch: [230] Total time: 0:24:43 (0.5930 s / it)
Averaged stats: lr: 0.000571  min_lr: 0.000571  loss: 2.9949 (2.8793)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0953 (1.1470)
Test:  [ 0/50]  eta: 0:03:04  loss: 0.5689 (0.5689)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.6953  data: 3.3749  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8305 (0.8183)  acc1: 87.6000 (88.0364)  acc5: 98.4000 (98.0364)  time: 0.5943  data: 0.3072  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8638 (0.8613)  acc1: 85.6000 (87.1048)  acc5: 98.0000 (97.9429)  time: 0.2844  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9523 (0.9334)  acc1: 83.2000 (85.0968)  acc5: 96.8000 (96.9161)  time: 0.2845  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1131 (0.9804)  acc1: 80.0000 (83.6195)  acc5: 94.4000 (96.4878)  time: 0.2842  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1194 (0.9977)  acc1: 77.6000 (83.0000)  acc5: 95.2000 (96.3760)  time: 0.2840  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3552 s / it)
* Acc@1 82.936 Acc@5 96.514 loss 0.993
Accuracy of the model on the 50000 test images: 82.9%
Max accuracy: 83.13%
Epoch: [231]  [   0/2502]  eta: 1:33:30  lr: 0.000571  min_lr: 0.000571  loss: 3.1499 (3.1499)  weight_decay: 0.0500 (0.0500)  time: 2.2423  data: 1.5787  max mem: 53905
Epoch: [231]  [ 200/2502]  eta: 0:23:03  lr: 0.000570  min_lr: 0.000570  loss: 3.0951 (2.8821)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1351 (1.1480)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [231]  [ 400/2502]  eta: 0:20:53  lr: 0.000568  min_lr: 0.000568  loss: 3.0251 (2.8741)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0466 (1.1366)  time: 0.5942  data: 0.0009  max mem: 53905
Epoch: [231]  [ 600/2502]  eta: 0:18:54  lr: 0.000567  min_lr: 0.000567  loss: 2.8586 (2.8412)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1114 (1.1353)  time: 0.5934  data: 0.0006  max mem: 53905
Epoch: [231]  [ 800/2502]  eta: 0:16:53  lr: 0.000566  min_lr: 0.000566  loss: 2.9727 (2.8574)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1181 (1.1383)  time: 0.5924  data: 0.0006  max mem: 53905
Epoch: [231]  [1000/2502]  eta: 0:14:53  lr: 0.000565  min_lr: 0.000565  loss: 3.1158 (2.8609)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2462 (1.1408)  time: 0.5934  data: 0.0007  max mem: 53905
Epoch: [231]  [1200/2502]  eta: 0:12:54  lr: 0.000563  min_lr: 0.000563  loss: 2.9121 (2.8681)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0686 (1.1453)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [231]  [1400/2502]  eta: 0:10:55  lr: 0.000562  min_lr: 0.000562  loss: 3.0159 (2.8725)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1374 (1.1457)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [231]  [1600/2502]  eta: 0:08:56  lr: 0.000561  min_lr: 0.000561  loss: 3.0484 (2.8703)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0650 (1.1434)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [231]  [1800/2502]  eta: 0:06:57  lr: 0.000560  min_lr: 0.000560  loss: 2.5995 (2.8736)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1102 (1.1394)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [231]  [2000/2502]  eta: 0:04:58  lr: 0.000558  min_lr: 0.000558  loss: 3.0553 (2.8701)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0777 (1.1381)  time: 0.5920  data: 0.0007  max mem: 53905
Epoch: [231]  [2200/2502]  eta: 0:02:59  lr: 0.000557  min_lr: 0.000557  loss: 2.8825 (2.8699)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1644 (1.1380)  time: 0.5977  data: 0.0006  max mem: 53905
Epoch: [231]  [2400/2502]  eta: 0:01:00  lr: 0.000556  min_lr: 0.000556  loss: 3.0946 (2.8676)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0536 (1.1379)  time: 0.5903  data: 0.0006  max mem: 53905
Epoch: [231]  [2501/2502]  eta: 0:00:00  lr: 0.000555  min_lr: 0.000555  loss: 2.6936 (2.8651)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0594 (1.1370)  time: 0.5320  data: 0.0007  max mem: 53905
Epoch: [231] Total time: 0:24:45 (0.5936 s / it)
Averaged stats: lr: 0.000555  min_lr: 0.000555  loss: 2.6936 (2.8638)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0594 (1.1370)
Test:  [ 0/50]  eta: 0:02:27  loss: 0.4383 (0.4383)  acc1: 95.6000 (95.6000)  acc5: 98.8000 (98.8000)  time: 2.9571  data: 2.6379  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.6759 (0.7069)  acc1: 89.2000 (88.1818)  acc5: 98.0000 (98.0727)  time: 0.5471  data: 0.2609  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.7357 (0.7428)  acc1: 85.6000 (87.1429)  acc5: 98.0000 (98.0381)  time: 0.2984  data: 0.0118  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.8813 (0.8259)  acc1: 83.2000 (85.3032)  acc5: 97.2000 (97.0839)  time: 0.2941  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0040 (0.8743)  acc1: 80.8000 (84.0000)  acc5: 95.2000 (96.7902)  time: 0.2946  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 0.9693 (0.8970)  acc1: 78.8000 (83.1760)  acc5: 96.0000 (96.6400)  time: 0.2884  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3508 s / it)
* Acc@1 83.282 Acc@5 96.662 loss 0.889
Accuracy of the model on the 50000 test images: 83.3%
Max accuracy: 83.28%
Epoch: [232]  [   0/2502]  eta: 2:20:22  lr: 0.000555  min_lr: 0.000555  loss: 3.3250 (3.3250)  weight_decay: 0.0500 (0.0500)  time: 3.3665  data: 2.7609  max mem: 53905
Epoch: [232]  [ 200/2502]  eta: 0:23:18  lr: 0.000554  min_lr: 0.000554  loss: 3.1919 (2.8869)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0827 (1.0823)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [232]  [ 400/2502]  eta: 0:21:01  lr: 0.000553  min_lr: 0.000553  loss: 2.8067 (2.8826)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1303 (1.1159)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [232]  [ 600/2502]  eta: 0:18:55  lr: 0.000552  min_lr: 0.000552  loss: 2.7924 (2.8717)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0974 (1.1229)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [232]  [ 800/2502]  eta: 0:16:54  lr: 0.000550  min_lr: 0.000550  loss: 3.0479 (2.8634)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [232]  [1000/2502]  eta: 0:14:54  lr: 0.000549  min_lr: 0.000549  loss: 2.9254 (2.8744)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1260 (nan)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [232]  [1200/2502]  eta: 0:12:54  lr: 0.000548  min_lr: 0.000548  loss: 2.9924 (2.8742)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2346 (nan)  time: 0.5984  data: 0.0007  max mem: 53905
Epoch: [232]  [1400/2502]  eta: 0:10:55  lr: 0.000547  min_lr: 0.000547  loss: 2.9471 (2.8717)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1302 (nan)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [232]  [1600/2502]  eta: 0:08:56  lr: 0.000545  min_lr: 0.000545  loss: 2.9167 (2.8689)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1707 (nan)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [232]  [1800/2502]  eta: 0:06:57  lr: 0.000544  min_lr: 0.000544  loss: 2.8739 (2.8788)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1215 (nan)  time: 0.5990  data: 0.0004  max mem: 53905
Epoch: [232]  [2000/2502]  eta: 0:04:58  lr: 0.000543  min_lr: 0.000543  loss: 2.8657 (2.8710)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2057 (nan)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [232]  [2200/2502]  eta: 0:02:59  lr: 0.000542  min_lr: 0.000542  loss: 2.9060 (2.8695)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0909 (nan)  time: 0.5915  data: 0.0007  max mem: 53905
Epoch: [232]  [2400/2502]  eta: 0:01:00  lr: 0.000540  min_lr: 0.000540  loss: 3.0367 (2.8722)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0693 (nan)  time: 0.5920  data: 0.0008  max mem: 53905
Epoch: [232]  [2501/2502]  eta: 0:00:00  lr: 0.000540  min_lr: 0.000540  loss: 2.8248 (2.8687)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1574 (nan)  time: 0.5415  data: 0.0008  max mem: 53905
Epoch: [232] Total time: 0:24:45 (0.5938 s / it)
Averaged stats: lr: 0.000540  min_lr: 0.000540  loss: 2.8248 (2.8655)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1574 (nan)
Test:  [ 0/50]  eta: 0:02:46  loss: 0.4943 (0.4943)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.3313  data: 3.0197  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7454 (0.7299)  acc1: 88.8000 (88.4000)  acc5: 98.4000 (98.1818)  time: 0.5728  data: 0.2879  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7545 (0.7617)  acc1: 86.4000 (87.4667)  acc5: 98.0000 (98.0571)  time: 0.2900  data: 0.0076  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.8635 (0.8399)  acc1: 84.0000 (85.4839)  acc5: 96.8000 (97.0839)  time: 0.2830  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0498 (0.8945)  acc1: 80.0000 (83.7854)  acc5: 94.8000 (96.6342)  time: 0.2827  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0458 (0.9144)  acc1: 77.6000 (82.9360)  acc5: 95.2000 (96.5520)  time: 0.2826  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3494 s / it)
* Acc@1 83.184 Acc@5 96.606 loss 0.910
Accuracy of the model on the 50000 test images: 83.2%
Max accuracy: 83.28%
Epoch: [233]  [   0/2502]  eta: 1:44:32  lr: 0.000540  min_lr: 0.000540  loss: 3.0640 (3.0640)  weight_decay: 0.0500 (0.0500)  time: 2.5070  data: 1.6860  max mem: 53905
Epoch: [233]  [ 200/2502]  eta: 0:23:02  lr: 0.000539  min_lr: 0.000539  loss: 2.8982 (2.8265)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1270 (1.1894)  time: 0.5904  data: 0.0006  max mem: 53905
Epoch: [233]  [ 400/2502]  eta: 0:20:53  lr: 0.000537  min_lr: 0.000537  loss: 2.9254 (2.8422)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0738 (1.1842)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [233]  [ 600/2502]  eta: 0:18:51  lr: 0.000536  min_lr: 0.000536  loss: 2.9030 (2.8476)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1372 (1.2046)  time: 0.5911  data: 0.0007  max mem: 53905
Epoch: [233]  [ 800/2502]  eta: 0:16:51  lr: 0.000535  min_lr: 0.000535  loss: 3.0451 (2.8480)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1548 (1.1939)  time: 0.5906  data: 0.0005  max mem: 53905
Epoch: [233]  [1000/2502]  eta: 0:14:52  lr: 0.000534  min_lr: 0.000534  loss: 2.8953 (2.8460)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1284 (1.1843)  time: 0.5908  data: 0.0006  max mem: 53905
Epoch: [233]  [1200/2502]  eta: 0:12:53  lr: 0.000533  min_lr: 0.000533  loss: 2.9184 (2.8449)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1165 (1.1742)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [233]  [1400/2502]  eta: 0:10:54  lr: 0.000531  min_lr: 0.000531  loss: 3.0962 (2.8471)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2099 (1.1827)  time: 0.5952  data: 0.0005  max mem: 53905
Epoch: [233]  [1600/2502]  eta: 0:08:55  lr: 0.000530  min_lr: 0.000530  loss: 3.1202 (2.8501)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1121 (1.1843)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [233]  [1800/2502]  eta: 0:06:56  lr: 0.000529  min_lr: 0.000529  loss: 2.8863 (2.8507)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1340 (1.1765)  time: 0.5929  data: 0.0006  max mem: 53905
Epoch: [233]  [2000/2502]  eta: 0:04:57  lr: 0.000528  min_lr: 0.000528  loss: 2.9717 (2.8492)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1005 (1.1725)  time: 0.5902  data: 0.0007  max mem: 53905
Epoch: [233]  [2200/2502]  eta: 0:02:59  lr: 0.000526  min_lr: 0.000526  loss: 2.9762 (2.8492)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1702 (1.1724)  time: 0.5988  data: 0.0005  max mem: 53905
Epoch: [233]  [2400/2502]  eta: 0:01:00  lr: 0.000525  min_lr: 0.000525  loss: 3.0145 (2.8515)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1742 (1.1713)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [233]  [2501/2502]  eta: 0:00:00  lr: 0.000525  min_lr: 0.000525  loss: 2.5838 (2.8508)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1332 (1.1686)  time: 0.5325  data: 0.0010  max mem: 53905
Epoch: [233] Total time: 0:24:42 (0.5925 s / it)
Averaged stats: lr: 0.000525  min_lr: 0.000525  loss: 2.5838 (2.8537)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1332 (1.1686)
Test:  [ 0/50]  eta: 0:02:49  loss: 0.5041 (0.5041)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.3980  data: 3.0817  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7422 (0.7562)  acc1: 87.2000 (87.8909)  acc5: 98.4000 (98.1455)  time: 0.5842  data: 0.2983  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7845 (0.7862)  acc1: 85.2000 (86.9524)  acc5: 98.4000 (98.1143)  time: 0.2934  data: 0.0102  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9024 (0.8724)  acc1: 83.2000 (85.1742)  acc5: 96.8000 (97.1355)  time: 0.2875  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0356 (0.9231)  acc1: 80.0000 (83.6000)  acc5: 95.2000 (96.7610)  time: 0.2918  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0333 (0.9416)  acc1: 77.6000 (82.9760)  acc5: 95.6000 (96.6480)  time: 0.2886  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3550 s / it)
* Acc@1 83.222 Acc@5 96.732 loss 0.936
Accuracy of the model on the 50000 test images: 83.2%
Max accuracy: 83.28%
Epoch: [234]  [   0/2502]  eta: 1:54:53  lr: 0.000525  min_lr: 0.000525  loss: 3.5187 (3.5187)  weight_decay: 0.0500 (0.0500)  time: 2.7553  data: 1.8279  max mem: 53905
Epoch: [234]  [ 200/2502]  eta: 0:23:07  lr: 0.000523  min_lr: 0.000523  loss: 3.1207 (2.8946)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1532 (1.1741)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [234]  [ 400/2502]  eta: 0:20:57  lr: 0.000522  min_lr: 0.000522  loss: 2.8807 (2.8801)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0831 (1.1625)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [234]  [ 600/2502]  eta: 0:18:54  lr: 0.000521  min_lr: 0.000521  loss: 2.7574 (2.8409)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2080 (1.1707)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [234]  [ 800/2502]  eta: 0:16:53  lr: 0.000520  min_lr: 0.000520  loss: 2.8920 (2.8402)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1315 (1.1795)  time: 0.5990  data: 0.0004  max mem: 53905
Epoch: [234]  [1000/2502]  eta: 0:14:52  lr: 0.000519  min_lr: 0.000519  loss: 2.6793 (2.8366)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1491 (1.1711)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [234]  [1200/2502]  eta: 0:12:53  lr: 0.000517  min_lr: 0.000517  loss: 3.0156 (2.8500)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1184 (1.1637)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [234]  [1400/2502]  eta: 0:10:54  lr: 0.000516  min_lr: 0.000516  loss: 2.8047 (2.8472)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1676 (1.1656)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [234]  [1600/2502]  eta: 0:08:55  lr: 0.000515  min_lr: 0.000515  loss: 2.7850 (2.8423)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0708 (1.1652)  time: 0.5910  data: 0.0006  max mem: 53905
Epoch: [234]  [1800/2502]  eta: 0:06:56  lr: 0.000514  min_lr: 0.000514  loss: 3.0499 (2.8381)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1338 (1.1651)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [234]  [2000/2502]  eta: 0:04:57  lr: 0.000513  min_lr: 0.000513  loss: 2.7448 (2.8334)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1392 (1.1659)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [234]  [2200/2502]  eta: 0:02:59  lr: 0.000511  min_lr: 0.000511  loss: 2.7602 (2.8352)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1217 (1.1665)  time: 0.6000  data: 0.0004  max mem: 53905
Epoch: [234]  [2400/2502]  eta: 0:01:00  lr: 0.000510  min_lr: 0.000510  loss: 2.8132 (2.8421)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2280 (1.1704)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [234]  [2501/2502]  eta: 0:00:00  lr: 0.000510  min_lr: 0.000510  loss: 2.8232 (2.8434)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0632 (1.1741)  time: 0.5330  data: 0.0007  max mem: 53905
Epoch: [234] Total time: 0:24:43 (0.5929 s / it)
Averaged stats: lr: 0.000510  min_lr: 0.000510  loss: 2.8232 (2.8451)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0632 (1.1741)
Test:  [ 0/50]  eta: 0:02:29  loss: 0.4983 (0.4983)  acc1: 93.6000 (93.6000)  acc5: 99.2000 (99.2000)  time: 2.9926  data: 2.6798  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.7408 (0.7261)  acc1: 86.8000 (87.6727)  acc5: 97.6000 (97.7818)  time: 0.6235  data: 0.3377  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7446 (0.7546)  acc1: 85.6000 (87.0667)  acc5: 97.6000 (97.7714)  time: 0.3353  data: 0.0520  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.8739 (0.8302)  acc1: 84.4000 (85.2387)  acc5: 96.8000 (97.0194)  time: 0.2849  data: 0.0004  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0402 (0.8842)  acc1: 80.8000 (83.6781)  acc5: 94.8000 (96.6439)  time: 0.2908  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0402 (0.9035)  acc1: 78.8000 (83.0080)  acc5: 95.6000 (96.5840)  time: 0.2903  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3638 s / it)
* Acc@1 83.218 Acc@5 96.684 loss 0.895
Accuracy of the model on the 50000 test images: 83.2%
Max accuracy: 83.28%
Epoch: [235]  [   0/2502]  eta: 1:42:49  lr: 0.000510  min_lr: 0.000510  loss: 3.3444 (3.3444)  weight_decay: 0.0500 (0.0500)  time: 2.4660  data: 1.5933  max mem: 53905
Epoch: [235]  [ 200/2502]  eta: 0:23:03  lr: 0.000508  min_lr: 0.000508  loss: 2.7254 (2.8014)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0853 (1.1773)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [235]  [ 400/2502]  eta: 0:20:54  lr: 0.000507  min_lr: 0.000507  loss: 2.7387 (2.7841)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1654 (1.1782)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [235]  [ 600/2502]  eta: 0:18:52  lr: 0.000506  min_lr: 0.000506  loss: 2.8134 (2.8087)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2354 (1.1844)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [235]  [ 800/2502]  eta: 0:16:52  lr: 0.000505  min_lr: 0.000505  loss: 3.0849 (2.8201)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2125 (1.2032)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [235]  [1000/2502]  eta: 0:14:52  lr: 0.000504  min_lr: 0.000504  loss: 3.0145 (2.8233)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1065 (1.1976)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [235]  [1200/2502]  eta: 0:12:53  lr: 0.000502  min_lr: 0.000502  loss: 2.9477 (2.8250)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1916 (1.1998)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [235]  [1400/2502]  eta: 0:10:54  lr: 0.000501  min_lr: 0.000501  loss: 2.8538 (2.8331)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2102 (1.2009)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [235]  [1600/2502]  eta: 0:08:55  lr: 0.000500  min_lr: 0.000500  loss: 2.7018 (2.8355)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0892 (1.1919)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [235]  [1800/2502]  eta: 0:06:56  lr: 0.000499  min_lr: 0.000499  loss: 2.9998 (2.8280)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1521 (1.1902)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [235]  [2000/2502]  eta: 0:04:57  lr: 0.000498  min_lr: 0.000498  loss: 3.0854 (2.8330)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1197 (1.1853)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [235]  [2200/2502]  eta: 0:02:59  lr: 0.000497  min_lr: 0.000497  loss: 2.8067 (2.8298)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1097 (1.1831)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [235]  [2400/2502]  eta: 0:01:00  lr: 0.000495  min_lr: 0.000495  loss: 3.0913 (2.8315)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2121 (1.1824)  time: 0.5994  data: 0.0004  max mem: 53905
Epoch: [235]  [2501/2502]  eta: 0:00:00  lr: 0.000495  min_lr: 0.000495  loss: 2.8456 (2.8342)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2063 (1.1838)  time: 0.5323  data: 0.0009  max mem: 53905
Epoch: [235] Total time: 0:24:44 (0.5931 s / it)
Averaged stats: lr: 0.000495  min_lr: 0.000495  loss: 2.8456 (2.8453)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2063 (1.1838)
Test:  [ 0/50]  eta: 0:02:57  loss: 0.5246 (0.5246)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.5409  data: 3.2131  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8289 (0.7907)  acc1: 87.6000 (87.8182)  acc5: 98.4000 (98.0364)  time: 0.5788  data: 0.2925  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8289 (0.8169)  acc1: 85.6000 (87.1429)  acc5: 98.0000 (98.0381)  time: 0.2827  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9127 (0.9021)  acc1: 83.6000 (85.1871)  acc5: 97.2000 (97.2258)  time: 0.2829  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0891 (0.9494)  acc1: 79.6000 (83.7268)  acc5: 95.2000 (96.8878)  time: 0.2828  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0827 (0.9654)  acc1: 79.2000 (83.1200)  acc5: 96.0000 (96.7760)  time: 0.2826  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3514 s / it)
* Acc@1 83.448 Acc@5 96.708 loss 0.957
Accuracy of the model on the 50000 test images: 83.4%
Max accuracy: 83.45%
Epoch: [236]  [   0/2502]  eta: 1:46:27  lr: 0.000495  min_lr: 0.000495  loss: 2.3689 (2.3689)  weight_decay: 0.0500 (0.0500)  time: 2.5531  data: 1.9504  max mem: 53905
Epoch: [236]  [ 200/2502]  eta: 0:23:06  lr: 0.000494  min_lr: 0.000494  loss: 2.7899 (2.8236)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2104 (1.2155)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [236]  [ 400/2502]  eta: 0:20:57  lr: 0.000492  min_lr: 0.000492  loss: 3.0575 (2.8464)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1988 (1.2225)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [236]  [ 600/2502]  eta: 0:18:53  lr: 0.000491  min_lr: 0.000491  loss: 2.9163 (2.8457)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1314 (nan)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [236]  [ 800/2502]  eta: 0:16:53  lr: 0.000490  min_lr: 0.000490  loss: 2.8360 (2.8550)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2274 (nan)  time: 0.5923  data: 0.0006  max mem: 53905
Epoch: [236]  [1000/2502]  eta: 0:14:53  lr: 0.000489  min_lr: 0.000489  loss: 2.9922 (2.8613)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2794 (nan)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [236]  [1200/2502]  eta: 0:12:54  lr: 0.000488  min_lr: 0.000488  loss: 3.1063 (2.8684)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1322 (nan)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [236]  [1400/2502]  eta: 0:10:55  lr: 0.000486  min_lr: 0.000486  loss: 2.8690 (2.8688)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1333 (nan)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [236]  [1600/2502]  eta: 0:08:56  lr: 0.000485  min_lr: 0.000485  loss: 2.9425 (2.8626)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2674 (nan)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [236]  [1800/2502]  eta: 0:06:57  lr: 0.000484  min_lr: 0.000484  loss: 2.9600 (2.8580)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1635 (nan)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [236]  [2000/2502]  eta: 0:04:58  lr: 0.000483  min_lr: 0.000483  loss: 3.0056 (2.8589)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2042 (nan)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [236]  [2200/2502]  eta: 0:02:59  lr: 0.000482  min_lr: 0.000482  loss: 3.0241 (2.8584)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2126 (nan)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [236]  [2400/2502]  eta: 0:01:00  lr: 0.000481  min_lr: 0.000481  loss: 2.8540 (2.8584)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1285 (nan)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [236]  [2501/2502]  eta: 0:00:00  lr: 0.000480  min_lr: 0.000480  loss: 3.0489 (2.8577)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0288 (nan)  time: 0.5333  data: 0.0011  max mem: 53905
Epoch: [236] Total time: 0:24:45 (0.5936 s / it)
Averaged stats: lr: 0.000480  min_lr: 0.000480  loss: 3.0489 (2.8449)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0288 (nan)
Test:  [ 0/50]  eta: 0:02:33  loss: 0.5110 (0.5110)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.0796  data: 2.7448  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.7930 (0.8033)  acc1: 88.0000 (88.0000)  acc5: 98.4000 (98.2545)  time: 0.5398  data: 0.2517  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8522 (0.8328)  acc1: 85.6000 (87.0857)  acc5: 98.4000 (98.2095)  time: 0.2850  data: 0.0014  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9485 (0.9189)  acc1: 83.6000 (85.2258)  acc5: 97.2000 (97.3032)  time: 0.2843  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1168 (0.9722)  acc1: 80.8000 (83.8146)  acc5: 94.8000 (96.8195)  time: 0.2840  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1168 (0.9895)  acc1: 79.2000 (83.1520)  acc5: 95.6000 (96.6880)  time: 0.2838  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3426 s / it)
* Acc@1 83.396 Acc@5 96.706 loss 0.981
Accuracy of the model on the 50000 test images: 83.4%
Max accuracy: 83.45%
Epoch: [237]  [   0/2502]  eta: 1:59:32  lr: 0.000480  min_lr: 0.000480  loss: 3.1410 (3.1410)  weight_decay: 0.0500 (0.0500)  time: 2.8668  data: 1.7401  max mem: 53905
Epoch: [237]  [ 200/2502]  eta: 0:23:07  lr: 0.000479  min_lr: 0.000479  loss: 2.7489 (2.8368)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1780 (1.2093)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [237]  [ 400/2502]  eta: 0:20:55  lr: 0.000478  min_lr: 0.000478  loss: 3.0200 (2.8341)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2028 (1.1934)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [237]  [ 600/2502]  eta: 0:18:53  lr: 0.000477  min_lr: 0.000477  loss: 2.8450 (2.8237)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1906 (1.2048)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [237]  [ 800/2502]  eta: 0:16:52  lr: 0.000475  min_lr: 0.000475  loss: 2.8392 (2.8160)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2099 (1.2085)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [237]  [1000/2502]  eta: 0:14:53  lr: 0.000474  min_lr: 0.000474  loss: 2.8111 (2.8205)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1367 (1.2063)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [237]  [1200/2502]  eta: 0:12:53  lr: 0.000473  min_lr: 0.000473  loss: 2.9558 (2.8155)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2010 (1.2024)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [237]  [1400/2502]  eta: 0:10:54  lr: 0.000472  min_lr: 0.000472  loss: 2.8712 (2.8195)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1147 (1.1973)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [237]  [1600/2502]  eta: 0:08:56  lr: 0.000471  min_lr: 0.000471  loss: 2.9363 (2.8258)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2256 (1.1972)  time: 0.6081  data: 0.0005  max mem: 53905
Epoch: [237]  [1800/2502]  eta: 0:06:57  lr: 0.000470  min_lr: 0.000470  loss: 2.9020 (2.8217)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2357 (1.2017)  time: 0.5938  data: 0.0005  max mem: 53905
Epoch: [237]  [2000/2502]  eta: 0:04:58  lr: 0.000468  min_lr: 0.000468  loss: 2.9903 (2.8231)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2009 (1.2061)  time: 0.5996  data: 0.0005  max mem: 53905
Epoch: [237]  [2200/2502]  eta: 0:02:59  lr: 0.000467  min_lr: 0.000467  loss: 3.0694 (2.8201)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1457 (1.2025)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [237]  [2400/2502]  eta: 0:01:00  lr: 0.000466  min_lr: 0.000466  loss: 2.6318 (2.8208)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3363 (1.2102)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [237]  [2501/2502]  eta: 0:00:00  lr: 0.000466  min_lr: 0.000466  loss: 2.7553 (2.8201)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2302 (1.2109)  time: 0.5336  data: 0.0008  max mem: 53905
Epoch: [237] Total time: 0:24:44 (0.5935 s / it)
Averaged stats: lr: 0.000466  min_lr: 0.000466  loss: 2.7553 (2.8244)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2302 (1.2109)
Test:  [ 0/50]  eta: 0:03:06  loss: 0.4964 (0.4964)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.7374  data: 3.4274  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7543 (0.7471)  acc1: 89.2000 (88.4000)  acc5: 98.8000 (98.1091)  time: 0.5979  data: 0.3120  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7780 (0.7803)  acc1: 86.4000 (87.5810)  acc5: 98.4000 (97.9619)  time: 0.2841  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.8660 (0.8545)  acc1: 84.0000 (85.7290)  acc5: 97.2000 (97.1613)  time: 0.2843  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0352 (0.9041)  acc1: 80.0000 (84.2927)  acc5: 94.8000 (96.6732)  time: 0.2840  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 0.9851 (0.9235)  acc1: 79.2000 (83.5200)  acc5: 95.2000 (96.6000)  time: 0.2837  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3557 s / it)
* Acc@1 83.448 Acc@5 96.678 loss 0.917
Accuracy of the model on the 50000 test images: 83.4%
Max accuracy: 83.45%
Epoch: [238]  [   0/2502]  eta: 1:48:52  lr: 0.000466  min_lr: 0.000466  loss: 2.9865 (2.9865)  weight_decay: 0.0500 (0.0500)  time: 2.6109  data: 2.0223  max mem: 53905
Epoch: [238]  [ 200/2502]  eta: 0:23:12  lr: 0.000464  min_lr: 0.000464  loss: 3.0734 (2.8262)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1800 (1.2126)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [238]  [ 400/2502]  eta: 0:20:57  lr: 0.000463  min_lr: 0.000463  loss: 2.8282 (2.8038)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0735 (1.2071)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [238]  [ 600/2502]  eta: 0:18:55  lr: 0.000462  min_lr: 0.000462  loss: 2.7975 (2.8051)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1711 (1.2076)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [238]  [ 800/2502]  eta: 0:16:54  lr: 0.000461  min_lr: 0.000461  loss: 2.9255 (2.8010)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1686 (1.2138)  time: 0.5931  data: 0.0007  max mem: 53905
Epoch: [238]  [1000/2502]  eta: 0:14:54  lr: 0.000460  min_lr: 0.000460  loss: 2.9613 (2.8085)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2931 (inf)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [238]  [1200/2502]  eta: 0:12:54  lr: 0.000459  min_lr: 0.000459  loss: 3.0247 (2.8226)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1757 (inf)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [238]  [1400/2502]  eta: 0:10:55  lr: 0.000458  min_lr: 0.000458  loss: 2.8861 (2.8168)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2525 (inf)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [238]  [1600/2502]  eta: 0:08:56  lr: 0.000456  min_lr: 0.000456  loss: 2.8999 (2.8179)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1679 (inf)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [238]  [1800/2502]  eta: 0:06:57  lr: 0.000455  min_lr: 0.000455  loss: 2.5772 (2.8210)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2039 (inf)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [238]  [2000/2502]  eta: 0:04:58  lr: 0.000454  min_lr: 0.000454  loss: 2.8429 (2.8191)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1452 (inf)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [238]  [2200/2502]  eta: 0:02:59  lr: 0.000453  min_lr: 0.000453  loss: 2.9709 (2.8230)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2815 (inf)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [238]  [2400/2502]  eta: 0:01:00  lr: 0.000452  min_lr: 0.000452  loss: 3.0098 (2.8218)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1638 (inf)  time: 0.5978  data: 0.0004  max mem: 53905
Epoch: [238]  [2501/2502]  eta: 0:00:00  lr: 0.000451  min_lr: 0.000451  loss: 2.8721 (2.8208)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1749 (inf)  time: 0.5326  data: 0.0009  max mem: 53905
Epoch: [238] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.000451  min_lr: 0.000451  loss: 2.8721 (2.8272)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1749 (inf)
Test:  [ 0/50]  eta: 0:02:27  loss: 0.5489 (0.5489)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.9514  data: 2.6242  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8028 (0.7816)  acc1: 89.6000 (88.0000)  acc5: 98.4000 (98.1455)  time: 0.5838  data: 0.2970  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8167 (0.8154)  acc1: 85.2000 (87.0095)  acc5: 98.0000 (98.0571)  time: 0.3154  data: 0.0324  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9196 (0.9009)  acc1: 82.8000 (85.0065)  acc5: 96.8000 (97.1742)  time: 0.2837  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1038 (0.9514)  acc1: 80.0000 (83.6195)  acc5: 95.2000 (96.8000)  time: 0.2835  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1068 (0.9697)  acc1: 78.4000 (83.0080)  acc5: 95.6000 (96.6960)  time: 0.2834  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3519 s / it)
* Acc@1 83.272 Acc@5 96.754 loss 0.964
Accuracy of the model on the 50000 test images: 83.3%
Max accuracy: 83.45%
Epoch: [239]  [   0/2502]  eta: 1:56:01  lr: 0.000451  min_lr: 0.000451  loss: 2.6957 (2.6957)  weight_decay: 0.0500 (0.0500)  time: 2.7823  data: 1.5534  max mem: 53905
Epoch: [239]  [ 200/2502]  eta: 0:23:08  lr: 0.000450  min_lr: 0.000450  loss: 2.8996 (2.7494)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1517 (1.1950)  time: 0.6008  data: 0.0007  max mem: 53905
Epoch: [239]  [ 400/2502]  eta: 0:20:57  lr: 0.000449  min_lr: 0.000449  loss: 2.7147 (2.7853)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2205 (1.2286)  time: 0.5927  data: 0.0008  max mem: 53905
Epoch: [239]  [ 600/2502]  eta: 0:18:54  lr: 0.000448  min_lr: 0.000448  loss: 2.9600 (2.8091)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1347 (1.2196)  time: 0.5922  data: 0.0008  max mem: 53905
Epoch: [239]  [ 800/2502]  eta: 0:16:53  lr: 0.000447  min_lr: 0.000447  loss: 2.9420 (2.8132)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2613 (1.2197)  time: 0.5914  data: 0.0007  max mem: 53905
Epoch: [239]  [1000/2502]  eta: 0:14:53  lr: 0.000446  min_lr: 0.000446  loss: 3.0661 (2.8075)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1930 (1.2356)  time: 0.6040  data: 0.0006  max mem: 53905
Epoch: [239]  [1200/2502]  eta: 0:12:53  lr: 0.000445  min_lr: 0.000445  loss: 2.7541 (2.7951)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2238 (1.2368)  time: 0.5916  data: 0.0007  max mem: 53905
Epoch: [239]  [1400/2502]  eta: 0:10:54  lr: 0.000443  min_lr: 0.000443  loss: 2.9545 (2.7982)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2395 (1.2453)  time: 0.5959  data: 0.0007  max mem: 53905
Epoch: [239]  [1600/2502]  eta: 0:08:55  lr: 0.000442  min_lr: 0.000442  loss: 2.9389 (2.7993)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1933 (1.2480)  time: 0.5914  data: 0.0008  max mem: 53905
Epoch: [239]  [1800/2502]  eta: 0:06:56  lr: 0.000441  min_lr: 0.000441  loss: 2.7861 (2.7995)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1316 (1.2481)  time: 0.5932  data: 0.0008  max mem: 53905
Epoch: [239]  [2000/2502]  eta: 0:04:58  lr: 0.000440  min_lr: 0.000440  loss: 2.9282 (2.8039)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1478 (1.2400)  time: 0.5939  data: 0.0008  max mem: 53905
Epoch: [239]  [2200/2502]  eta: 0:02:59  lr: 0.000439  min_lr: 0.000439  loss: 2.9744 (2.8087)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1823 (1.2395)  time: 0.5925  data: 0.0008  max mem: 53905
Epoch: [239]  [2400/2502]  eta: 0:01:00  lr: 0.000438  min_lr: 0.000438  loss: 2.8983 (2.8119)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2922 (1.2440)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [239]  [2501/2502]  eta: 0:00:00  lr: 0.000437  min_lr: 0.000437  loss: 3.0006 (2.8105)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2409 (1.2451)  time: 0.5337  data: 0.0009  max mem: 53905
Epoch: [239] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.000437  min_lr: 0.000437  loss: 3.0006 (2.8114)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2409 (1.2451)
Test:  [ 0/50]  eta: 0:02:51  loss: 0.5528 (0.5528)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 3.4331  data: 3.0966  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8311 (0.8500)  acc1: 89.2000 (87.8909)  acc5: 98.8000 (98.0727)  time: 0.5898  data: 0.3009  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8899 (0.8881)  acc1: 86.4000 (86.9333)  acc5: 98.0000 (98.0571)  time: 0.3010  data: 0.0109  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9726 (0.9689)  acc1: 82.8000 (84.9936)  acc5: 96.8000 (97.2129)  time: 0.2919  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1860 (1.0197)  acc1: 80.4000 (83.6976)  acc5: 95.2000 (96.7317)  time: 0.2852  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1860 (1.0359)  acc1: 80.4000 (83.2000)  acc5: 95.6000 (96.6800)  time: 0.2833  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3566 s / it)
* Acc@1 83.434 Acc@5 96.712 loss 1.033
Accuracy of the model on the 50000 test images: 83.4%
Max accuracy: 83.45%
Epoch: [240]  [   0/2502]  eta: 1:52:07  lr: 0.000437  min_lr: 0.000437  loss: 2.6222 (2.6222)  weight_decay: 0.0500 (0.0500)  time: 2.6887  data: 1.9989  max mem: 53905
Epoch: [240]  [ 200/2502]  eta: 0:23:11  lr: 0.000436  min_lr: 0.000436  loss: 2.8010 (2.7989)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1935 (1.2352)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [240]  [ 400/2502]  eta: 0:20:57  lr: 0.000435  min_lr: 0.000435  loss: 3.0412 (2.8113)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1924 (1.2098)  time: 0.5985  data: 0.0006  max mem: 53905
Epoch: [240]  [ 600/2502]  eta: 0:18:54  lr: 0.000434  min_lr: 0.000434  loss: 2.7736 (2.8102)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2082 (1.2265)  time: 0.5924  data: 0.0007  max mem: 53905
Epoch: [240]  [ 800/2502]  eta: 0:16:53  lr: 0.000433  min_lr: 0.000433  loss: 2.8044 (2.8089)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1261 (1.2191)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [240]  [1000/2502]  eta: 0:14:53  lr: 0.000432  min_lr: 0.000432  loss: 2.9814 (2.8077)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2623 (1.2252)  time: 0.5924  data: 0.0006  max mem: 53905
Epoch: [240]  [1200/2502]  eta: 0:12:54  lr: 0.000431  min_lr: 0.000431  loss: 2.7770 (2.8103)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2268 (1.2272)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [240]  [1400/2502]  eta: 0:10:55  lr: 0.000429  min_lr: 0.000429  loss: 2.9404 (2.8146)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2699 (1.2275)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [240]  [1600/2502]  eta: 0:08:56  lr: 0.000428  min_lr: 0.000428  loss: 2.8783 (2.8130)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1982 (1.2236)  time: 0.5912  data: 0.0006  max mem: 53905
Epoch: [240]  [1800/2502]  eta: 0:06:57  lr: 0.000427  min_lr: 0.000427  loss: 2.7502 (2.8073)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1476 (1.2222)  time: 0.5975  data: 0.0005  max mem: 53905
Epoch: [240]  [2000/2502]  eta: 0:04:58  lr: 0.000426  min_lr: 0.000426  loss: 2.4742 (2.8067)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2095 (1.2218)  time: 0.5902  data: 0.0005  max mem: 53905
Epoch: [240]  [2200/2502]  eta: 0:02:59  lr: 0.000425  min_lr: 0.000425  loss: 2.8944 (2.8075)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1263 (1.2238)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [240]  [2400/2502]  eta: 0:01:00  lr: 0.000424  min_lr: 0.000424  loss: 2.9095 (2.8066)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2133 (1.2279)  time: 0.5910  data: 0.0006  max mem: 53905
Epoch: [240]  [2501/2502]  eta: 0:00:00  lr: 0.000423  min_lr: 0.000423  loss: 2.9860 (2.8098)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2666 (1.2300)  time: 0.5318  data: 0.0007  max mem: 53905
Epoch: [240] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.000423  min_lr: 0.000423  loss: 2.9860 (2.8057)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2666 (1.2300)
Test:  [ 0/50]  eta: 0:02:39  loss: 0.5762 (0.5762)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.1820  data: 2.8604  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8186 (0.8108)  acc1: 90.4000 (88.4727)  acc5: 98.4000 (98.2182)  time: 0.5885  data: 0.3036  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8320 (0.8371)  acc1: 86.4000 (87.4286)  acc5: 98.0000 (98.0762)  time: 0.3056  data: 0.0242  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9378 (0.9203)  acc1: 83.2000 (85.4194)  acc5: 96.8000 (97.1871)  time: 0.2823  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1265 (0.9749)  acc1: 79.6000 (84.0390)  acc5: 95.2000 (96.7707)  time: 0.2834  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1153 (0.9923)  acc1: 79.6000 (83.3520)  acc5: 96.0000 (96.7280)  time: 0.2869  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3537 s / it)
* Acc@1 83.446 Acc@5 96.782 loss 0.988
Accuracy of the model on the 50000 test images: 83.4%
Max accuracy: 83.45%
Epoch: [241]  [   0/2502]  eta: 1:59:57  lr: 0.000423  min_lr: 0.000423  loss: 3.2296 (3.2296)  weight_decay: 0.0500 (0.0500)  time: 2.8768  data: 1.7917  max mem: 53905
Epoch: [241]  [ 200/2502]  eta: 0:23:09  lr: 0.000422  min_lr: 0.000422  loss: 3.0269 (2.8115)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2513 (1.2259)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [241]  [ 400/2502]  eta: 0:20:57  lr: 0.000421  min_lr: 0.000421  loss: 2.8527 (2.8175)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1714 (1.2352)  time: 0.6008  data: 0.0008  max mem: 53905
Epoch: [241]  [ 600/2502]  eta: 0:18:55  lr: 0.000420  min_lr: 0.000420  loss: 3.0669 (2.8203)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2925 (1.2402)  time: 0.6040  data: 0.0008  max mem: 53905
Epoch: [241]  [ 800/2502]  eta: 0:16:53  lr: 0.000419  min_lr: 0.000419  loss: 3.0383 (2.8168)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1905 (1.2347)  time: 0.5918  data: 0.0007  max mem: 53905
Epoch: [241]  [1000/2502]  eta: 0:14:53  lr: 0.000418  min_lr: 0.000418  loss: 2.8294 (2.7977)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2094 (1.2324)  time: 0.5927  data: 0.0007  max mem: 53905
Epoch: [241]  [1200/2502]  eta: 0:12:54  lr: 0.000417  min_lr: 0.000417  loss: 2.8079 (2.8084)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2151 (1.2259)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [241]  [1400/2502]  eta: 0:10:55  lr: 0.000416  min_lr: 0.000416  loss: 2.8887 (2.8060)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2169 (1.2206)  time: 0.5916  data: 0.0007  max mem: 53905
Epoch: [241]  [1600/2502]  eta: 0:08:56  lr: 0.000415  min_lr: 0.000415  loss: 3.0346 (2.8078)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2302 (1.2268)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [241]  [1800/2502]  eta: 0:06:57  lr: 0.000413  min_lr: 0.000413  loss: 2.8421 (2.8059)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2510 (1.2317)  time: 0.5934  data: 0.0005  max mem: 53905
Epoch: [241]  [2000/2502]  eta: 0:04:58  lr: 0.000412  min_lr: 0.000412  loss: 2.7882 (2.8112)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2020 (1.2365)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [241]  [2200/2502]  eta: 0:02:59  lr: 0.000411  min_lr: 0.000411  loss: 2.8192 (2.8109)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [241]  [2400/2502]  eta: 0:01:00  lr: 0.000410  min_lr: 0.000410  loss: 2.8853 (2.8107)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1884 (nan)  time: 0.5916  data: 0.0007  max mem: 53905
Epoch: [241]  [2501/2502]  eta: 0:00:00  lr: 0.000410  min_lr: 0.000410  loss: 3.0499 (2.8138)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2102 (nan)  time: 0.5328  data: 0.0009  max mem: 53905
Epoch: [241] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.000410  min_lr: 0.000410  loss: 3.0499 (2.8017)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2102 (nan)
Test:  [ 0/50]  eta: 0:03:25  loss: 0.5262 (0.5262)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 4.1069  data: 3.7956  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 0.7936 (0.7706)  acc1: 88.8000 (88.4364)  acc5: 98.4000 (98.3273)  time: 0.6316  data: 0.3454  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8016 (0.8014)  acc1: 85.2000 (87.1810)  acc5: 98.4000 (98.0952)  time: 0.2842  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9389 (0.8870)  acc1: 82.8000 (85.3936)  acc5: 96.4000 (97.2000)  time: 0.2844  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0498 (0.9310)  acc1: 81.2000 (84.2342)  acc5: 95.2000 (96.7805)  time: 0.2842  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0433 (0.9504)  acc1: 80.8000 (83.5520)  acc5: 96.0000 (96.7360)  time: 0.2839  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3640 s / it)
* Acc@1 83.606 Acc@5 96.772 loss 0.948
Accuracy of the model on the 50000 test images: 83.6%
Max accuracy: 83.61%
Epoch: [242]  [   0/2502]  eta: 1:39:55  lr: 0.000410  min_lr: 0.000410  loss: 3.2644 (3.2644)  weight_decay: 0.0500 (0.0500)  time: 2.3962  data: 1.8061  max mem: 53905
Epoch: [242]  [ 200/2502]  eta: 0:23:07  lr: 0.000409  min_lr: 0.000409  loss: 2.7735 (2.7471)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1754 (1.1871)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [242]  [ 400/2502]  eta: 0:20:54  lr: 0.000407  min_lr: 0.000407  loss: 2.8320 (2.7645)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2427 (1.2252)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [242]  [ 600/2502]  eta: 0:18:52  lr: 0.000406  min_lr: 0.000406  loss: 2.8051 (2.7688)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2070 (1.2422)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [242]  [ 800/2502]  eta: 0:16:52  lr: 0.000405  min_lr: 0.000405  loss: 2.9634 (2.7914)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2668 (1.2397)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [242]  [1000/2502]  eta: 0:14:52  lr: 0.000404  min_lr: 0.000404  loss: 2.9957 (2.7937)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2472 (1.2427)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [242]  [1200/2502]  eta: 0:12:53  lr: 0.000403  min_lr: 0.000403  loss: 2.8596 (2.7961)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1206 (1.2381)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [242]  [1400/2502]  eta: 0:10:54  lr: 0.000402  min_lr: 0.000402  loss: 2.8978 (2.7934)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1689 (1.2396)  time: 0.5960  data: 0.0005  max mem: 53905
Epoch: [242]  [1600/2502]  eta: 0:08:55  lr: 0.000401  min_lr: 0.000401  loss: 2.7409 (2.7899)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1722 (1.2339)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [242]  [1800/2502]  eta: 0:06:56  lr: 0.000400  min_lr: 0.000400  loss: 3.0138 (2.7929)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1479 (1.2335)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [242]  [2000/2502]  eta: 0:04:58  lr: 0.000399  min_lr: 0.000399  loss: 2.8783 (2.7983)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3161 (1.2418)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [242]  [2200/2502]  eta: 0:02:59  lr: 0.000398  min_lr: 0.000398  loss: 2.6081 (2.7985)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2120 (1.2403)  time: 0.5967  data: 0.0004  max mem: 53905
Epoch: [242]  [2400/2502]  eta: 0:01:00  lr: 0.000397  min_lr: 0.000397  loss: 2.8929 (2.8002)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1994 (1.2435)  time: 0.5989  data: 0.0004  max mem: 53905
Epoch: [242]  [2501/2502]  eta: 0:00:00  lr: 0.000396  min_lr: 0.000396  loss: 2.9551 (2.7988)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2016 (1.2428)  time: 0.5335  data: 0.0009  max mem: 53905
Epoch: [242] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.000396  min_lr: 0.000396  loss: 2.9551 (2.7994)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2016 (1.2428)
Test:  [ 0/50]  eta: 0:02:33  loss: 0.5212 (0.5212)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.0744  data: 2.7591  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7885 (0.7713)  acc1: 88.0000 (88.2546)  acc5: 98.0000 (97.8909)  time: 0.5710  data: 0.2843  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8071 (0.8083)  acc1: 84.8000 (87.1619)  acc5: 98.0000 (97.9238)  time: 0.3026  data: 0.0187  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9253 (0.8876)  acc1: 83.6000 (85.5613)  acc5: 97.2000 (97.1484)  time: 0.2845  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1093 (0.9366)  acc1: 80.4000 (84.0781)  acc5: 95.6000 (96.8098)  time: 0.2842  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0884 (0.9546)  acc1: 79.2000 (83.3440)  acc5: 95.6000 (96.7280)  time: 0.2838  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3493 s / it)
* Acc@1 83.572 Acc@5 96.790 loss 0.950
Accuracy of the model on the 50000 test images: 83.6%
Max accuracy: 83.61%
Epoch: [243]  [   0/2502]  eta: 1:57:56  lr: 0.000396  min_lr: 0.000396  loss: 1.8658 (1.8658)  weight_decay: 0.0500 (0.0500)  time: 2.8282  data: 2.2317  max mem: 53905
Epoch: [243]  [ 200/2502]  eta: 0:23:08  lr: 0.000395  min_lr: 0.000395  loss: 2.9262 (2.7751)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2159 (1.2242)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [243]  [ 400/2502]  eta: 0:20:59  lr: 0.000394  min_lr: 0.000394  loss: 2.8302 (2.8039)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2893 (1.3180)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [243]  [ 600/2502]  eta: 0:18:55  lr: 0.000393  min_lr: 0.000393  loss: 2.7357 (2.7952)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2460 (1.3087)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [243]  [ 800/2502]  eta: 0:16:54  lr: 0.000392  min_lr: 0.000392  loss: 3.0049 (2.7916)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1063 (1.2836)  time: 0.5966  data: 0.0004  max mem: 53905
Epoch: [243]  [1000/2502]  eta: 0:14:54  lr: 0.000391  min_lr: 0.000391  loss: 2.8256 (2.7888)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2074 (1.2659)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [243]  [1200/2502]  eta: 0:12:54  lr: 0.000390  min_lr: 0.000390  loss: 2.8773 (2.7890)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3031 (1.2666)  time: 0.5915  data: 0.0003  max mem: 53905
Epoch: [243]  [1400/2502]  eta: 0:10:55  lr: 0.000389  min_lr: 0.000389  loss: 2.9113 (2.7895)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1749 (1.2668)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [243]  [1600/2502]  eta: 0:08:56  lr: 0.000388  min_lr: 0.000388  loss: 2.6953 (2.7916)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2488 (1.2642)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [243]  [1800/2502]  eta: 0:06:57  lr: 0.000387  min_lr: 0.000387  loss: 2.7498 (2.7886)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2525 (1.2655)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [243]  [2000/2502]  eta: 0:04:58  lr: 0.000386  min_lr: 0.000386  loss: 2.5782 (2.7885)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2641 (1.2691)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [243]  [2200/2502]  eta: 0:02:59  lr: 0.000384  min_lr: 0.000384  loss: 2.7930 (2.7933)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1386 (1.2678)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [243]  [2400/2502]  eta: 0:01:00  lr: 0.000383  min_lr: 0.000383  loss: 2.8798 (2.7972)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2340 (1.2699)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [243]  [2501/2502]  eta: 0:00:00  lr: 0.000383  min_lr: 0.000383  loss: 3.0320 (2.7965)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2639 (1.2691)  time: 0.5328  data: 0.0007  max mem: 53905
Epoch: [243] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.000383  min_lr: 0.000383  loss: 3.0320 (2.7903)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2639 (1.2691)
Test:  [ 0/50]  eta: 0:03:23  loss: 0.5899 (0.5899)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 4.0773  data: 3.7658  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 0.8382 (0.8477)  acc1: 90.0000 (88.2909)  acc5: 98.4000 (98.2546)  time: 0.6264  data: 0.3428  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8925 (0.8847)  acc1: 85.2000 (87.2571)  acc5: 98.0000 (98.0952)  time: 0.2814  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.0226 (0.9682)  acc1: 83.2000 (85.4581)  acc5: 97.2000 (97.2516)  time: 0.2816  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1895 (1.0192)  acc1: 80.4000 (84.0683)  acc5: 95.2000 (96.8683)  time: 0.2813  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1895 (1.0392)  acc1: 80.0000 (83.2720)  acc5: 96.4000 (96.7840)  time: 0.2811  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3601 s / it)
* Acc@1 83.692 Acc@5 96.806 loss 1.032
Accuracy of the model on the 50000 test images: 83.7%
Max accuracy: 83.69%
Epoch: [244]  [   0/2502]  eta: 1:44:10  lr: 0.000383  min_lr: 0.000383  loss: 3.2083 (3.2083)  weight_decay: 0.0500 (0.0500)  time: 2.4982  data: 1.9080  max mem: 53905
Epoch: [244]  [ 200/2502]  eta: 0:23:06  lr: 0.000382  min_lr: 0.000382  loss: 2.9023 (2.7738)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2387 (1.2688)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [244]  [ 400/2502]  eta: 0:20:54  lr: 0.000381  min_lr: 0.000381  loss: 2.8547 (2.7556)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2421 (1.2833)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [244]  [ 600/2502]  eta: 0:18:52  lr: 0.000380  min_lr: 0.000380  loss: 2.8301 (2.7559)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1985 (1.2736)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [244]  [ 800/2502]  eta: 0:16:52  lr: 0.000379  min_lr: 0.000379  loss: 2.8368 (2.7562)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1505 (1.2563)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [244]  [1000/2502]  eta: 0:14:52  lr: 0.000378  min_lr: 0.000378  loss: 2.7735 (2.7553)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2412 (1.2545)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [244]  [1200/2502]  eta: 0:12:53  lr: 0.000377  min_lr: 0.000377  loss: 2.6930 (2.7573)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1939 (1.2579)  time: 0.5968  data: 0.0004  max mem: 53905
Epoch: [244]  [1400/2502]  eta: 0:10:54  lr: 0.000376  min_lr: 0.000376  loss: 2.7454 (2.7638)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2681 (1.2633)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [244]  [1600/2502]  eta: 0:08:55  lr: 0.000374  min_lr: 0.000374  loss: 2.8024 (2.7669)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2370 (1.2697)  time: 0.5945  data: 0.0005  max mem: 53905
Epoch: [244]  [1800/2502]  eta: 0:06:56  lr: 0.000373  min_lr: 0.000373  loss: 2.8500 (2.7658)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1990 (1.2707)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [244]  [2000/2502]  eta: 0:04:57  lr: 0.000372  min_lr: 0.000372  loss: 2.8530 (2.7660)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2284 (1.2713)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [244]  [2200/2502]  eta: 0:02:59  lr: 0.000371  min_lr: 0.000371  loss: 2.8926 (2.7707)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2797 (1.2702)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [244]  [2400/2502]  eta: 0:01:00  lr: 0.000370  min_lr: 0.000370  loss: 3.0364 (2.7743)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3471 (1.2710)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [244]  [2501/2502]  eta: 0:00:00  lr: 0.000370  min_lr: 0.000370  loss: 2.8236 (2.7773)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2802 (1.2724)  time: 0.5328  data: 0.0007  max mem: 53905
Epoch: [244] Total time: 0:24:42 (0.5925 s / it)
Averaged stats: lr: 0.000370  min_lr: 0.000370  loss: 2.8236 (2.7816)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2802 (1.2724)
Test:  [ 0/50]  eta: 0:03:17  loss: 0.4756 (0.4756)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.9561  data: 3.6354  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.7657 (0.7437)  acc1: 88.8000 (88.4364)  acc5: 98.4000 (97.9636)  time: 0.6154  data: 0.3309  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7844 (0.7727)  acc1: 86.4000 (87.5048)  acc5: 98.0000 (97.9810)  time: 0.2815  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9124 (0.8599)  acc1: 83.2000 (85.6000)  acc5: 97.2000 (97.2000)  time: 0.2818  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0519 (0.9113)  acc1: 80.8000 (84.2439)  acc5: 95.6000 (96.8683)  time: 0.2818  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0512 (0.9277)  acc1: 80.0000 (83.5600)  acc5: 96.0000 (96.8080)  time: 0.2816  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3574 s / it)
* Acc@1 83.780 Acc@5 96.804 loss 0.920
Accuracy of the model on the 50000 test images: 83.8%
Max accuracy: 83.78%
Epoch: [245]  [   0/2502]  eta: 1:41:31  lr: 0.000370  min_lr: 0.000370  loss: 3.1507 (3.1507)  weight_decay: 0.0500 (0.0500)  time: 2.4347  data: 1.8367  max mem: 53905
Epoch: [245]  [ 200/2502]  eta: 0:23:06  lr: 0.000369  min_lr: 0.000369  loss: 2.8613 (2.8223)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2663 (1.2615)  time: 0.6036  data: 0.0004  max mem: 53905
Epoch: [245]  [ 400/2502]  eta: 0:20:56  lr: 0.000368  min_lr: 0.000368  loss: 2.9207 (2.8260)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2733 (1.3121)  time: 0.5919  data: 0.0003  max mem: 53905
Epoch: [245]  [ 600/2502]  eta: 0:18:53  lr: 0.000367  min_lr: 0.000367  loss: 2.8664 (2.8109)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2495 (1.3111)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [245]  [ 800/2502]  eta: 0:16:53  lr: 0.000366  min_lr: 0.000366  loss: 3.0204 (2.7925)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2148 (1.2959)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [245]  [1000/2502]  eta: 0:14:53  lr: 0.000365  min_lr: 0.000365  loss: 2.7845 (2.7926)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2134 (1.2938)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [245]  [1200/2502]  eta: 0:12:53  lr: 0.000364  min_lr: 0.000364  loss: 2.7808 (2.7927)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2901 (1.3034)  time: 0.5903  data: 0.0005  max mem: 53905
Epoch: [245]  [1400/2502]  eta: 0:10:54  lr: 0.000363  min_lr: 0.000363  loss: 3.0074 (2.7892)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2407 (1.2972)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [245]  [1600/2502]  eta: 0:08:55  lr: 0.000362  min_lr: 0.000362  loss: 2.9544 (2.7875)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2600 (1.2932)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [245]  [1800/2502]  eta: 0:06:56  lr: 0.000360  min_lr: 0.000360  loss: 2.7968 (2.7883)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1424 (1.2899)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [245]  [2000/2502]  eta: 0:04:57  lr: 0.000359  min_lr: 0.000359  loss: 2.8246 (2.7853)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2815 (1.2877)  time: 0.5946  data: 0.0004  max mem: 53905
Epoch: [245]  [2200/2502]  eta: 0:02:59  lr: 0.000358  min_lr: 0.000358  loss: 2.5072 (2.7818)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3017 (1.2927)  time: 0.5902  data: 0.0004  max mem: 53905
Epoch: [245]  [2400/2502]  eta: 0:01:00  lr: 0.000357  min_lr: 0.000357  loss: 2.9836 (2.7835)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2381 (1.2988)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [245]  [2501/2502]  eta: 0:00:00  lr: 0.000357  min_lr: 0.000357  loss: 3.0636 (2.7855)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2873 (1.3001)  time: 0.5326  data: 0.0006  max mem: 53905
Epoch: [245] Total time: 0:24:43 (0.5929 s / it)
Averaged stats: lr: 0.000357  min_lr: 0.000357  loss: 3.0636 (2.7769)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2873 (1.3001)
Test:  [ 0/50]  eta: 0:02:27  loss: 0.5289 (0.5289)  acc1: 95.6000 (95.6000)  acc5: 99.2000 (99.2000)  time: 2.9494  data: 2.6334  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.7584 (0.7736)  acc1: 90.0000 (88.7273)  acc5: 98.0000 (97.8909)  time: 0.5262  data: 0.2399  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8056 (0.8109)  acc1: 86.4000 (87.7524)  acc5: 97.6000 (97.9048)  time: 0.2841  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9580 (0.8890)  acc1: 83.2000 (85.5484)  acc5: 96.8000 (97.3032)  time: 0.2842  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0605 (0.9386)  acc1: 79.6000 (83.9707)  acc5: 95.6000 (96.9854)  time: 0.2839  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0585 (0.9586)  acc1: 79.2000 (83.2560)  acc5: 96.0000 (96.9040)  time: 0.2837  data: 0.0002  max mem: 53905
Test: Total time: 0:00:16 (0.3397 s / it)
* Acc@1 83.616 Acc@5 96.866 loss 0.951
Accuracy of the model on the 50000 test images: 83.6%
Max accuracy: 83.78%
Epoch: [246]  [   0/2502]  eta: 1:46:35  lr: 0.000357  min_lr: 0.000357  loss: 3.0000 (3.0000)  weight_decay: 0.0500 (0.0500)  time: 2.5560  data: 1.9560  max mem: 53905
Epoch: [246]  [ 200/2502]  eta: 0:23:08  lr: 0.000356  min_lr: 0.000356  loss: 2.9850 (2.7554)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1820 (1.2548)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [246]  [ 400/2502]  eta: 0:20:57  lr: 0.000355  min_lr: 0.000355  loss: 2.9463 (2.7464)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2011 (1.2623)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [246]  [ 600/2502]  eta: 0:18:55  lr: 0.000354  min_lr: 0.000354  loss: 2.9597 (2.7664)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2393 (1.2632)  time: 0.6086  data: 0.0004  max mem: 53905
Epoch: [246]  [ 800/2502]  eta: 0:16:54  lr: 0.000353  min_lr: 0.000353  loss: 3.0096 (2.7707)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2529 (1.2689)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [246]  [1000/2502]  eta: 0:14:54  lr: 0.000352  min_lr: 0.000352  loss: 2.8830 (2.7737)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1341 (1.2599)  time: 0.5970  data: 0.0006  max mem: 53905
Epoch: [246]  [1200/2502]  eta: 0:12:55  lr: 0.000351  min_lr: 0.000351  loss: 2.8727 (2.7764)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2503 (1.2615)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [246]  [1400/2502]  eta: 0:10:55  lr: 0.000350  min_lr: 0.000350  loss: 2.9587 (2.7783)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2614 (1.2720)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [246]  [1600/2502]  eta: 0:08:56  lr: 0.000349  min_lr: 0.000349  loss: 2.8112 (2.7797)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2521 (1.2745)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [246]  [1800/2502]  eta: 0:06:57  lr: 0.000348  min_lr: 0.000348  loss: 3.0207 (2.7772)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3527 (1.2815)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [246]  [2000/2502]  eta: 0:04:58  lr: 0.000347  min_lr: 0.000347  loss: 2.9376 (2.7757)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2912 (1.2841)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [246]  [2200/2502]  eta: 0:02:59  lr: 0.000346  min_lr: 0.000346  loss: 2.7826 (2.7737)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3930 (1.2909)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [246]  [2400/2502]  eta: 0:01:00  lr: 0.000345  min_lr: 0.000345  loss: 2.8772 (2.7747)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2860 (1.2921)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [246]  [2501/2502]  eta: 0:00:00  lr: 0.000344  min_lr: 0.000344  loss: 2.8647 (2.7724)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2197 (1.2909)  time: 0.5328  data: 0.0007  max mem: 53905
Epoch: [246] Total time: 0:24:45 (0.5939 s / it)
Averaged stats: lr: 0.000344  min_lr: 0.000344  loss: 2.8647 (2.7666)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2197 (1.2909)
Test:  [ 0/50]  eta: 0:03:02  loss: 0.4903 (0.4903)  acc1: 95.6000 (95.6000)  acc5: 99.2000 (99.2000)  time: 3.6573  data: 3.3247  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7652 (0.7584)  acc1: 88.4000 (88.3273)  acc5: 98.4000 (98.1818)  time: 0.5909  data: 0.3027  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7965 (0.7896)  acc1: 86.4000 (87.5048)  acc5: 98.0000 (98.0191)  time: 0.2846  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9380 (0.8708)  acc1: 83.2000 (85.7032)  acc5: 96.8000 (97.1871)  time: 0.2849  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0734 (0.9241)  acc1: 80.8000 (84.3707)  acc5: 95.2000 (96.8098)  time: 0.2846  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0734 (0.9426)  acc1: 80.8000 (83.6320)  acc5: 96.0000 (96.7760)  time: 0.2842  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3547 s / it)
* Acc@1 83.906 Acc@5 96.840 loss 0.936
Accuracy of the model on the 50000 test images: 83.9%
Max accuracy: 83.91%
Epoch: [247]  [   0/2502]  eta: 1:46:19  lr: 0.000344  min_lr: 0.000344  loss: 3.2288 (3.2288)  weight_decay: 0.0500 (0.0500)  time: 2.5498  data: 1.9433  max mem: 53905
Epoch: [247]  [ 200/2502]  eta: 0:23:05  lr: 0.000343  min_lr: 0.000343  loss: 2.8360 (2.7401)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3200 (1.3448)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [247]  [ 400/2502]  eta: 0:20:56  lr: 0.000342  min_lr: 0.000342  loss: 2.7982 (2.7477)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2483 (1.3056)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [247]  [ 600/2502]  eta: 0:18:54  lr: 0.000341  min_lr: 0.000341  loss: 2.7072 (2.7746)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2616 (1.3008)  time: 0.5921  data: 0.0008  max mem: 53905
Epoch: [247]  [ 800/2502]  eta: 0:16:53  lr: 0.000340  min_lr: 0.000340  loss: 2.8930 (2.7676)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3428 (1.3256)  time: 0.5957  data: 0.0007  max mem: 53905
Epoch: [247]  [1000/2502]  eta: 0:14:53  lr: 0.000339  min_lr: 0.000339  loss: 2.9193 (2.7676)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2717 (1.3386)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [247]  [1200/2502]  eta: 0:12:54  lr: 0.000338  min_lr: 0.000338  loss: 3.0032 (2.7570)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2794 (1.3303)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [247]  [1400/2502]  eta: 0:10:55  lr: 0.000337  min_lr: 0.000337  loss: 2.8765 (2.7587)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3502 (1.3289)  time: 0.5942  data: 0.0006  max mem: 53905
Epoch: [247]  [1600/2502]  eta: 0:08:56  lr: 0.000336  min_lr: 0.000336  loss: 2.9147 (2.7602)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2823 (1.3260)  time: 0.6018  data: 0.0007  max mem: 53905
Epoch: [247]  [1800/2502]  eta: 0:06:57  lr: 0.000335  min_lr: 0.000335  loss: 2.9731 (2.7662)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5930  data: 0.0008  max mem: 53905
Epoch: [247]  [2000/2502]  eta: 0:04:58  lr: 0.000334  min_lr: 0.000334  loss: 2.7346 (2.7647)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2573 (nan)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [247]  [2200/2502]  eta: 0:02:59  lr: 0.000333  min_lr: 0.000333  loss: 2.7437 (2.7663)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3273 (nan)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [247]  [2400/2502]  eta: 0:01:00  lr: 0.000332  min_lr: 0.000332  loss: 2.8740 (2.7624)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1493 (nan)  time: 0.5908  data: 0.0007  max mem: 53905
Epoch: [247]  [2501/2502]  eta: 0:00:00  lr: 0.000332  min_lr: 0.000332  loss: 2.9423 (2.7637)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3604 (nan)  time: 0.5325  data: 0.0008  max mem: 53905
Epoch: [247] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.000332  min_lr: 0.000332  loss: 2.9423 (2.7683)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3604 (nan)
Test:  [ 0/50]  eta: 0:02:46  loss: 0.4597 (0.4597)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.3262  data: 3.0095  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7405 (0.7197)  acc1: 88.4000 (88.9091)  acc5: 98.8000 (98.1455)  time: 0.5644  data: 0.2800  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.7512 (0.7523)  acc1: 86.0000 (87.9619)  acc5: 98.0000 (98.1524)  time: 0.2851  data: 0.0038  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.8899 (0.8357)  acc1: 84.0000 (86.0129)  acc5: 97.2000 (97.3032)  time: 0.2824  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0615 (0.8867)  acc1: 80.4000 (84.5073)  acc5: 95.6000 (96.9659)  time: 0.2824  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0615 (0.9063)  acc1: 78.8000 (83.7600)  acc5: 96.0000 (96.8800)  time: 0.2821  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3468 s / it)
* Acc@1 83.866 Acc@5 96.838 loss 0.902
Accuracy of the model on the 50000 test images: 83.9%
Max accuracy: 83.91%
Epoch: [248]  [   0/2502]  eta: 1:59:57  lr: 0.000332  min_lr: 0.000332  loss: 2.4592 (2.4592)  weight_decay: 0.0500 (0.0500)  time: 2.8768  data: 1.9680  max mem: 53905
Epoch: [248]  [ 200/2502]  eta: 0:23:13  lr: 0.000331  min_lr: 0.000331  loss: 2.8274 (2.7193)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2299 (1.2910)  time: 0.5942  data: 0.0008  max mem: 53905
Epoch: [248]  [ 400/2502]  eta: 0:20:59  lr: 0.000330  min_lr: 0.000330  loss: 2.8644 (2.7429)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2987 (1.3121)  time: 0.5919  data: 0.0007  max mem: 53905
Epoch: [248]  [ 600/2502]  eta: 0:18:56  lr: 0.000329  min_lr: 0.000329  loss: 2.8125 (2.7462)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2383 (1.3124)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [248]  [ 800/2502]  eta: 0:16:55  lr: 0.000328  min_lr: 0.000328  loss: 2.8270 (2.7496)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1782 (1.2963)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [248]  [1000/2502]  eta: 0:14:54  lr: 0.000327  min_lr: 0.000327  loss: 2.6210 (2.7520)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2680 (1.3038)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [248]  [1200/2502]  eta: 0:12:55  lr: 0.000326  min_lr: 0.000326  loss: 2.8709 (2.7507)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2904 (1.3087)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [248]  [1400/2502]  eta: 0:10:55  lr: 0.000325  min_lr: 0.000325  loss: 2.7967 (2.7564)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4345 (1.3171)  time: 0.5934  data: 0.0006  max mem: 53905
Epoch: [248]  [1600/2502]  eta: 0:08:56  lr: 0.000324  min_lr: 0.000324  loss: 2.6919 (2.7509)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2975 (1.3122)  time: 0.5937  data: 0.0007  max mem: 53905
Epoch: [248]  [1800/2502]  eta: 0:06:57  lr: 0.000323  min_lr: 0.000323  loss: 2.9488 (2.7491)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3352 (1.3205)  time: 0.5930  data: 0.0007  max mem: 53905
Epoch: [248]  [2000/2502]  eta: 0:04:58  lr: 0.000322  min_lr: 0.000322  loss: 3.0127 (2.7505)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3076 (1.3204)  time: 0.5907  data: 0.0008  max mem: 53905
Epoch: [248]  [2200/2502]  eta: 0:02:59  lr: 0.000321  min_lr: 0.000321  loss: 2.9529 (2.7543)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2764 (1.3191)  time: 0.5920  data: 0.0006  max mem: 53905
Epoch: [248]  [2400/2502]  eta: 0:01:00  lr: 0.000320  min_lr: 0.000320  loss: 3.0393 (2.7536)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2431 (1.3187)  time: 0.5924  data: 0.0007  max mem: 53905
Epoch: [248]  [2501/2502]  eta: 0:00:00  lr: 0.000320  min_lr: 0.000320  loss: 2.7411 (2.7536)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1951 (1.3165)  time: 0.5336  data: 0.0007  max mem: 53905
Epoch: [248] Total time: 0:24:46 (0.5941 s / it)
Averaged stats: lr: 0.000320  min_lr: 0.000320  loss: 2.7411 (2.7585)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1951 (1.3165)
Test:  [ 0/50]  eta: 0:03:15  loss: 0.4776 (0.4776)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.9180  data: 3.5957  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.7406 (0.7182)  acc1: 88.4000 (88.8000)  acc5: 98.8000 (98.2182)  time: 0.6148  data: 0.3273  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7559 (0.7508)  acc1: 85.6000 (87.6762)  acc5: 98.0000 (98.0952)  time: 0.2848  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9042 (0.8304)  acc1: 84.0000 (85.7677)  acc5: 96.8000 (97.2258)  time: 0.2851  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0285 (0.8778)  acc1: 79.2000 (84.3024)  acc5: 95.2000 (96.9463)  time: 0.2847  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 0.9703 (0.8964)  acc1: 78.8000 (83.5840)  acc5: 96.0000 (96.8560)  time: 0.2842  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3595 s / it)
* Acc@1 83.874 Acc@5 96.880 loss 0.890
Accuracy of the model on the 50000 test images: 83.9%
Max accuracy: 83.91%
Epoch: [249]  [   0/2502]  eta: 1:51:55  lr: 0.000320  min_lr: 0.000320  loss: 3.0040 (3.0040)  weight_decay: 0.0500 (0.0500)  time: 2.6841  data: 1.7730  max mem: 53905
Epoch: [249]  [ 200/2502]  eta: 0:23:05  lr: 0.000319  min_lr: 0.000319  loss: 2.7760 (2.7183)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3527 (1.3374)  time: 0.5903  data: 0.0006  max mem: 53905
Epoch: [249]  [ 400/2502]  eta: 0:20:58  lr: 0.000318  min_lr: 0.000318  loss: 2.8474 (2.7372)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3084 (1.3376)  time: 0.6018  data: 0.0010  max mem: 53905
Epoch: [249]  [ 600/2502]  eta: 0:18:54  lr: 0.000317  min_lr: 0.000317  loss: 2.4288 (2.7223)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2273 (1.3373)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [249]  [ 800/2502]  eta: 0:16:53  lr: 0.000316  min_lr: 0.000316  loss: 2.7334 (2.7320)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2691 (1.3339)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [249]  [1000/2502]  eta: 0:14:53  lr: 0.000315  min_lr: 0.000315  loss: 2.6605 (2.7238)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4611 (1.3490)  time: 0.5915  data: 0.0007  max mem: 53905
Epoch: [249]  [1200/2502]  eta: 0:12:53  lr: 0.000314  min_lr: 0.000314  loss: 2.6464 (2.7261)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2213 (1.3407)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [249]  [1400/2502]  eta: 0:10:54  lr: 0.000313  min_lr: 0.000313  loss: 2.7729 (2.7267)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3176 (1.3407)  time: 0.5958  data: 0.0005  max mem: 53905
Epoch: [249]  [1600/2502]  eta: 0:08:55  lr: 0.000312  min_lr: 0.000312  loss: 2.8753 (2.7278)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1997 (1.3349)  time: 0.5912  data: 0.0008  max mem: 53905
Epoch: [249]  [1800/2502]  eta: 0:06:56  lr: 0.000311  min_lr: 0.000311  loss: 2.9199 (2.7311)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3981 (1.3435)  time: 0.6006  data: 0.0007  max mem: 53905
Epoch: [249]  [2000/2502]  eta: 0:04:58  lr: 0.000310  min_lr: 0.000310  loss: 2.7827 (2.7329)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2825 (1.3530)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [249]  [2200/2502]  eta: 0:02:59  lr: 0.000309  min_lr: 0.000309  loss: 2.9392 (2.7355)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2781 (1.3463)  time: 0.5917  data: 0.0006  max mem: 53905
Epoch: [249]  [2400/2502]  eta: 0:01:00  lr: 0.000308  min_lr: 0.000308  loss: 2.6682 (2.7354)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2583 (1.3469)  time: 0.5923  data: 0.0007  max mem: 53905
Epoch: [249]  [2501/2502]  eta: 0:00:00  lr: 0.000307  min_lr: 0.000307  loss: 2.7220 (2.7356)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2083 (1.3471)  time: 0.5430  data: 0.0007  max mem: 53905
Epoch: [249] Total time: 0:24:45 (0.5936 s / it)
Averaged stats: lr: 0.000307  min_lr: 0.000307  loss: 2.7220 (2.7533)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2083 (1.3471)
Test:  [ 0/50]  eta: 0:02:38  loss: 0.4664 (0.4664)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.1621  data: 2.8224  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7305 (0.7299)  acc1: 89.2000 (88.7273)  acc5: 98.8000 (98.4000)  time: 0.5620  data: 0.2728  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.7521 (0.7618)  acc1: 85.2000 (87.5810)  acc5: 98.0000 (98.2095)  time: 0.2949  data: 0.0092  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.8713 (0.8368)  acc1: 84.0000 (85.9613)  acc5: 96.8000 (97.5226)  time: 0.2898  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0042 (0.8887)  acc1: 80.4000 (84.5073)  acc5: 95.6000 (97.1317)  time: 0.2882  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0000 (0.9073)  acc1: 79.2000 (83.8640)  acc5: 96.0000 (97.0480)  time: 0.2846  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3501 s / it)
* Acc@1 83.952 Acc@5 96.904 loss 0.904
Accuracy of the model on the 50000 test images: 84.0%
Max accuracy: 83.95%
Epoch: [250]  [   0/2502]  eta: 1:51:58  lr: 0.000307  min_lr: 0.000307  loss: 2.8385 (2.8385)  weight_decay: 0.0500 (0.0500)  time: 2.6854  data: 2.0809  max mem: 53905
Epoch: [250]  [ 200/2502]  eta: 0:23:05  lr: 0.000307  min_lr: 0.000307  loss: 2.6072 (2.7133)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3545 (1.2983)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [250]  [ 400/2502]  eta: 0:20:55  lr: 0.000306  min_lr: 0.000306  loss: 2.7071 (2.7354)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3637 (1.3508)  time: 0.5997  data: 0.0004  max mem: 53905
Epoch: [250]  [ 600/2502]  eta: 0:18:54  lr: 0.000305  min_lr: 0.000305  loss: 2.8182 (2.7406)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3210 (1.3420)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [250]  [ 800/2502]  eta: 0:16:53  lr: 0.000304  min_lr: 0.000304  loss: 2.5456 (2.7349)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4089 (1.3446)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [250]  [1000/2502]  eta: 0:14:53  lr: 0.000303  min_lr: 0.000303  loss: 2.5562 (2.7391)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1980 (1.3383)  time: 0.5902  data: 0.0004  max mem: 53905
Epoch: [250]  [1200/2502]  eta: 0:12:53  lr: 0.000302  min_lr: 0.000302  loss: 2.6640 (2.7392)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2288 (1.3310)  time: 0.5988  data: 0.0005  max mem: 53905
Epoch: [250]  [1400/2502]  eta: 0:10:54  lr: 0.000301  min_lr: 0.000301  loss: 2.8679 (2.7349)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3457 (1.3299)  time: 0.5907  data: 0.0005  max mem: 53905
Epoch: [250]  [1600/2502]  eta: 0:08:55  lr: 0.000300  min_lr: 0.000300  loss: 2.7296 (2.7411)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2666 (1.3282)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [250]  [1800/2502]  eta: 0:06:56  lr: 0.000299  min_lr: 0.000299  loss: 2.7994 (2.7439)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2023 (1.3234)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [250]  [2000/2502]  eta: 0:04:57  lr: 0.000298  min_lr: 0.000298  loss: 2.8388 (2.7398)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3966 (1.3245)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [250]  [2200/2502]  eta: 0:02:59  lr: 0.000297  min_lr: 0.000297  loss: 2.9194 (2.7410)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2764 (1.3257)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [250]  [2400/2502]  eta: 0:01:00  lr: 0.000296  min_lr: 0.000296  loss: 2.8312 (2.7409)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2987 (1.3351)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [250]  [2501/2502]  eta: 0:00:00  lr: 0.000296  min_lr: 0.000296  loss: 2.8660 (2.7382)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2995 (1.3428)  time: 0.5331  data: 0.0009  max mem: 53905
Epoch: [250] Total time: 0:24:43 (0.5930 s / it)
Averaged stats: lr: 0.000296  min_lr: 0.000296  loss: 2.8660 (2.7402)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2995 (1.3428)
Test:  [ 0/50]  eta: 0:03:04  loss: 0.5545 (0.5545)  acc1: 96.8000 (96.8000)  acc5: 99.2000 (99.2000)  time: 3.6994  data: 3.3848  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8283 (0.8245)  acc1: 88.8000 (88.6182)  acc5: 98.4000 (98.0727)  time: 0.5949  data: 0.3081  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8435 (0.8495)  acc1: 86.8000 (87.5810)  acc5: 98.0000 (98.0952)  time: 0.2847  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9589 (0.9320)  acc1: 84.0000 (85.7677)  acc5: 97.2000 (97.2258)  time: 0.2850  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1411 (0.9844)  acc1: 81.2000 (84.3317)  acc5: 95.2000 (96.8390)  time: 0.2846  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1411 (1.0057)  acc1: 78.8000 (83.5920)  acc5: 95.6000 (96.7920)  time: 0.2842  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3558 s / it)
* Acc@1 83.868 Acc@5 96.824 loss 0.999
Accuracy of the model on the 50000 test images: 83.9%
Max accuracy: 83.95%
Epoch: [251]  [   0/2502]  eta: 1:46:01  lr: 0.000296  min_lr: 0.000296  loss: 3.0368 (3.0368)  weight_decay: 0.0500 (0.0500)  time: 2.5425  data: 1.9462  max mem: 53905
Epoch: [251]  [ 200/2502]  eta: 0:23:08  lr: 0.000295  min_lr: 0.000295  loss: 2.8339 (2.6791)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2554 (1.2946)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [251]  [ 400/2502]  eta: 0:20:56  lr: 0.000294  min_lr: 0.000294  loss: 2.8546 (2.7018)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4144 (1.3390)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [251]  [ 600/2502]  eta: 0:18:52  lr: 0.000293  min_lr: 0.000293  loss: 2.7254 (2.7075)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3621 (1.3427)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [251]  [ 800/2502]  eta: 0:16:52  lr: 0.000292  min_lr: 0.000292  loss: 2.7657 (2.7145)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3324 (inf)  time: 0.5952  data: 0.0004  max mem: 53905
Epoch: [251]  [1000/2502]  eta: 0:14:53  lr: 0.000291  min_lr: 0.000291  loss: 2.3189 (2.7144)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4216 (inf)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [251]  [1200/2502]  eta: 0:12:53  lr: 0.000290  min_lr: 0.000290  loss: 2.8640 (2.7247)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3260 (inf)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [251]  [1400/2502]  eta: 0:10:54  lr: 0.000289  min_lr: 0.000289  loss: 2.9068 (2.7288)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3437 (inf)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [251]  [1600/2502]  eta: 0:08:55  lr: 0.000288  min_lr: 0.000288  loss: 2.9580 (2.7290)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3592 (inf)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [251]  [1800/2502]  eta: 0:06:56  lr: 0.000287  min_lr: 0.000287  loss: 2.8428 (2.7302)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3141 (inf)  time: 0.5992  data: 0.0005  max mem: 53905
Epoch: [251]  [2000/2502]  eta: 0:04:58  lr: 0.000286  min_lr: 0.000286  loss: 2.7805 (2.7337)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3911 (inf)  time: 0.5946  data: 0.0005  max mem: 53905
Epoch: [251]  [2200/2502]  eta: 0:02:59  lr: 0.000285  min_lr: 0.000285  loss: 3.0462 (2.7380)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3430 (inf)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [251]  [2400/2502]  eta: 0:01:00  lr: 0.000284  min_lr: 0.000284  loss: 2.8521 (2.7404)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3613 (inf)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [251]  [2501/2502]  eta: 0:00:00  lr: 0.000284  min_lr: 0.000284  loss: 3.0202 (2.7421)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5338  data: 0.0007  max mem: 53905
Epoch: [251] Total time: 0:24:45 (0.5935 s / it)
Averaged stats: lr: 0.000284  min_lr: 0.000284  loss: 3.0202 (2.7430)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)
Test:  [ 0/50]  eta: 0:02:45  loss: 0.5780 (0.5780)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.3080  data: 2.9735  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8208 (0.8076)  acc1: 88.8000 (88.6545)  acc5: 98.8000 (98.1091)  time: 0.5959  data: 0.3073  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8413 (0.8448)  acc1: 85.6000 (87.5810)  acc5: 98.0000 (98.0571)  time: 0.3049  data: 0.0206  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9714 (0.9240)  acc1: 83.6000 (85.7290)  acc5: 96.8000 (97.2387)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1156 (0.9727)  acc1: 80.8000 (84.3805)  acc5: 95.6000 (96.8976)  time: 0.2846  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1119 (0.9919)  acc1: 80.0000 (83.7040)  acc5: 95.6000 (96.7760)  time: 0.2844  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3552 s / it)
* Acc@1 83.830 Acc@5 96.824 loss 0.985
Accuracy of the model on the 50000 test images: 83.8%
Max accuracy: 83.95%
Epoch: [252]  [   0/2502]  eta: 1:57:51  lr: 0.000284  min_lr: 0.000284  loss: 2.9805 (2.9805)  weight_decay: 0.0500 (0.0500)  time: 2.8264  data: 1.9825  max mem: 53905
Epoch: [252]  [ 200/2502]  eta: 0:23:07  lr: 0.000283  min_lr: 0.000283  loss: 2.7815 (2.7043)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2479 (1.2897)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [252]  [ 400/2502]  eta: 0:20:57  lr: 0.000282  min_lr: 0.000282  loss: 2.6810 (2.6851)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2782 (1.3367)  time: 0.5913  data: 0.0005  max mem: 53905
Epoch: [252]  [ 600/2502]  eta: 0:18:55  lr: 0.000281  min_lr: 0.000281  loss: 2.7864 (2.6933)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3164 (1.3379)  time: 0.5926  data: 0.0006  max mem: 53905
Epoch: [252]  [ 800/2502]  eta: 0:16:53  lr: 0.000280  min_lr: 0.000280  loss: 2.7392 (2.7011)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2654 (1.3307)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [252]  [1000/2502]  eta: 0:14:54  lr: 0.000279  min_lr: 0.000279  loss: 2.8519 (2.7082)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2745 (1.3364)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [252]  [1200/2502]  eta: 0:12:54  lr: 0.000279  min_lr: 0.000279  loss: 2.8082 (2.7096)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3210 (1.3410)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [252]  [1400/2502]  eta: 0:10:54  lr: 0.000278  min_lr: 0.000278  loss: 2.9259 (2.7178)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3185 (1.3496)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [252]  [1600/2502]  eta: 0:08:55  lr: 0.000277  min_lr: 0.000277  loss: 2.7322 (2.7162)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3184 (1.3479)  time: 0.5950  data: 0.0005  max mem: 53905
Epoch: [252]  [1800/2502]  eta: 0:06:56  lr: 0.000276  min_lr: 0.000276  loss: 2.7802 (2.7158)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2902 (1.3471)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [252]  [2000/2502]  eta: 0:04:58  lr: 0.000275  min_lr: 0.000275  loss: 3.0599 (2.7202)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3379 (1.3562)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [252]  [2200/2502]  eta: 0:02:59  lr: 0.000274  min_lr: 0.000274  loss: 2.7792 (2.7192)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3400 (1.3580)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [252]  [2400/2502]  eta: 0:01:00  lr: 0.000273  min_lr: 0.000273  loss: 2.8365 (2.7210)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3429 (1.3653)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [252]  [2501/2502]  eta: 0:00:00  lr: 0.000273  min_lr: 0.000273  loss: 2.9592 (2.7207)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2512 (1.3630)  time: 0.5330  data: 0.0009  max mem: 53905
Epoch: [252] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.000273  min_lr: 0.000273  loss: 2.9592 (2.7259)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2512 (1.3630)
Test:  [ 0/50]  eta: 0:02:44  loss: 0.5388 (0.5388)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.2801  data: 2.9467  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8133 (0.7980)  acc1: 89.6000 (88.9818)  acc5: 98.0000 (98.0364)  time: 0.6126  data: 0.3240  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8308 (0.8275)  acc1: 85.6000 (87.9619)  acc5: 98.0000 (98.1714)  time: 0.3155  data: 0.0311  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9424 (0.9075)  acc1: 85.2000 (86.1032)  acc5: 97.2000 (97.3032)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1079 (0.9615)  acc1: 80.4000 (84.4390)  acc5: 95.2000 (96.8683)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1079 (0.9824)  acc1: 79.6000 (83.7200)  acc5: 95.6000 (96.7760)  time: 0.2844  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3590 s / it)
* Acc@1 83.786 Acc@5 96.862 loss 0.977
Accuracy of the model on the 50000 test images: 83.8%
Max accuracy: 83.95%
Epoch: [253]  [   0/2502]  eta: 1:59:02  lr: 0.000273  min_lr: 0.000273  loss: 3.2989 (3.2989)  weight_decay: 0.0500 (0.0500)  time: 2.8547  data: 1.9172  max mem: 53905
Epoch: [253]  [ 200/2502]  eta: 0:23:11  lr: 0.000272  min_lr: 0.000272  loss: 2.9808 (2.7277)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3228 (1.4278)  time: 0.5934  data: 0.0004  max mem: 53905
Epoch: [253]  [ 400/2502]  eta: 0:21:00  lr: 0.000271  min_lr: 0.000271  loss: 2.3650 (2.7168)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2660 (1.3891)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [253]  [ 600/2502]  eta: 0:18:56  lr: 0.000270  min_lr: 0.000270  loss: 2.8662 (2.7200)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3559 (1.3853)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [253]  [ 800/2502]  eta: 0:16:54  lr: 0.000269  min_lr: 0.000269  loss: 2.7555 (2.7265)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3891 (1.3845)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [253]  [1000/2502]  eta: 0:14:53  lr: 0.000268  min_lr: 0.000268  loss: 2.7421 (2.7328)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3999 (1.3877)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [253]  [1200/2502]  eta: 0:12:54  lr: 0.000267  min_lr: 0.000267  loss: 2.6955 (2.7282)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3516 (1.3863)  time: 0.5983  data: 0.0006  max mem: 53905
Epoch: [253]  [1400/2502]  eta: 0:10:55  lr: 0.000266  min_lr: 0.000266  loss: 2.8358 (2.7345)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3976 (1.3860)  time: 0.5935  data: 0.0005  max mem: 53905
Epoch: [253]  [1600/2502]  eta: 0:08:55  lr: 0.000265  min_lr: 0.000265  loss: 2.9324 (2.7367)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4354 (1.4024)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [253]  [1800/2502]  eta: 0:06:57  lr: 0.000265  min_lr: 0.000265  loss: 2.7819 (2.7347)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3802 (1.4033)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [253]  [2000/2502]  eta: 0:04:58  lr: 0.000264  min_lr: 0.000264  loss: 2.7990 (2.7377)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4145 (1.4062)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [253]  [2200/2502]  eta: 0:02:59  lr: 0.000263  min_lr: 0.000263  loss: 2.7896 (2.7385)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3938 (1.4026)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [253]  [2400/2502]  eta: 0:01:00  lr: 0.000262  min_lr: 0.000262  loss: 2.8501 (2.7393)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3657 (1.4016)  time: 0.5902  data: 0.0005  max mem: 53905
Epoch: [253]  [2501/2502]  eta: 0:00:00  lr: 0.000261  min_lr: 0.000261  loss: 2.5468 (2.7383)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3886 (1.4030)  time: 0.5319  data: 0.0007  max mem: 53905
Epoch: [253] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.000261  min_lr: 0.000261  loss: 2.5468 (2.7343)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3886 (1.4030)
Test:  [ 0/50]  eta: 0:03:04  loss: 0.4959 (0.4959)  acc1: 96.0000 (96.0000)  acc5: 99.2000 (99.2000)  time: 3.6961  data: 3.3755  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7534 (0.7457)  acc1: 88.4000 (88.7273)  acc5: 98.8000 (98.3273)  time: 0.5925  data: 0.3073  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7692 (0.7773)  acc1: 86.0000 (87.8857)  acc5: 98.4000 (98.2476)  time: 0.2824  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9063 (0.8602)  acc1: 84.8000 (86.0645)  acc5: 96.8000 (97.3548)  time: 0.2827  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0746 (0.9120)  acc1: 80.0000 (84.5659)  acc5: 95.2000 (97.0049)  time: 0.2826  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0500 (0.9272)  acc1: 80.0000 (84.0000)  acc5: 96.0000 (96.9440)  time: 0.2824  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3540 s / it)
* Acc@1 83.978 Acc@5 96.974 loss 0.923
Accuracy of the model on the 50000 test images: 84.0%
Max accuracy: 83.98%
Epoch: [254]  [   0/2502]  eta: 1:33:14  lr: 0.000261  min_lr: 0.000261  loss: 2.0181 (2.0181)  weight_decay: 0.0500 (0.0500)  time: 2.2358  data: 1.6351  max mem: 53905
Epoch: [254]  [ 200/2502]  eta: 0:23:01  lr: 0.000261  min_lr: 0.000261  loss: 2.8540 (2.7220)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3281 (1.3930)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [254]  [ 400/2502]  eta: 0:20:54  lr: 0.000260  min_lr: 0.000260  loss: 2.7616 (2.7175)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3060 (1.3901)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [254]  [ 600/2502]  eta: 0:18:52  lr: 0.000259  min_lr: 0.000259  loss: 2.9422 (2.7337)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3690 (1.3693)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [254]  [ 800/2502]  eta: 0:16:52  lr: 0.000258  min_lr: 0.000258  loss: 2.8284 (2.7260)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3549 (1.3621)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [254]  [1000/2502]  eta: 0:14:52  lr: 0.000257  min_lr: 0.000257  loss: 2.7658 (2.7289)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3038 (1.3631)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [254]  [1200/2502]  eta: 0:12:53  lr: 0.000256  min_lr: 0.000256  loss: 2.8551 (2.7309)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2879 (1.3606)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [254]  [1400/2502]  eta: 0:10:54  lr: 0.000255  min_lr: 0.000255  loss: 2.6520 (2.7222)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2912 (1.3641)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [254]  [1600/2502]  eta: 0:08:55  lr: 0.000254  min_lr: 0.000254  loss: 2.6823 (2.7220)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3419 (1.3719)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [254]  [1800/2502]  eta: 0:06:56  lr: 0.000254  min_lr: 0.000254  loss: 2.9115 (2.7218)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3826 (1.3808)  time: 0.5998  data: 0.0004  max mem: 53905
Epoch: [254]  [2000/2502]  eta: 0:04:57  lr: 0.000253  min_lr: 0.000253  loss: 2.7300 (2.7243)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4255 (1.3896)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [254]  [2200/2502]  eta: 0:02:59  lr: 0.000252  min_lr: 0.000252  loss: 2.7322 (2.7265)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3444 (1.3933)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [254]  [2400/2502]  eta: 0:01:00  lr: 0.000251  min_lr: 0.000251  loss: 2.8347 (2.7282)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3010 (1.3925)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [254]  [2501/2502]  eta: 0:00:00  lr: 0.000251  min_lr: 0.000251  loss: 2.9026 (2.7283)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3838 (1.3925)  time: 0.5339  data: 0.0008  max mem: 53905
Epoch: [254] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.000251  min_lr: 0.000251  loss: 2.9026 (2.7206)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3838 (1.3925)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.6309 (0.6309)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.0152  data: 2.6903  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8781 (0.8620)  acc1: 89.6000 (89.0182)  acc5: 98.4000 (98.1455)  time: 0.5680  data: 0.2802  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8902 (0.8944)  acc1: 86.4000 (87.9619)  acc5: 97.6000 (98.0191)  time: 0.3042  data: 0.0198  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0491 (0.9792)  acc1: 84.0000 (86.0129)  acc5: 96.4000 (97.2258)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1814 (1.0328)  acc1: 81.2000 (84.5073)  acc5: 95.6000 (96.8000)  time: 0.2847  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1906 (1.0481)  acc1: 80.0000 (83.9280)  acc5: 95.6000 (96.7200)  time: 0.2845  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3491 s / it)
* Acc@1 83.990 Acc@5 96.828 loss 1.043
Accuracy of the model on the 50000 test images: 84.0%
Max accuracy: 83.99%
Epoch: [255]  [   0/2502]  eta: 1:38:25  lr: 0.000250  min_lr: 0.000250  loss: 3.0641 (3.0641)  weight_decay: 0.0500 (0.0500)  time: 2.3602  data: 1.7482  max mem: 53905
Epoch: [255]  [ 200/2502]  eta: 0:23:08  lr: 0.000250  min_lr: 0.000250  loss: 2.9530 (2.7190)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3395 (1.3915)  time: 0.6010  data: 0.0005  max mem: 53905
Epoch: [255]  [ 400/2502]  eta: 0:20:57  lr: 0.000249  min_lr: 0.000249  loss: 2.6553 (2.7215)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3028 (1.3715)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [255]  [ 600/2502]  eta: 0:18:54  lr: 0.000248  min_lr: 0.000248  loss: 2.8514 (2.7236)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4024 (1.3692)  time: 0.5988  data: 0.0005  max mem: 53905
Epoch: [255]  [ 800/2502]  eta: 0:16:54  lr: 0.000247  min_lr: 0.000247  loss: 2.5646 (2.7200)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4482 (1.4077)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [255]  [1000/2502]  eta: 0:14:54  lr: 0.000246  min_lr: 0.000246  loss: 2.9122 (2.7144)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3049 (1.4002)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [255]  [1200/2502]  eta: 0:12:55  lr: 0.000245  min_lr: 0.000245  loss: 2.6500 (2.7155)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3382 (1.3898)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [255]  [1400/2502]  eta: 0:10:56  lr: 0.000244  min_lr: 0.000244  loss: 2.5319 (2.7115)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3081 (1.3926)  time: 0.5934  data: 0.0004  max mem: 53905
Epoch: [255]  [1600/2502]  eta: 0:08:56  lr: 0.000244  min_lr: 0.000244  loss: 2.8982 (2.7136)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3210 (1.3950)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [255]  [1800/2502]  eta: 0:06:57  lr: 0.000243  min_lr: 0.000243  loss: 2.8854 (2.7126)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2876 (1.3936)  time: 0.5997  data: 0.0005  max mem: 53905
Epoch: [255]  [2000/2502]  eta: 0:04:58  lr: 0.000242  min_lr: 0.000242  loss: 2.4452 (2.7152)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5085 (1.3961)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [255]  [2200/2502]  eta: 0:02:59  lr: 0.000241  min_lr: 0.000241  loss: 2.4814 (2.7167)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3589 (1.3980)  time: 0.5945  data: 0.0005  max mem: 53905
Epoch: [255]  [2400/2502]  eta: 0:01:00  lr: 0.000240  min_lr: 0.000240  loss: 2.8589 (2.7176)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4665 (1.4023)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [255]  [2501/2502]  eta: 0:00:00  lr: 0.000240  min_lr: 0.000240  loss: 2.7636 (2.7154)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3563 (1.3999)  time: 0.5330  data: 0.0008  max mem: 53905
Epoch: [255] Total time: 0:24:48 (0.5947 s / it)
Averaged stats: lr: 0.000240  min_lr: 0.000240  loss: 2.7636 (2.7148)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3563 (1.3999)
Test:  [ 0/50]  eta: 0:02:40  loss: 0.4929 (0.4929)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.2097  data: 2.9010  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.7361 (0.7258)  acc1: 87.6000 (88.9455)  acc5: 98.4000 (98.2182)  time: 0.5481  data: 0.2642  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.7492 (0.7571)  acc1: 87.2000 (87.9810)  acc5: 98.0000 (98.1905)  time: 0.2820  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9109 (0.8422)  acc1: 84.0000 (86.1677)  acc5: 97.6000 (97.2645)  time: 0.2822  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0456 (0.8951)  acc1: 81.2000 (84.6829)  acc5: 95.2000 (96.8976)  time: 0.2820  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0247 (0.9134)  acc1: 80.4000 (84.0400)  acc5: 95.6000 (96.8160)  time: 0.2819  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3432 s / it)
* Acc@1 84.158 Acc@5 96.886 loss 0.908
Accuracy of the model on the 50000 test images: 84.2%
Max accuracy: 84.16%
Epoch: [256]  [   0/2502]  eta: 1:38:22  lr: 0.000240  min_lr: 0.000240  loss: 2.9835 (2.9835)  weight_decay: 0.0500 (0.0500)  time: 2.3593  data: 1.7572  max mem: 53905
Epoch: [256]  [ 200/2502]  eta: 0:23:01  lr: 0.000239  min_lr: 0.000239  loss: 2.6579 (2.6983)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4161 (1.4973)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [256]  [ 400/2502]  eta: 0:20:55  lr: 0.000238  min_lr: 0.000238  loss: 2.5946 (2.6580)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3020 (1.4833)  time: 0.6034  data: 0.0004  max mem: 53905
Epoch: [256]  [ 600/2502]  eta: 0:18:52  lr: 0.000237  min_lr: 0.000237  loss: 2.7215 (2.6596)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2969 (1.4545)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [256]  [ 800/2502]  eta: 0:16:52  lr: 0.000236  min_lr: 0.000236  loss: 2.5757 (2.6670)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3671 (1.4477)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [256]  [1000/2502]  eta: 0:14:53  lr: 0.000236  min_lr: 0.000236  loss: 2.7473 (2.6735)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4303 (1.4525)  time: 0.5981  data: 0.0004  max mem: 53905
Epoch: [256]  [1200/2502]  eta: 0:12:54  lr: 0.000235  min_lr: 0.000235  loss: 2.6320 (2.6744)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4685 (1.4530)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [256]  [1400/2502]  eta: 0:10:55  lr: 0.000234  min_lr: 0.000234  loss: 2.7537 (2.6780)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2713 (1.4418)  time: 0.5955  data: 0.0004  max mem: 53905
Epoch: [256]  [1600/2502]  eta: 0:08:56  lr: 0.000233  min_lr: 0.000233  loss: 2.7424 (2.6892)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3373 (1.4342)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [256]  [1800/2502]  eta: 0:06:57  lr: 0.000232  min_lr: 0.000232  loss: 2.7349 (2.6912)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4879 (1.4425)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [256]  [2000/2502]  eta: 0:04:58  lr: 0.000231  min_lr: 0.000231  loss: 2.6287 (2.6948)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3585 (1.4409)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [256]  [2200/2502]  eta: 0:02:59  lr: 0.000230  min_lr: 0.000230  loss: 2.5822 (2.6954)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4260 (1.4446)  time: 0.5928  data: 0.0006  max mem: 53905
Epoch: [256]  [2400/2502]  eta: 0:01:00  lr: 0.000230  min_lr: 0.000230  loss: 2.8388 (2.6966)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3421 (1.4415)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [256]  [2501/2502]  eta: 0:00:00  lr: 0.000229  min_lr: 0.000229  loss: 2.8984 (2.6966)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5063 (1.4414)  time: 0.5332  data: 0.0007  max mem: 53905
Epoch: [256] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.000229  min_lr: 0.000229  loss: 2.8984 (2.7106)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5063 (1.4414)
Test:  [ 0/50]  eta: 0:02:40  loss: 0.5467 (0.5467)  acc1: 96.0000 (96.0000)  acc5: 99.2000 (99.2000)  time: 3.2163  data: 2.8938  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8234 (0.7980)  acc1: 86.8000 (88.6545)  acc5: 98.0000 (98.1091)  time: 0.5621  data: 0.2744  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8243 (0.8352)  acc1: 85.6000 (87.8095)  acc5: 98.0000 (98.0191)  time: 0.2910  data: 0.0065  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9911 (0.9146)  acc1: 84.8000 (86.1936)  acc5: 96.8000 (97.1742)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1019 (0.9650)  acc1: 81.2000 (84.6146)  acc5: 95.2000 (96.7805)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1147 (0.9829)  acc1: 79.2000 (84.0400)  acc5: 95.6000 (96.7200)  time: 0.2854  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3487 s / it)
* Acc@1 84.230 Acc@5 96.856 loss 0.976
Accuracy of the model on the 50000 test images: 84.2%
Max accuracy: 84.23%
Epoch: [257]  [   0/2502]  eta: 1:34:27  lr: 0.000229  min_lr: 0.000229  loss: 2.6473 (2.6473)  weight_decay: 0.0500 (0.0500)  time: 2.2650  data: 1.6727  max mem: 53905
Epoch: [257]  [ 200/2502]  eta: 0:23:10  lr: 0.000228  min_lr: 0.000228  loss: 2.6496 (2.6245)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3509 (1.4190)  time: 0.6004  data: 0.0007  max mem: 53905
Epoch: [257]  [ 400/2502]  eta: 0:20:58  lr: 0.000228  min_lr: 0.000228  loss: 2.6349 (2.6630)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3481 (1.4135)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [257]  [ 600/2502]  eta: 0:18:54  lr: 0.000227  min_lr: 0.000227  loss: 2.8984 (2.6825)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3772 (1.4266)  time: 0.5904  data: 0.0005  max mem: 53905
Epoch: [257]  [ 800/2502]  eta: 0:16:53  lr: 0.000226  min_lr: 0.000226  loss: 2.8051 (2.6708)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3961 (1.4389)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [257]  [1000/2502]  eta: 0:14:53  lr: 0.000225  min_lr: 0.000225  loss: 2.8459 (2.6812)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3192 (1.4372)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [257]  [1200/2502]  eta: 0:12:54  lr: 0.000224  min_lr: 0.000224  loss: 2.6378 (2.6812)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3526 (1.4260)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [257]  [1400/2502]  eta: 0:10:54  lr: 0.000223  min_lr: 0.000223  loss: 2.9379 (2.6835)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4465 (1.4258)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [257]  [1600/2502]  eta: 0:08:55  lr: 0.000223  min_lr: 0.000223  loss: 2.7016 (2.6792)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3763 (1.4258)  time: 0.5935  data: 0.0004  max mem: 53905
Epoch: [257]  [1800/2502]  eta: 0:06:57  lr: 0.000222  min_lr: 0.000222  loss: 2.6594 (2.6839)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3186 (1.4204)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [257]  [2000/2502]  eta: 0:04:58  lr: 0.000221  min_lr: 0.000221  loss: 2.7888 (2.6880)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3414 (1.4220)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [257]  [2200/2502]  eta: 0:02:59  lr: 0.000220  min_lr: 0.000220  loss: 2.6133 (2.6865)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2447 (nan)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [257]  [2400/2502]  eta: 0:01:00  lr: 0.000219  min_lr: 0.000219  loss: 2.6413 (2.6876)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2749 (nan)  time: 0.5945  data: 0.0005  max mem: 53905
Epoch: [257]  [2501/2502]  eta: 0:00:00  lr: 0.000219  min_lr: 0.000219  loss: 2.7983 (2.6889)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4331 (nan)  time: 0.5335  data: 0.0008  max mem: 53905
Epoch: [257] Total time: 0:24:45 (0.5936 s / it)
Averaged stats: lr: 0.000219  min_lr: 0.000219  loss: 2.7983 (2.6986)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4331 (nan)
Test:  [ 0/50]  eta: 0:02:29  loss: 0.5260 (0.5260)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 2.9925  data: 2.6724  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7685 (0.7567)  acc1: 90.0000 (89.0909)  acc5: 98.8000 (98.0364)  time: 0.5557  data: 0.2666  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.7711 (0.7835)  acc1: 85.6000 (87.9810)  acc5: 97.6000 (98.0381)  time: 0.2986  data: 0.0133  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9019 (0.8580)  acc1: 84.4000 (86.0258)  acc5: 97.2000 (97.3290)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0439 (0.9046)  acc1: 81.2000 (84.7512)  acc5: 95.6000 (96.9854)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0439 (0.9223)  acc1: 81.2000 (84.1680)  acc5: 96.4000 (96.8800)  time: 0.2845  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3470 s / it)
* Acc@1 84.232 Acc@5 96.952 loss 0.917
Accuracy of the model on the 50000 test images: 84.2%
Max accuracy: 84.23%
Epoch: [258]  [   0/2502]  eta: 1:32:45  lr: 0.000219  min_lr: 0.000219  loss: 2.2655 (2.2655)  weight_decay: 0.0500 (0.0500)  time: 2.2244  data: 1.6229  max mem: 53905
Epoch: [258]  [ 200/2502]  eta: 0:23:00  lr: 0.000218  min_lr: 0.000218  loss: 2.7721 (2.7008)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4117 (1.5499)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [258]  [ 400/2502]  eta: 0:20:54  lr: 0.000217  min_lr: 0.000217  loss: 2.8451 (2.6792)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3496 (1.4984)  time: 0.6010  data: 0.0004  max mem: 53905
Epoch: [258]  [ 600/2502]  eta: 0:18:52  lr: 0.000216  min_lr: 0.000216  loss: 2.7635 (2.6758)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4049 (1.4741)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [258]  [ 800/2502]  eta: 0:16:52  lr: 0.000216  min_lr: 0.000216  loss: 2.5781 (2.6743)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4320 (1.4587)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [258]  [1000/2502]  eta: 0:14:53  lr: 0.000215  min_lr: 0.000215  loss: 2.7225 (2.6758)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4635 (1.4656)  time: 0.5972  data: 0.0005  max mem: 53905
Epoch: [258]  [1200/2502]  eta: 0:12:53  lr: 0.000214  min_lr: 0.000214  loss: 2.6807 (2.6699)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3320 (1.4664)  time: 0.5927  data: 0.0006  max mem: 53905
Epoch: [258]  [1400/2502]  eta: 0:10:54  lr: 0.000213  min_lr: 0.000213  loss: 2.8992 (2.6690)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4619 (1.4632)  time: 0.5958  data: 0.0006  max mem: 53905
Epoch: [258]  [1600/2502]  eta: 0:08:55  lr: 0.000212  min_lr: 0.000212  loss: 2.7539 (2.6730)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4723 (1.4654)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [258]  [1800/2502]  eta: 0:06:56  lr: 0.000212  min_lr: 0.000212  loss: 2.5979 (2.6792)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3915 (1.4600)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [258]  [2000/2502]  eta: 0:04:58  lr: 0.000211  min_lr: 0.000211  loss: 2.7819 (2.6719)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3551 (1.4562)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [258]  [2200/2502]  eta: 0:02:59  lr: 0.000210  min_lr: 0.000210  loss: 2.8876 (2.6783)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3998 (1.4536)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [258]  [2400/2502]  eta: 0:01:00  lr: 0.000209  min_lr: 0.000209  loss: 2.6082 (2.6761)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3317 (1.4538)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [258]  [2501/2502]  eta: 0:00:00  lr: 0.000209  min_lr: 0.000209  loss: 2.6764 (2.6739)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3739 (1.4533)  time: 0.5398  data: 0.0008  max mem: 53905
Epoch: [258] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.000209  min_lr: 0.000209  loss: 2.6764 (2.6914)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3739 (1.4533)
Test:  [ 0/50]  eta: 0:03:20  loss: 0.4267 (0.4267)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 4.0010  data: 3.6675  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.6837 (0.6781)  acc1: 90.0000 (88.9455)  acc5: 99.2000 (98.2545)  time: 0.6225  data: 0.3338  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7156 (0.7141)  acc1: 86.4000 (87.7905)  acc5: 98.4000 (98.1333)  time: 0.2849  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.8451 (0.7986)  acc1: 82.8000 (85.8581)  acc5: 97.2000 (97.4323)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0155 (0.8493)  acc1: 82.0000 (84.5561)  acc5: 96.0000 (97.0634)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 0.9658 (0.8663)  acc1: 80.4000 (84.0800)  acc5: 96.0000 (96.9840)  time: 0.2846  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3617 s / it)
* Acc@1 84.204 Acc@5 96.942 loss 0.858
Accuracy of the model on the 50000 test images: 84.2%
Max accuracy: 84.23%
Epoch: [259]  [   0/2502]  eta: 1:35:33  lr: 0.000209  min_lr: 0.000209  loss: 2.9887 (2.9887)  weight_decay: 0.0500 (0.0500)  time: 2.2917  data: 1.7072  max mem: 53905
Epoch: [259]  [ 200/2502]  eta: 0:23:05  lr: 0.000208  min_lr: 0.000208  loss: 2.8136 (2.7286)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3013 (1.4039)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [259]  [ 400/2502]  eta: 0:20:55  lr: 0.000207  min_lr: 0.000207  loss: 2.7748 (2.7419)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4703 (1.4283)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [259]  [ 600/2502]  eta: 0:18:53  lr: 0.000206  min_lr: 0.000206  loss: 2.8983 (2.7334)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4495 (1.4487)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [259]  [ 800/2502]  eta: 0:16:52  lr: 0.000206  min_lr: 0.000206  loss: 2.8087 (2.7150)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4615 (1.4470)  time: 0.5979  data: 0.0004  max mem: 53905
Epoch: [259]  [1000/2502]  eta: 0:14:52  lr: 0.000205  min_lr: 0.000205  loss: 2.6597 (2.7014)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4903 (1.4641)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [259]  [1200/2502]  eta: 0:12:53  lr: 0.000204  min_lr: 0.000204  loss: 2.6342 (2.7096)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3539 (1.4542)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [259]  [1400/2502]  eta: 0:10:54  lr: 0.000203  min_lr: 0.000203  loss: 2.6514 (2.7081)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5135 (1.4590)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [259]  [1600/2502]  eta: 0:08:55  lr: 0.000203  min_lr: 0.000203  loss: 2.9954 (2.7141)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3998 (1.4576)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [259]  [1800/2502]  eta: 0:06:56  lr: 0.000202  min_lr: 0.000202  loss: 2.6453 (2.7126)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4528 (1.4590)  time: 0.5987  data: 0.0004  max mem: 53905
Epoch: [259]  [2000/2502]  eta: 0:04:58  lr: 0.000201  min_lr: 0.000201  loss: 2.5255 (2.7063)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5023 (1.4569)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [259]  [2200/2502]  eta: 0:02:59  lr: 0.000200  min_lr: 0.000200  loss: 2.6248 (2.7080)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3727 (1.4541)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [259]  [2400/2502]  eta: 0:01:00  lr: 0.000199  min_lr: 0.000199  loss: 2.7503 (2.7090)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5092 (1.4578)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [259]  [2501/2502]  eta: 0:00:00  lr: 0.000199  min_lr: 0.000199  loss: 2.9602 (2.7098)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5011 (1.4592)  time: 0.5343  data: 0.0008  max mem: 53905
Epoch: [259] Total time: 0:24:44 (0.5935 s / it)
Averaged stats: lr: 0.000199  min_lr: 0.000199  loss: 2.9602 (2.6913)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5011 (1.4592)
Test:  [ 0/50]  eta: 0:02:34  loss: 0.6359 (0.6359)  acc1: 95.6000 (95.6000)  acc5: 99.2000 (99.2000)  time: 3.0919  data: 2.7716  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8391 (0.8637)  acc1: 89.2000 (89.1273)  acc5: 98.4000 (98.2182)  time: 0.5643  data: 0.2768  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8948 (0.8934)  acc1: 86.4000 (87.9238)  acc5: 98.4000 (98.1714)  time: 0.2983  data: 0.0139  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0097 (0.9742)  acc1: 83.6000 (86.0645)  acc5: 97.2000 (97.3677)  time: 0.2849  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1796 (1.0221)  acc1: 80.8000 (84.6049)  acc5: 95.2000 (96.9366)  time: 0.2845  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1524 (1.0408)  acc1: 80.0000 (83.9520)  acc5: 95.6000 (96.8240)  time: 0.2842  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3482 s / it)
* Acc@1 84.128 Acc@5 96.890 loss 1.033
Accuracy of the model on the 50000 test images: 84.1%
Max accuracy: 84.23%
Epoch: [260]  [   0/2502]  eta: 1:49:56  lr: 0.000199  min_lr: 0.000199  loss: 3.0106 (3.0106)  weight_decay: 0.0500 (0.0500)  time: 2.6366  data: 2.0356  max mem: 53905
Epoch: [260]  [ 200/2502]  eta: 0:23:07  lr: 0.000198  min_lr: 0.000198  loss: 2.7837 (2.6494)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3936 (1.4135)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [260]  [ 400/2502]  eta: 0:21:00  lr: 0.000197  min_lr: 0.000197  loss: 2.8514 (2.6797)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4610 (1.4616)  time: 0.6067  data: 0.0004  max mem: 53905
Epoch: [260]  [ 600/2502]  eta: 0:18:56  lr: 0.000197  min_lr: 0.000197  loss: 2.8307 (2.6588)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4687 (1.4847)  time: 0.5938  data: 0.0005  max mem: 53905
Epoch: [260]  [ 800/2502]  eta: 0:16:54  lr: 0.000196  min_lr: 0.000196  loss: 2.7685 (2.6800)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4737 (1.4827)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [260]  [1000/2502]  eta: 0:14:55  lr: 0.000195  min_lr: 0.000195  loss: 2.8914 (2.6787)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3973 (1.4768)  time: 0.6037  data: 0.0006  max mem: 53905
Epoch: [260]  [1200/2502]  eta: 0:12:55  lr: 0.000194  min_lr: 0.000194  loss: 2.4905 (2.6753)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3804 (1.4662)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [260]  [1400/2502]  eta: 0:10:55  lr: 0.000194  min_lr: 0.000194  loss: 2.8076 (2.6783)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4720 (1.4661)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [260]  [1600/2502]  eta: 0:08:56  lr: 0.000193  min_lr: 0.000193  loss: 2.8955 (2.6794)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3822 (1.4625)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [260]  [1800/2502]  eta: 0:06:57  lr: 0.000192  min_lr: 0.000192  loss: 2.7847 (2.6811)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4699 (1.4675)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [260]  [2000/2502]  eta: 0:04:58  lr: 0.000191  min_lr: 0.000191  loss: 2.8425 (2.6840)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4438 (1.4684)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [260]  [2200/2502]  eta: 0:02:59  lr: 0.000191  min_lr: 0.000191  loss: 2.3937 (2.6814)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4247 (1.4699)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [260]  [2400/2502]  eta: 0:01:00  lr: 0.000190  min_lr: 0.000190  loss: 2.9069 (2.6834)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3971 (1.4703)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [260]  [2501/2502]  eta: 0:00:00  lr: 0.000189  min_lr: 0.000189  loss: 2.7309 (2.6831)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4311 (1.4705)  time: 0.5339  data: 0.0007  max mem: 53905
Epoch: [260] Total time: 0:24:46 (0.5942 s / it)
Averaged stats: lr: 0.000189  min_lr: 0.000189  loss: 2.7309 (2.6866)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4311 (1.4705)
Test:  [ 0/50]  eta: 0:03:02  loss: 0.5941 (0.5941)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.6473  data: 3.3392  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8361 (0.8200)  acc1: 88.4000 (88.9091)  acc5: 98.4000 (98.1818)  time: 0.5906  data: 0.3040  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8680 (0.8556)  acc1: 86.0000 (87.9048)  acc5: 98.0000 (98.0571)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9771 (0.9320)  acc1: 84.0000 (86.1290)  acc5: 97.2000 (97.2774)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1316 (0.9825)  acc1: 80.8000 (84.6049)  acc5: 95.2000 (96.9366)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1013 (1.0013)  acc1: 80.0000 (83.9520)  acc5: 96.0000 (96.8880)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3547 s / it)
* Acc@1 84.230 Acc@5 96.896 loss 0.995
Accuracy of the model on the 50000 test images: 84.2%
Max accuracy: 84.23%
Epoch: [261]  [   0/2502]  eta: 1:53:48  lr: 0.000189  min_lr: 0.000189  loss: 1.9707 (1.9707)  weight_decay: 0.0500 (0.0500)  time: 2.7292  data: 1.5137  max mem: 53905
Epoch: [261]  [ 200/2502]  eta: 0:23:15  lr: 0.000189  min_lr: 0.000189  loss: 2.8327 (2.6828)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3798 (1.4415)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [261]  [ 400/2502]  eta: 0:20:59  lr: 0.000188  min_lr: 0.000188  loss: 2.7298 (2.7033)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4028 (1.4394)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [261]  [ 600/2502]  eta: 0:18:56  lr: 0.000187  min_lr: 0.000187  loss: 2.6788 (2.6904)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3526 (1.4350)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [261]  [ 800/2502]  eta: 0:16:55  lr: 0.000186  min_lr: 0.000186  loss: 2.7863 (2.6879)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5284 (1.4521)  time: 0.5931  data: 0.0006  max mem: 53905
Epoch: [261]  [1000/2502]  eta: 0:14:55  lr: 0.000186  min_lr: 0.000186  loss: 2.7687 (2.6804)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3233 (1.4515)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [261]  [1200/2502]  eta: 0:12:55  lr: 0.000185  min_lr: 0.000185  loss: 2.8612 (2.6929)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5367 (1.4709)  time: 0.6066  data: 0.0005  max mem: 53905
Epoch: [261]  [1400/2502]  eta: 0:10:56  lr: 0.000184  min_lr: 0.000184  loss: 2.7426 (2.6904)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3733 (1.4664)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [261]  [1600/2502]  eta: 0:08:56  lr: 0.000183  min_lr: 0.000183  loss: 2.9133 (2.6862)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4362 (1.4654)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [261]  [1800/2502]  eta: 0:06:57  lr: 0.000183  min_lr: 0.000183  loss: 2.3557 (2.6836)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4129 (1.4688)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [261]  [2000/2502]  eta: 0:04:58  lr: 0.000182  min_lr: 0.000182  loss: 2.8496 (2.6811)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5161 (1.4755)  time: 0.5933  data: 0.0004  max mem: 53905
Epoch: [261]  [2200/2502]  eta: 0:02:59  lr: 0.000181  min_lr: 0.000181  loss: 2.7792 (2.6828)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4769 (1.4827)  time: 0.5987  data: 0.0004  max mem: 53905
Epoch: [261]  [2400/2502]  eta: 0:01:00  lr: 0.000180  min_lr: 0.000180  loss: 2.7701 (2.6841)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3882 (1.4859)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [261]  [2501/2502]  eta: 0:00:00  lr: 0.000180  min_lr: 0.000180  loss: 2.8267 (2.6819)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4322 (1.4863)  time: 0.5333  data: 0.0007  max mem: 53905
Epoch: [261] Total time: 0:24:46 (0.5942 s / it)
Averaged stats: lr: 0.000180  min_lr: 0.000180  loss: 2.8267 (2.6785)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4322 (1.4863)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.4791 (0.4791)  acc1: 95.6000 (95.6000)  acc5: 99.2000 (99.2000)  time: 3.0015  data: 2.6784  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.7489 (0.7375)  acc1: 88.8000 (89.2000)  acc5: 99.2000 (98.2546)  time: 0.5321  data: 0.2439  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.7772 (0.7784)  acc1: 86.0000 (88.0571)  acc5: 98.0000 (98.1333)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9057 (0.8595)  acc1: 84.4000 (86.1419)  acc5: 96.8000 (97.5226)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0518 (0.9074)  acc1: 81.6000 (84.9366)  acc5: 96.0000 (97.2000)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0518 (0.9264)  acc1: 81.2000 (84.3120)  acc5: 96.0000 (97.0560)  time: 0.2850  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3419 s / it)
* Acc@1 84.270 Acc@5 96.988 loss 0.919
Accuracy of the model on the 50000 test images: 84.3%
Max accuracy: 84.27%
Epoch: [262]  [   0/2502]  eta: 1:34:36  lr: 0.000180  min_lr: 0.000180  loss: 2.8901 (2.8901)  weight_decay: 0.0500 (0.0500)  time: 2.2687  data: 1.6733  max mem: 53905
Epoch: [262]  [ 200/2502]  eta: 0:23:05  lr: 0.000179  min_lr: 0.000179  loss: 2.8667 (2.6822)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5340 (1.4827)  time: 0.5977  data: 0.0005  max mem: 53905
Epoch: [262]  [ 400/2502]  eta: 0:20:56  lr: 0.000179  min_lr: 0.000179  loss: 2.6870 (2.6771)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4326 (1.4879)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [262]  [ 600/2502]  eta: 0:18:53  lr: 0.000178  min_lr: 0.000178  loss: 2.5908 (2.6749)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3276 (1.4859)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [262]  [ 800/2502]  eta: 0:16:52  lr: 0.000177  min_lr: 0.000177  loss: 2.7578 (2.6668)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5526 (1.4794)  time: 0.5976  data: 0.0004  max mem: 53905
Epoch: [262]  [1000/2502]  eta: 0:14:53  lr: 0.000176  min_lr: 0.000176  loss: 2.5120 (2.6679)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4455 (1.4792)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [262]  [1200/2502]  eta: 0:12:53  lr: 0.000176  min_lr: 0.000176  loss: 2.9474 (2.6675)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5115 (1.4806)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [262]  [1400/2502]  eta: 0:10:54  lr: 0.000175  min_lr: 0.000175  loss: 2.7714 (2.6721)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3869 (1.4789)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [262]  [1600/2502]  eta: 0:08:55  lr: 0.000174  min_lr: 0.000174  loss: 2.8873 (2.6728)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3438 (1.4726)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [262]  [1800/2502]  eta: 0:06:56  lr: 0.000173  min_lr: 0.000173  loss: 2.7778 (2.6721)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4716 (1.4750)  time: 0.5919  data: 0.0006  max mem: 53905
Epoch: [262]  [2000/2502]  eta: 0:04:57  lr: 0.000173  min_lr: 0.000173  loss: 2.7321 (2.6728)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3483 (1.4723)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [262]  [2200/2502]  eta: 0:02:59  lr: 0.000172  min_lr: 0.000172  loss: 2.7665 (2.6732)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4233 (1.4716)  time: 0.5905  data: 0.0005  max mem: 53905
Epoch: [262]  [2400/2502]  eta: 0:01:00  lr: 0.000171  min_lr: 0.000171  loss: 2.7473 (2.6736)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4296 (1.4730)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [262]  [2501/2502]  eta: 0:00:00  lr: 0.000171  min_lr: 0.000171  loss: 2.7507 (2.6714)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3847 (1.4741)  time: 0.5322  data: 0.0009  max mem: 53905
Epoch: [262] Total time: 0:24:43 (0.5929 s / it)
Averaged stats: lr: 0.000171  min_lr: 0.000171  loss: 2.7507 (2.6731)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3847 (1.4741)
Test:  [ 0/50]  eta: 0:02:39  loss: 0.4861 (0.4861)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.1976  data: 2.8698  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7344 (0.7263)  acc1: 88.8000 (89.3091)  acc5: 98.4000 (98.1818)  time: 0.5513  data: 0.2654  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.7456 (0.7630)  acc1: 86.8000 (88.0762)  acc5: 98.0000 (98.1905)  time: 0.2846  data: 0.0027  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.8766 (0.8468)  acc1: 84.0000 (86.0774)  acc5: 97.2000 (97.3936)  time: 0.2826  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0558 (0.8966)  acc1: 81.2000 (84.6927)  acc5: 95.2000 (97.0049)  time: 0.2825  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0641 (0.9159)  acc1: 79.6000 (84.1520)  acc5: 95.6000 (96.8640)  time: 0.2824  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3444 s / it)
* Acc@1 84.306 Acc@5 96.990 loss 0.908
Accuracy of the model on the 50000 test images: 84.3%
Max accuracy: 84.31%
Epoch: [263]  [   0/2502]  eta: 1:35:37  lr: 0.000171  min_lr: 0.000171  loss: 2.6228 (2.6228)  weight_decay: 0.0500 (0.0500)  time: 2.2933  data: 1.6924  max mem: 53905
Epoch: [263]  [ 200/2502]  eta: 0:23:04  lr: 0.000170  min_lr: 0.000170  loss: 2.6885 (2.6422)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3923 (1.4603)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [263]  [ 400/2502]  eta: 0:20:55  lr: 0.000169  min_lr: 0.000169  loss: 2.5823 (2.6733)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4907 (1.5338)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [263]  [ 600/2502]  eta: 0:18:53  lr: 0.000169  min_lr: 0.000169  loss: 2.9288 (2.6733)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3504 (1.5036)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [263]  [ 800/2502]  eta: 0:16:52  lr: 0.000168  min_lr: 0.000168  loss: 2.7265 (2.6717)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4023 (1.5035)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [263]  [1000/2502]  eta: 0:14:53  lr: 0.000167  min_lr: 0.000167  loss: 2.6853 (2.6731)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3817 (1.4961)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [263]  [1200/2502]  eta: 0:12:54  lr: 0.000167  min_lr: 0.000167  loss: 2.7037 (2.6679)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3685 (1.4864)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [263]  [1400/2502]  eta: 0:10:54  lr: 0.000166  min_lr: 0.000166  loss: 2.3619 (2.6631)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4116 (1.4816)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [263]  [1600/2502]  eta: 0:08:56  lr: 0.000165  min_lr: 0.000165  loss: 2.6429 (2.6633)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3476 (1.4683)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [263]  [1800/2502]  eta: 0:06:57  lr: 0.000164  min_lr: 0.000164  loss: 2.7466 (2.6605)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4478 (1.4692)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [263]  [2000/2502]  eta: 0:04:58  lr: 0.000164  min_lr: 0.000164  loss: 2.7095 (2.6666)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3725 (1.4689)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [263]  [2200/2502]  eta: 0:02:59  lr: 0.000163  min_lr: 0.000163  loss: 2.9100 (2.6660)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3425 (1.4639)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [263]  [2400/2502]  eta: 0:01:00  lr: 0.000162  min_lr: 0.000162  loss: 2.6532 (2.6691)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4028 (1.4671)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [263]  [2501/2502]  eta: 0:00:00  lr: 0.000162  min_lr: 0.000162  loss: 2.7975 (2.6702)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3863 (1.4678)  time: 0.5333  data: 0.0006  max mem: 53905
Epoch: [263] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.000162  min_lr: 0.000162  loss: 2.7975 (2.6751)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3863 (1.4678)
Test:  [ 0/50]  eta: 0:02:45  loss: 0.5273 (0.5273)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.3171  data: 2.9743  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.8020 (0.7784)  acc1: 88.8000 (89.0909)  acc5: 98.4000 (98.1818)  time: 0.5695  data: 0.2797  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8120 (0.8124)  acc1: 86.4000 (88.0381)  acc5: 98.0000 (98.1143)  time: 0.2901  data: 0.0054  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9468 (0.8981)  acc1: 84.4000 (86.1032)  acc5: 97.2000 (97.3032)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0824 (0.9480)  acc1: 80.8000 (84.7902)  acc5: 95.6000 (97.0244)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0824 (0.9688)  acc1: 79.2000 (84.2160)  acc5: 96.0000 (96.9200)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3498 s / it)
* Acc@1 84.268 Acc@5 96.972 loss 0.961
Accuracy of the model on the 50000 test images: 84.3%
Max accuracy: 84.31%
Epoch: [264]  [   0/2502]  eta: 1:50:47  lr: 0.000162  min_lr: 0.000162  loss: 2.8975 (2.8975)  weight_decay: 0.0500 (0.0500)  time: 2.6570  data: 1.5423  max mem: 53905
Epoch: [264]  [ 200/2502]  eta: 0:23:07  lr: 0.000161  min_lr: 0.000161  loss: 2.5824 (2.6671)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4088 (1.4417)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [264]  [ 400/2502]  eta: 0:20:56  lr: 0.000160  min_lr: 0.000160  loss: 2.8430 (2.6522)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5125 (1.4856)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [264]  [ 600/2502]  eta: 0:18:53  lr: 0.000160  min_lr: 0.000160  loss: 2.5961 (2.6533)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4560 (1.4898)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [264]  [ 800/2502]  eta: 0:16:52  lr: 0.000159  min_lr: 0.000159  loss: 2.3785 (2.6456)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4459 (nan)  time: 0.5917  data: 0.0007  max mem: 53905
Epoch: [264]  [1000/2502]  eta: 0:14:53  lr: 0.000158  min_lr: 0.000158  loss: 2.8059 (2.6464)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4107 (nan)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [264]  [1200/2502]  eta: 0:12:54  lr: 0.000158  min_lr: 0.000158  loss: 2.8704 (2.6579)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5105 (nan)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [264]  [1400/2502]  eta: 0:10:54  lr: 0.000157  min_lr: 0.000157  loss: 2.8011 (2.6633)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3363 (nan)  time: 0.5954  data: 0.0005  max mem: 53905
Epoch: [264]  [1600/2502]  eta: 0:08:55  lr: 0.000156  min_lr: 0.000156  loss: 2.7116 (2.6654)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4787 (nan)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [264]  [1800/2502]  eta: 0:06:56  lr: 0.000156  min_lr: 0.000156  loss: 2.8594 (2.6629)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4221 (nan)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [264]  [2000/2502]  eta: 0:04:58  lr: 0.000155  min_lr: 0.000155  loss: 2.6836 (2.6640)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4653 (nan)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [264]  [2200/2502]  eta: 0:02:59  lr: 0.000154  min_lr: 0.000154  loss: 2.7813 (2.6654)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4302 (nan)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [264]  [2400/2502]  eta: 0:01:00  lr: 0.000154  min_lr: 0.000154  loss: 2.7438 (2.6704)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5079 (nan)  time: 0.5960  data: 0.0004  max mem: 53905
Epoch: [264]  [2501/2502]  eta: 0:00:00  lr: 0.000153  min_lr: 0.000153  loss: 2.9648 (2.6732)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4121 (nan)  time: 0.5394  data: 0.0008  max mem: 53905
Epoch: [264] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.000153  min_lr: 0.000153  loss: 2.9648 (2.6674)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4121 (nan)
Test:  [ 0/50]  eta: 0:02:37  loss: 0.6487 (0.6487)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.1498  data: 2.8119  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.9250 (0.8957)  acc1: 88.8000 (89.2000)  acc5: 98.4000 (98.2909)  time: 0.5574  data: 0.2685  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.9354 (0.9312)  acc1: 86.4000 (88.0381)  acc5: 98.4000 (98.1524)  time: 0.2915  data: 0.0073  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0651 (1.0167)  acc1: 84.0000 (86.1419)  acc5: 97.2000 (97.3548)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.2142 (1.0671)  acc1: 81.2000 (84.7512)  acc5: 95.6000 (96.9756)  time: 0.2856  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2103 (1.0856)  acc1: 80.4000 (84.1520)  acc5: 96.0000 (96.8480)  time: 0.2907  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3501 s / it)
* Acc@1 84.184 Acc@5 96.908 loss 1.081
Accuracy of the model on the 50000 test images: 84.2%
Max accuracy: 84.31%
Epoch: [265]  [   0/2502]  eta: 1:51:40  lr: 0.000153  min_lr: 0.000153  loss: 3.1595 (3.1595)  weight_decay: 0.0500 (0.0500)  time: 2.6782  data: 2.0399  max mem: 53905
Epoch: [265]  [ 200/2502]  eta: 0:23:11  lr: 0.000153  min_lr: 0.000153  loss: 2.8464 (2.6434)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5119 (1.4844)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [265]  [ 400/2502]  eta: 0:20:58  lr: 0.000152  min_lr: 0.000152  loss: 2.5424 (2.6317)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4652 (1.4830)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [265]  [ 600/2502]  eta: 0:18:55  lr: 0.000151  min_lr: 0.000151  loss: 2.7397 (2.6518)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3324 (1.5007)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [265]  [ 800/2502]  eta: 0:16:54  lr: 0.000150  min_lr: 0.000150  loss: 2.8205 (2.6640)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4669 (1.5153)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [265]  [1000/2502]  eta: 0:14:53  lr: 0.000150  min_lr: 0.000150  loss: 2.8699 (2.6609)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3956 (1.5123)  time: 0.5980  data: 0.0005  max mem: 53905
Epoch: [265]  [1200/2502]  eta: 0:12:54  lr: 0.000149  min_lr: 0.000149  loss: 2.8379 (2.6678)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4574 (1.5179)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [265]  [1400/2502]  eta: 0:10:55  lr: 0.000148  min_lr: 0.000148  loss: 2.7971 (2.6687)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4557 (1.5091)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [265]  [1600/2502]  eta: 0:08:56  lr: 0.000148  min_lr: 0.000148  loss: 2.8505 (2.6700)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3926 (1.5040)  time: 0.5938  data: 0.0004  max mem: 53905
Epoch: [265]  [1800/2502]  eta: 0:06:57  lr: 0.000147  min_lr: 0.000147  loss: 2.7282 (2.6702)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4936 (1.5147)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [265]  [2000/2502]  eta: 0:04:58  lr: 0.000146  min_lr: 0.000146  loss: 2.9266 (2.6661)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3836 (1.5126)  time: 0.5946  data: 0.0004  max mem: 53905
Epoch: [265]  [2200/2502]  eta: 0:02:59  lr: 0.000146  min_lr: 0.000146  loss: 2.8293 (2.6638)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4250 (1.5162)  time: 0.5990  data: 0.0004  max mem: 53905
Epoch: [265]  [2400/2502]  eta: 0:01:00  lr: 0.000145  min_lr: 0.000145  loss: 2.7473 (2.6633)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4462 (1.5149)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [265]  [2501/2502]  eta: 0:00:00  lr: 0.000145  min_lr: 0.000145  loss: 2.6600 (2.6649)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4021 (1.5136)  time: 0.5337  data: 0.0008  max mem: 53905
Epoch: [265] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.000145  min_lr: 0.000145  loss: 2.6600 (2.6612)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4021 (1.5136)
Test:  [ 0/50]  eta: 0:02:37  loss: 0.4716 (0.4716)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.1400  data: 2.8350  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7118 (0.7113)  acc1: 89.2000 (89.3091)  acc5: 98.0000 (98.1455)  time: 0.5721  data: 0.2802  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7519 (0.7512)  acc1: 86.4000 (88.2095)  acc5: 98.0000 (98.1524)  time: 0.3040  data: 0.0126  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.8673 (0.8344)  acc1: 84.8000 (86.3097)  acc5: 97.6000 (97.3806)  time: 0.2889  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0280 (0.8796)  acc1: 81.6000 (85.0829)  acc5: 95.6000 (97.0732)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 0.9935 (0.8963)  acc1: 80.8000 (84.4960)  acc5: 96.0000 (96.9920)  time: 0.2845  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3520 s / it)
* Acc@1 84.338 Acc@5 97.024 loss 0.890
Accuracy of the model on the 50000 test images: 84.3%
Max accuracy: 84.34%
Epoch: [266]  [   0/2502]  eta: 1:48:15  lr: 0.000145  min_lr: 0.000145  loss: 2.8243 (2.8243)  weight_decay: 0.0500 (0.0500)  time: 2.5959  data: 1.9961  max mem: 53905
Epoch: [266]  [ 200/2502]  eta: 0:23:08  lr: 0.000144  min_lr: 0.000144  loss: 2.8056 (2.6219)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4403 (1.4979)  time: 0.5936  data: 0.0004  max mem: 53905
Epoch: [266]  [ 400/2502]  eta: 0:21:00  lr: 0.000143  min_lr: 0.000143  loss: 2.7009 (2.6330)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4630 (1.4838)  time: 0.5931  data: 0.0007  max mem: 53905
Epoch: [266]  [ 600/2502]  eta: 0:18:56  lr: 0.000143  min_lr: 0.000143  loss: 2.8261 (2.6446)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5733 (1.5178)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [266]  [ 800/2502]  eta: 0:16:54  lr: 0.000142  min_lr: 0.000142  loss: 2.6877 (2.6535)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4245 (1.5250)  time: 0.5978  data: 0.0004  max mem: 53905
Epoch: [266]  [1000/2502]  eta: 0:14:55  lr: 0.000141  min_lr: 0.000141  loss: 2.8937 (2.6583)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [266]  [1200/2502]  eta: 0:12:55  lr: 0.000141  min_lr: 0.000141  loss: 2.7600 (2.6622)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3556 (nan)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [266]  [1400/2502]  eta: 0:10:55  lr: 0.000140  min_lr: 0.000140  loss: 2.8756 (2.6534)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4810 (nan)  time: 0.5977  data: 0.0005  max mem: 53905
Epoch: [266]  [1600/2502]  eta: 0:08:56  lr: 0.000139  min_lr: 0.000139  loss: 2.8347 (2.6532)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4225 (nan)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [266]  [1800/2502]  eta: 0:06:57  lr: 0.000139  min_lr: 0.000139  loss: 2.7929 (2.6543)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3592 (nan)  time: 0.5957  data: 0.0005  max mem: 53905
Epoch: [266]  [2000/2502]  eta: 0:04:58  lr: 0.000138  min_lr: 0.000138  loss: 2.8967 (2.6535)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5287 (nan)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [266]  [2200/2502]  eta: 0:02:59  lr: 0.000137  min_lr: 0.000137  loss: 2.5582 (2.6503)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4944 (nan)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [266]  [2400/2502]  eta: 0:01:00  lr: 0.000137  min_lr: 0.000137  loss: 2.4270 (2.6480)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4724 (nan)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [266]  [2501/2502]  eta: 0:00:00  lr: 0.000137  min_lr: 0.000137  loss: 2.7793 (2.6504)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4934 (nan)  time: 0.5335  data: 0.0007  max mem: 53905
Epoch: [266] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.000137  min_lr: 0.000137  loss: 2.7793 (2.6530)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4934 (nan)
Test:  [ 0/50]  eta: 0:03:14  loss: 0.5343 (0.5343)  acc1: 95.6000 (95.6000)  acc5: 99.2000 (99.2000)  time: 3.8940  data: 3.5760  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.7813 (0.7737)  acc1: 88.8000 (89.5273)  acc5: 99.2000 (98.4000)  time: 0.6127  data: 0.3255  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8040 (0.8132)  acc1: 86.0000 (88.2095)  acc5: 98.0000 (98.2667)  time: 0.2850  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9373 (0.9019)  acc1: 84.0000 (86.1548)  acc5: 97.6000 (97.4323)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1207 (0.9528)  acc1: 81.2000 (84.8781)  acc5: 95.2000 (97.0634)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1007 (0.9698)  acc1: 80.0000 (84.3840)  acc5: 96.4000 (96.9920)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3597 s / it)
* Acc@1 84.398 Acc@5 97.038 loss 0.963
Accuracy of the model on the 50000 test images: 84.4%
Max accuracy: 84.40%
Epoch: [267]  [   0/2502]  eta: 1:44:45  lr: 0.000136  min_lr: 0.000136  loss: 2.7359 (2.7359)  weight_decay: 0.0500 (0.0500)  time: 2.5122  data: 1.9154  max mem: 53905
Epoch: [267]  [ 200/2502]  eta: 0:23:06  lr: 0.000136  min_lr: 0.000136  loss: 2.6023 (2.5982)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5379 (1.5245)  time: 0.5934  data: 0.0005  max mem: 53905
Epoch: [267]  [ 400/2502]  eta: 0:20:57  lr: 0.000135  min_lr: 0.000135  loss: 2.7023 (2.6190)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3903 (1.4951)  time: 0.6034  data: 0.0005  max mem: 53905
Epoch: [267]  [ 600/2502]  eta: 0:18:55  lr: 0.000135  min_lr: 0.000135  loss: 2.7377 (2.6297)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4323 (1.4964)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [267]  [ 800/2502]  eta: 0:16:53  lr: 0.000134  min_lr: 0.000134  loss: 2.8391 (2.6390)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4123 (1.5019)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [267]  [1000/2502]  eta: 0:14:53  lr: 0.000133  min_lr: 0.000133  loss: 2.8215 (2.6435)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4537 (1.4999)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [267]  [1200/2502]  eta: 0:12:54  lr: 0.000133  min_lr: 0.000133  loss: 2.7304 (2.6370)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5853 (1.5109)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [267]  [1400/2502]  eta: 0:10:55  lr: 0.000132  min_lr: 0.000132  loss: 2.5065 (2.6393)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5128 (1.5165)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [267]  [1600/2502]  eta: 0:08:56  lr: 0.000131  min_lr: 0.000131  loss: 2.8034 (2.6375)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5277 (1.5155)  time: 0.6011  data: 0.0004  max mem: 53905
Epoch: [267]  [1800/2502]  eta: 0:06:57  lr: 0.000131  min_lr: 0.000131  loss: 2.7315 (2.6320)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5770 (1.5150)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [267]  [2000/2502]  eta: 0:04:58  lr: 0.000130  min_lr: 0.000130  loss: 2.8487 (2.6348)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4009 (1.5161)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [267]  [2200/2502]  eta: 0:02:59  lr: 0.000129  min_lr: 0.000129  loss: 2.6464 (2.6374)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4259 (1.5119)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [267]  [2400/2502]  eta: 0:01:00  lr: 0.000129  min_lr: 0.000129  loss: 2.5956 (2.6392)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4503 (1.5158)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [267]  [2501/2502]  eta: 0:00:00  lr: 0.000129  min_lr: 0.000129  loss: 2.3094 (2.6380)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4961 (1.5149)  time: 0.5329  data: 0.0007  max mem: 53905
Epoch: [267] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.000129  min_lr: 0.000129  loss: 2.3094 (2.6402)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4961 (1.5149)
Test:  [ 0/50]  eta: 0:02:36  loss: 0.4403 (0.4403)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.1287  data: 2.7994  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.6754 (0.6743)  acc1: 89.2000 (89.0909)  acc5: 99.2000 (98.4000)  time: 0.5595  data: 0.2714  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.7058 (0.7108)  acc1: 86.8000 (88.2857)  acc5: 98.0000 (98.3048)  time: 0.2937  data: 0.0096  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.8571 (0.7961)  acc1: 84.4000 (86.0903)  acc5: 97.2000 (97.4968)  time: 0.2882  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 0.9851 (0.8421)  acc1: 80.4000 (84.7610)  acc5: 95.6000 (97.1024)  time: 0.2920  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 0.9737 (0.8567)  acc1: 80.0000 (84.2720)  acc5: 96.0000 (97.0400)  time: 0.2892  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3505 s / it)
* Acc@1 84.390 Acc@5 97.012 loss 0.848
Accuracy of the model on the 50000 test images: 84.4%
Max accuracy: 84.40%
Epoch: [268]  [   0/2502]  eta: 1:56:21  lr: 0.000128  min_lr: 0.000128  loss: 2.4208 (2.4208)  weight_decay: 0.0500 (0.0500)  time: 2.7902  data: 1.6498  max mem: 53905
Epoch: [268]  [ 200/2502]  eta: 0:23:11  lr: 0.000128  min_lr: 0.000128  loss: 2.7347 (2.6545)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4387 (1.4995)  time: 0.5926  data: 0.0005  max mem: 53905
Epoch: [268]  [ 400/2502]  eta: 0:20:59  lr: 0.000127  min_lr: 0.000127  loss: 2.8835 (2.6647)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4711 (1.5098)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [268]  [ 600/2502]  eta: 0:18:55  lr: 0.000127  min_lr: 0.000127  loss: 2.6625 (2.6641)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4910 (1.5085)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [268]  [ 800/2502]  eta: 0:16:54  lr: 0.000126  min_lr: 0.000126  loss: 2.7122 (2.6726)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4409 (1.5004)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [268]  [1000/2502]  eta: 0:14:54  lr: 0.000125  min_lr: 0.000125  loss: 2.8785 (2.6631)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5979 (1.5217)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [268]  [1200/2502]  eta: 0:12:54  lr: 0.000125  min_lr: 0.000125  loss: 2.7256 (2.6572)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4405 (1.5284)  time: 0.5974  data: 0.0004  max mem: 53905
Epoch: [268]  [1400/2502]  eta: 0:10:55  lr: 0.000124  min_lr: 0.000124  loss: 2.7993 (2.6510)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5089 (1.5225)  time: 0.5924  data: 0.0006  max mem: 53905
Epoch: [268]  [1600/2502]  eta: 0:08:56  lr: 0.000124  min_lr: 0.000124  loss: 2.8216 (2.6529)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4110 (1.5132)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [268]  [1800/2502]  eta: 0:06:57  lr: 0.000123  min_lr: 0.000123  loss: 2.7358 (2.6522)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5589 (1.5189)  time: 0.5975  data: 0.0005  max mem: 53905
Epoch: [268]  [2000/2502]  eta: 0:04:58  lr: 0.000122  min_lr: 0.000122  loss: 2.7276 (2.6488)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3699 (1.5180)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [268]  [2200/2502]  eta: 0:02:59  lr: 0.000122  min_lr: 0.000122  loss: 2.4433 (2.6475)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4856 (1.5278)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [268]  [2400/2502]  eta: 0:01:00  lr: 0.000121  min_lr: 0.000121  loss: 2.6988 (2.6446)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6100 (1.5263)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [268]  [2501/2502]  eta: 0:00:00  lr: 0.000121  min_lr: 0.000121  loss: 2.7295 (2.6458)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4133 (1.5251)  time: 0.5334  data: 0.0007  max mem: 53905
Epoch: [268] Total time: 0:24:45 (0.5938 s / it)
Averaged stats: lr: 0.000121  min_lr: 0.000121  loss: 2.7295 (2.6421)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4133 (1.5251)
Test:  [ 0/50]  eta: 0:03:21  loss: 0.5833 (0.5833)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 4.0242  data: 3.6914  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.7857 (0.7971)  acc1: 89.6000 (89.0909)  acc5: 98.4000 (98.2546)  time: 0.6244  data: 0.3360  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8287 (0.8361)  acc1: 85.6000 (87.9048)  acc5: 98.0000 (98.0952)  time: 0.2845  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9561 (0.9228)  acc1: 84.4000 (86.0516)  acc5: 96.8000 (97.2774)  time: 0.2846  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1440 (0.9740)  acc1: 80.0000 (84.6342)  acc5: 95.6000 (96.9463)  time: 0.2846  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1440 (0.9928)  acc1: 79.2000 (84.0480)  acc5: 96.0000 (96.8640)  time: 0.2844  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3616 s / it)
* Acc@1 84.370 Acc@5 96.968 loss 0.982
Accuracy of the model on the 50000 test images: 84.4%
Max accuracy: 84.40%
Epoch: [269]  [   0/2502]  eta: 1:54:31  lr: 0.000121  min_lr: 0.000121  loss: 2.4772 (2.4772)  weight_decay: 0.0500 (0.0500)  time: 2.7464  data: 1.5690  max mem: 53905
Epoch: [269]  [ 200/2502]  eta: 0:23:07  lr: 0.000120  min_lr: 0.000120  loss: 2.8012 (2.6468)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3981 (1.4761)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [269]  [ 400/2502]  eta: 0:20:57  lr: 0.000120  min_lr: 0.000120  loss: 2.8224 (2.6528)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4734 (1.5101)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [269]  [ 600/2502]  eta: 0:18:54  lr: 0.000119  min_lr: 0.000119  loss: 2.7864 (2.6382)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5087 (1.5629)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [269]  [ 800/2502]  eta: 0:16:52  lr: 0.000118  min_lr: 0.000118  loss: 2.4305 (2.6326)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4221 (1.5475)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [269]  [1000/2502]  eta: 0:14:53  lr: 0.000118  min_lr: 0.000118  loss: 2.7395 (2.6339)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4018 (1.5570)  time: 0.5950  data: 0.0006  max mem: 53905
Epoch: [269]  [1200/2502]  eta: 0:12:54  lr: 0.000117  min_lr: 0.000117  loss: 2.8124 (2.6356)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4502 (1.5506)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [269]  [1400/2502]  eta: 0:10:54  lr: 0.000116  min_lr: 0.000116  loss: 2.4780 (2.6318)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4462 (1.5498)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [269]  [1600/2502]  eta: 0:08:55  lr: 0.000116  min_lr: 0.000116  loss: 2.7113 (2.6322)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4947 (1.5530)  time: 0.5951  data: 0.0004  max mem: 53905
Epoch: [269]  [1800/2502]  eta: 0:06:56  lr: 0.000115  min_lr: 0.000115  loss: 2.6837 (2.6319)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5470 (1.5549)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [269]  [2000/2502]  eta: 0:04:58  lr: 0.000115  min_lr: 0.000115  loss: 2.7515 (2.6341)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4271 (1.5529)  time: 0.5989  data: 0.0005  max mem: 53905
Epoch: [269]  [2200/2502]  eta: 0:02:59  lr: 0.000114  min_lr: 0.000114  loss: 2.8017 (2.6327)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4950 (1.5501)  time: 0.6016  data: 0.0005  max mem: 53905
Epoch: [269]  [2400/2502]  eta: 0:01:00  lr: 0.000114  min_lr: 0.000114  loss: 2.6835 (2.6320)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4435 (1.5554)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [269]  [2501/2502]  eta: 0:00:00  lr: 0.000113  min_lr: 0.000113  loss: 2.7535 (2.6315)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5031 (1.5533)  time: 0.5335  data: 0.0009  max mem: 53905
Epoch: [269] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.000113  min_lr: 0.000113  loss: 2.7535 (2.6388)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5031 (1.5533)
Test:  [ 0/50]  eta: 0:02:39  loss: 0.5789 (0.5789)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.1983  data: 2.8622  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.8355 (0.8257)  acc1: 88.8000 (89.1636)  acc5: 98.4000 (98.2546)  time: 0.5499  data: 0.2607  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8675 (0.8636)  acc1: 86.4000 (88.0000)  acc5: 98.4000 (98.1143)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9764 (0.9467)  acc1: 84.0000 (85.9613)  acc5: 96.8000 (97.1871)  time: 0.2854  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1330 (0.9930)  acc1: 81.2000 (84.6732)  acc5: 95.2000 (96.8293)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1389 (1.0127)  acc1: 80.0000 (84.1200)  acc5: 96.0000 (96.8000)  time: 0.2850  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3456 s / it)
* Acc@1 84.386 Acc@5 96.974 loss 1.005
Accuracy of the model on the 50000 test images: 84.4%
Max accuracy: 84.40%
Epoch: [270]  [   0/2502]  eta: 1:57:22  lr: 0.000113  min_lr: 0.000113  loss: 1.8222 (1.8222)  weight_decay: 0.0500 (0.0500)  time: 2.8147  data: 1.9989  max mem: 53905
Epoch: [270]  [ 200/2502]  eta: 0:23:12  lr: 0.000113  min_lr: 0.000113  loss: 2.6991 (2.6059)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5669 (1.5645)  time: 0.6007  data: 0.0004  max mem: 53905
Epoch: [270]  [ 400/2502]  eta: 0:20:58  lr: 0.000112  min_lr: 0.000112  loss: 2.7666 (2.6368)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4622 (1.5515)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [270]  [ 600/2502]  eta: 0:18:55  lr: 0.000111  min_lr: 0.000111  loss: 2.4824 (2.6342)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5373 (1.5577)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [270]  [ 800/2502]  eta: 0:16:54  lr: 0.000111  min_lr: 0.000111  loss: 2.7523 (2.6345)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4945 (1.5559)  time: 0.5953  data: 0.0004  max mem: 53905
Epoch: [270]  [1000/2502]  eta: 0:14:54  lr: 0.000110  min_lr: 0.000110  loss: 2.7960 (2.6297)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5252 (1.5575)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [270]  [1200/2502]  eta: 0:12:54  lr: 0.000110  min_lr: 0.000110  loss: 2.7462 (2.6368)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4441 (1.5548)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [270]  [1400/2502]  eta: 0:10:55  lr: 0.000109  min_lr: 0.000109  loss: 2.6963 (2.6362)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4088 (1.5474)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [270]  [1600/2502]  eta: 0:08:56  lr: 0.000109  min_lr: 0.000109  loss: 2.6216 (2.6328)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5643 (1.5542)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [270]  [1800/2502]  eta: 0:06:57  lr: 0.000108  min_lr: 0.000108  loss: 2.7259 (2.6400)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4731 (1.5690)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [270]  [2000/2502]  eta: 0:04:58  lr: 0.000107  min_lr: 0.000107  loss: 2.5875 (2.6362)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5535 (1.5686)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [270]  [2200/2502]  eta: 0:02:59  lr: 0.000107  min_lr: 0.000107  loss: 2.8426 (2.6328)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5396 (1.5684)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [270]  [2400/2502]  eta: 0:01:00  lr: 0.000106  min_lr: 0.000106  loss: 2.7928 (2.6325)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4876 (1.5683)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [270]  [2501/2502]  eta: 0:00:00  lr: 0.000106  min_lr: 0.000106  loss: 2.8091 (2.6349)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4742 (1.5672)  time: 0.5330  data: 0.0007  max mem: 53905
Epoch: [270] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.000106  min_lr: 0.000106  loss: 2.8091 (2.6294)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4742 (1.5672)
Test:  [ 0/50]  eta: 0:03:30  loss: 0.6040 (0.6040)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 4.2063  data: 3.8898  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 0.8622 (0.8616)  acc1: 89.2000 (89.1273)  acc5: 98.4000 (98.1818)  time: 0.6418  data: 0.3540  max mem: 53905
Test:  [20/50]  eta: 0:00:14  loss: 0.8786 (0.8962)  acc1: 86.0000 (87.9810)  acc5: 98.4000 (98.1143)  time: 0.2855  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.0409 (0.9855)  acc1: 83.6000 (86.0387)  acc5: 96.8000 (97.2645)  time: 0.2856  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1974 (1.0368)  acc1: 80.4000 (84.6634)  acc5: 95.6000 (96.9268)  time: 0.2853  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.2116 (1.0536)  acc1: 80.4000 (84.1520)  acc5: 96.0000 (96.8480)  time: 0.2850  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3662 s / it)
* Acc@1 84.412 Acc@5 96.970 loss 1.044
Accuracy of the model on the 50000 test images: 84.4%
Max accuracy: 84.41%
Epoch: [271]  [   0/2502]  eta: 1:44:15  lr: 0.000106  min_lr: 0.000106  loss: 2.8919 (2.8919)  weight_decay: 0.0500 (0.0500)  time: 2.5001  data: 1.8985  max mem: 53905
Epoch: [271]  [ 200/2502]  eta: 0:23:07  lr: 0.000105  min_lr: 0.000105  loss: 2.7681 (2.6231)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5632 (1.6304)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [271]  [ 400/2502]  eta: 0:20:57  lr: 0.000105  min_lr: 0.000105  loss: 2.7333 (2.6287)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5612 (1.5934)  time: 0.5939  data: 0.0004  max mem: 53905
Epoch: [271]  [ 600/2502]  eta: 0:18:55  lr: 0.000104  min_lr: 0.000104  loss: 2.7718 (2.6456)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4894 (1.5788)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [271]  [ 800/2502]  eta: 0:16:53  lr: 0.000104  min_lr: 0.000104  loss: 2.6853 (2.6456)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5096 (1.5660)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [271]  [1000/2502]  eta: 0:14:53  lr: 0.000103  min_lr: 0.000103  loss: 2.6866 (2.6342)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4740 (1.5596)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [271]  [1200/2502]  eta: 0:12:54  lr: 0.000102  min_lr: 0.000102  loss: 2.6377 (2.6295)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4772 (1.5597)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [271]  [1400/2502]  eta: 0:10:54  lr: 0.000102  min_lr: 0.000102  loss: 2.7332 (2.6262)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5337 (1.5674)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [271]  [1600/2502]  eta: 0:08:56  lr: 0.000101  min_lr: 0.000101  loss: 2.5183 (2.6284)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5541 (1.5716)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [271]  [1800/2502]  eta: 0:06:57  lr: 0.000101  min_lr: 0.000101  loss: 2.7037 (2.6315)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4881 (1.5673)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [271]  [2000/2502]  eta: 0:04:58  lr: 0.000100  min_lr: 0.000100  loss: 2.7684 (2.6333)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5938 (1.5786)  time: 0.5971  data: 0.0005  max mem: 53905
Epoch: [271]  [2200/2502]  eta: 0:02:59  lr: 0.000100  min_lr: 0.000100  loss: 2.7799 (2.6390)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5047 (1.5840)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [271]  [2400/2502]  eta: 0:01:00  lr: 0.000099  min_lr: 0.000099  loss: 2.5774 (2.6360)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5085 (1.5859)  time: 0.5980  data: 0.0004  max mem: 53905
Epoch: [271]  [2501/2502]  eta: 0:00:00  lr: 0.000099  min_lr: 0.000099  loss: 2.8049 (2.6381)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5015 (1.5900)  time: 0.5332  data: 0.0007  max mem: 53905
Epoch: [271] Total time: 0:24:45 (0.5935 s / it)
Averaged stats: lr: 0.000099  min_lr: 0.000099  loss: 2.8049 (2.6275)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5015 (1.5900)
Test:  [ 0/50]  eta: 0:03:22  loss: 0.5664 (0.5664)  acc1: 95.6000 (95.6000)  acc5: 99.2000 (99.2000)  time: 4.0413  data: 3.7345  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 0.8270 (0.8147)  acc1: 88.4000 (89.1273)  acc5: 98.8000 (98.2546)  time: 0.6263  data: 0.3399  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8479 (0.8472)  acc1: 86.4000 (88.1714)  acc5: 98.0000 (98.1333)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9919 (0.9317)  acc1: 84.4000 (86.1419)  acc5: 96.8000 (97.3290)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1369 (0.9783)  acc1: 80.8000 (84.8098)  acc5: 95.6000 (96.9561)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1381 (0.9965)  acc1: 80.4000 (84.3200)  acc5: 95.6000 (96.8640)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3632 s / it)
* Acc@1 84.410 Acc@5 96.956 loss 0.989
Accuracy of the model on the 50000 test images: 84.4%
Max accuracy: 84.41%
Epoch: [272]  [   0/2502]  eta: 1:54:58  lr: 0.000099  min_lr: 0.000099  loss: 2.8685 (2.8685)  weight_decay: 0.0500 (0.0500)  time: 2.7571  data: 1.4885  max mem: 53905
Epoch: [272]  [ 200/2502]  eta: 0:23:12  lr: 0.000098  min_lr: 0.000098  loss: 2.8033 (2.6860)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4975 (1.5315)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [272]  [ 400/2502]  eta: 0:20:58  lr: 0.000098  min_lr: 0.000098  loss: 2.8858 (2.6695)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4771 (1.5709)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [272]  [ 600/2502]  eta: 0:18:55  lr: 0.000097  min_lr: 0.000097  loss: 2.2950 (2.6553)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5423 (1.5703)  time: 0.6007  data: 0.0005  max mem: 53905
Epoch: [272]  [ 800/2502]  eta: 0:16:54  lr: 0.000097  min_lr: 0.000097  loss: 2.6965 (2.6456)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4480 (1.5788)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [272]  [1000/2502]  eta: 0:14:54  lr: 0.000096  min_lr: 0.000096  loss: 2.6910 (2.6407)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5346 (1.5881)  time: 0.5933  data: 0.0004  max mem: 53905
Epoch: [272]  [1200/2502]  eta: 0:12:55  lr: 0.000096  min_lr: 0.000096  loss: 2.7500 (2.6418)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5800 (1.5932)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [272]  [1400/2502]  eta: 0:10:55  lr: 0.000095  min_lr: 0.000095  loss: 2.7149 (2.6405)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4917 (1.5921)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [272]  [1600/2502]  eta: 0:08:56  lr: 0.000094  min_lr: 0.000094  loss: 2.7618 (2.6388)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5016 (1.5875)  time: 0.5904  data: 0.0005  max mem: 53905
Epoch: [272]  [1800/2502]  eta: 0:06:57  lr: 0.000094  min_lr: 0.000094  loss: 2.6384 (2.6340)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4817 (1.5836)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [272]  [2000/2502]  eta: 0:04:58  lr: 0.000093  min_lr: 0.000093  loss: 2.7198 (2.6343)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4555 (1.5818)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [272]  [2200/2502]  eta: 0:02:59  lr: 0.000093  min_lr: 0.000093  loss: 2.7432 (2.6319)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5548 (1.5858)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [272]  [2400/2502]  eta: 0:01:00  lr: 0.000092  min_lr: 0.000092  loss: 2.6137 (2.6294)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6363 (1.5873)  time: 0.5969  data: 0.0004  max mem: 53905
Epoch: [272]  [2501/2502]  eta: 0:00:00  lr: 0.000092  min_lr: 0.000092  loss: 2.7148 (2.6313)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5557 (1.5864)  time: 0.5341  data: 0.0007  max mem: 53905
Epoch: [272] Total time: 0:24:46 (0.5941 s / it)
Averaged stats: lr: 0.000092  min_lr: 0.000092  loss: 2.7148 (2.6283)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5557 (1.5864)
Test:  [ 0/50]  eta: 0:03:18  loss: 0.6338 (0.6338)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.9607  data: 3.6275  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8529 (0.8619)  acc1: 90.0000 (89.2000)  acc5: 99.2000 (98.2909)  time: 0.6191  data: 0.3302  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8908 (0.9014)  acc1: 86.0000 (88.0762)  acc5: 98.0000 (98.2286)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 1.0481 (0.9875)  acc1: 84.8000 (86.2323)  acc5: 97.2000 (97.3548)  time: 0.2855  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1823 (1.0361)  acc1: 81.2000 (84.8000)  acc5: 95.6000 (96.9659)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1953 (1.0534)  acc1: 79.2000 (84.2240)  acc5: 96.4000 (96.9200)  time: 0.2848  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3609 s / it)
* Acc@1 84.412 Acc@5 97.048 loss 1.045
Accuracy of the model on the 50000 test images: 84.4%
Max accuracy: 84.41%
Epoch: [273]  [   0/2502]  eta: 1:52:42  lr: 0.000092  min_lr: 0.000092  loss: 2.5204 (2.5204)  weight_decay: 0.0500 (0.0500)  time: 2.7028  data: 2.1009  max mem: 53905
Epoch: [273]  [ 200/2502]  eta: 0:23:08  lr: 0.000092  min_lr: 0.000092  loss: 2.5098 (2.5673)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4569 (inf)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [273]  [ 400/2502]  eta: 0:20:58  lr: 0.000091  min_lr: 0.000091  loss: 2.7204 (2.6043)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5073 (inf)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [273]  [ 600/2502]  eta: 0:18:55  lr: 0.000090  min_lr: 0.000090  loss: 2.8137 (2.6202)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5715 (inf)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [273]  [ 800/2502]  eta: 0:16:54  lr: 0.000090  min_lr: 0.000090  loss: 2.8392 (2.6273)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5369 (inf)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [273]  [1000/2502]  eta: 0:14:54  lr: 0.000089  min_lr: 0.000089  loss: 2.7681 (2.6313)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4766 (inf)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [273]  [1200/2502]  eta: 0:12:55  lr: 0.000089  min_lr: 0.000089  loss: 2.5651 (2.6266)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5148 (inf)  time: 0.5932  data: 0.0004  max mem: 53905
Epoch: [273]  [1400/2502]  eta: 0:10:55  lr: 0.000088  min_lr: 0.000088  loss: 2.6424 (2.6247)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5878 (inf)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [273]  [1600/2502]  eta: 0:08:56  lr: 0.000088  min_lr: 0.000088  loss: 2.6196 (2.6196)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4706 (inf)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [273]  [1800/2502]  eta: 0:06:57  lr: 0.000087  min_lr: 0.000087  loss: 2.5075 (2.6224)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5058 (inf)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [273]  [2000/2502]  eta: 0:04:58  lr: 0.000087  min_lr: 0.000087  loss: 2.7080 (2.6238)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5159 (inf)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [273]  [2200/2502]  eta: 0:02:59  lr: 0.000086  min_lr: 0.000086  loss: 2.7227 (2.6258)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4950 (inf)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [273]  [2400/2502]  eta: 0:01:00  lr: 0.000086  min_lr: 0.000086  loss: 2.7814 (2.6260)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5282 (inf)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [273]  [2501/2502]  eta: 0:00:00  lr: 0.000085  min_lr: 0.000085  loss: 2.7276 (2.6249)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6032 (inf)  time: 0.5327  data: 0.0008  max mem: 53905
Epoch: [273] Total time: 0:24:45 (0.5938 s / it)
Averaged stats: lr: 0.000085  min_lr: 0.000085  loss: 2.7276 (2.6215)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6032 (inf)
Test:  [ 0/50]  eta: 0:02:53  loss: 0.5346 (0.5346)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.4672  data: 3.1313  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7723 (0.7806)  acc1: 89.2000 (89.3091)  acc5: 98.4000 (98.1091)  time: 0.5745  data: 0.2875  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8129 (0.8145)  acc1: 86.4000 (88.2857)  acc5: 98.0000 (98.1143)  time: 0.2841  data: 0.0018  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9629 (0.8989)  acc1: 84.8000 (86.2710)  acc5: 97.2000 (97.3161)  time: 0.2830  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1024 (0.9482)  acc1: 81.2000 (84.9659)  acc5: 95.6000 (96.8878)  time: 0.2827  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1024 (0.9654)  acc1: 80.8000 (84.4400)  acc5: 95.6000 (96.8160)  time: 0.2824  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3501 s / it)
* Acc@1 84.524 Acc@5 96.996 loss 0.956
Accuracy of the model on the 50000 test images: 84.5%
Max accuracy: 84.52%
Epoch: [274]  [   0/2502]  eta: 1:33:53  lr: 0.000085  min_lr: 0.000085  loss: 2.6974 (2.6974)  weight_decay: 0.0500 (0.0500)  time: 2.2515  data: 1.6599  max mem: 53905
Epoch: [274]  [ 200/2502]  eta: 0:23:05  lr: 0.000085  min_lr: 0.000085  loss: 2.7790 (2.6064)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4540 (1.6180)  time: 0.5980  data: 0.0005  max mem: 53905
Epoch: [274]  [ 400/2502]  eta: 0:20:55  lr: 0.000084  min_lr: 0.000084  loss: 2.7787 (2.5976)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5188 (1.5974)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [274]  [ 600/2502]  eta: 0:18:54  lr: 0.000084  min_lr: 0.000084  loss: 2.7990 (2.6158)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5109 (1.6010)  time: 0.6000  data: 0.0004  max mem: 53905
Epoch: [274]  [ 800/2502]  eta: 0:16:54  lr: 0.000083  min_lr: 0.000083  loss: 2.7223 (2.6321)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5719 (1.6194)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [274]  [1000/2502]  eta: 0:14:54  lr: 0.000083  min_lr: 0.000083  loss: 2.5586 (2.6272)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5535 (1.6175)  time: 0.5939  data: 0.0005  max mem: 53905
Epoch: [274]  [1200/2502]  eta: 0:12:55  lr: 0.000082  min_lr: 0.000082  loss: 2.8316 (2.6313)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5301 (1.6061)  time: 0.5945  data: 0.0005  max mem: 53905
Epoch: [274]  [1400/2502]  eta: 0:10:55  lr: 0.000082  min_lr: 0.000082  loss: 2.6344 (2.6296)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5505 (1.6169)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [274]  [1600/2502]  eta: 0:08:56  lr: 0.000081  min_lr: 0.000081  loss: 2.8311 (2.6278)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5588 (1.6127)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [274]  [1800/2502]  eta: 0:06:57  lr: 0.000081  min_lr: 0.000081  loss: 2.6470 (2.6240)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5669 (1.6179)  time: 0.5990  data: 0.0005  max mem: 53905
Epoch: [274]  [2000/2502]  eta: 0:04:58  lr: 0.000080  min_lr: 0.000080  loss: 2.7907 (2.6235)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5550 (1.6130)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [274]  [2200/2502]  eta: 0:02:59  lr: 0.000080  min_lr: 0.000080  loss: 2.8100 (2.6197)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4504 (1.6049)  time: 0.6004  data: 0.0006  max mem: 53905
Epoch: [274]  [2400/2502]  eta: 0:01:00  lr: 0.000079  min_lr: 0.000079  loss: 2.7199 (2.6197)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5969 (1.6016)  time: 0.5960  data: 0.0006  max mem: 53905
Epoch: [274]  [2501/2502]  eta: 0:00:00  lr: 0.000079  min_lr: 0.000079  loss: 2.7895 (2.6195)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4707 (1.5977)  time: 0.5331  data: 0.0007  max mem: 53905
Epoch: [274] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.000079  min_lr: 0.000079  loss: 2.7895 (2.6147)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4707 (1.5977)
Test:  [ 0/50]  eta: 0:03:09  loss: 0.5449 (0.5449)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.7919  data: 3.4460  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.8066 (0.7907)  acc1: 88.8000 (89.0182)  acc5: 98.4000 (98.1455)  time: 0.6037  data: 0.3138  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8070 (0.8274)  acc1: 85.6000 (88.0191)  acc5: 98.0000 (98.1905)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9691 (0.9160)  acc1: 84.4000 (86.1032)  acc5: 97.2000 (97.3806)  time: 0.2853  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1292 (0.9682)  acc1: 80.8000 (84.6829)  acc5: 95.6000 (97.0342)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1140 (0.9843)  acc1: 79.6000 (84.3040)  acc5: 96.0000 (96.9920)  time: 0.2847  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3574 s / it)
* Acc@1 84.462 Acc@5 97.078 loss 0.974
Accuracy of the model on the 50000 test images: 84.5%
Max accuracy: 84.52%
Epoch: [275]  [   0/2502]  eta: 1:47:19  lr: 0.000079  min_lr: 0.000079  loss: 3.0094 (3.0094)  weight_decay: 0.0500 (0.0500)  time: 2.5739  data: 1.8969  max mem: 53905
Epoch: [275]  [ 200/2502]  eta: 0:23:06  lr: 0.000079  min_lr: 0.000079  loss: 2.8302 (2.6776)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4680 (1.4750)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [275]  [ 400/2502]  eta: 0:20:58  lr: 0.000078  min_lr: 0.000078  loss: 2.6907 (2.6547)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4637 (1.5053)  time: 0.6020  data: 0.0004  max mem: 53905
Epoch: [275]  [ 600/2502]  eta: 0:18:54  lr: 0.000078  min_lr: 0.000078  loss: 2.6634 (2.6413)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5009 (1.5273)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [275]  [ 800/2502]  eta: 0:16:54  lr: 0.000077  min_lr: 0.000077  loss: 2.7016 (2.6318)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4994 (1.5312)  time: 0.6013  data: 0.0004  max mem: 53905
Epoch: [275]  [1000/2502]  eta: 0:14:54  lr: 0.000077  min_lr: 0.000077  loss: 2.8120 (2.6356)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6192 (1.5505)  time: 0.5958  data: 0.0004  max mem: 53905
Epoch: [275]  [1200/2502]  eta: 0:12:54  lr: 0.000076  min_lr: 0.000076  loss: 2.4490 (2.6372)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4288 (1.5525)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [275]  [1400/2502]  eta: 0:10:55  lr: 0.000076  min_lr: 0.000076  loss: 2.5187 (2.6352)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4554 (1.5467)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [275]  [1600/2502]  eta: 0:08:56  lr: 0.000075  min_lr: 0.000075  loss: 2.7946 (2.6342)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4862 (1.5512)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [275]  [1800/2502]  eta: 0:06:57  lr: 0.000075  min_lr: 0.000075  loss: 2.7086 (2.6391)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6024 (1.5633)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [275]  [2000/2502]  eta: 0:04:58  lr: 0.000074  min_lr: 0.000074  loss: 2.7381 (2.6357)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5074 (1.5588)  time: 0.5916  data: 0.0006  max mem: 53905
Epoch: [275]  [2200/2502]  eta: 0:02:59  lr: 0.000074  min_lr: 0.000074  loss: 2.5495 (2.6314)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5501 (1.5628)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [275]  [2400/2502]  eta: 0:01:00  lr: 0.000073  min_lr: 0.000073  loss: 2.7717 (2.6284)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5922 (1.5666)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [275]  [2501/2502]  eta: 0:00:00  lr: 0.000073  min_lr: 0.000073  loss: 2.8876 (2.6283)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5002 (1.5653)  time: 0.5322  data: 0.0007  max mem: 53905
Epoch: [275] Total time: 0:24:45 (0.5936 s / it)
Averaged stats: lr: 0.000073  min_lr: 0.000073  loss: 2.8876 (2.6221)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5002 (1.5653)
Test:  [ 0/50]  eta: 0:02:48  loss: 0.5786 (0.5786)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.3734  data: 3.0607  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.8251 (0.8180)  acc1: 88.8000 (89.2364)  acc5: 98.8000 (98.2182)  time: 0.5782  data: 0.2936  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8509 (0.8517)  acc1: 86.4000 (88.0952)  acc5: 98.0000 (98.1714)  time: 0.2908  data: 0.0087  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9745 (0.9389)  acc1: 84.0000 (86.1936)  acc5: 97.2000 (97.3936)  time: 0.2829  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1401 (0.9895)  acc1: 80.4000 (84.8390)  acc5: 95.6000 (97.0342)  time: 0.2827  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1324 (1.0091)  acc1: 80.8000 (84.3200)  acc5: 96.0000 (96.9360)  time: 0.2826  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3500 s / it)
* Acc@1 84.522 Acc@5 97.038 loss 0.999
Accuracy of the model on the 50000 test images: 84.5%
Max accuracy: 84.52%
Epoch: [276]  [   0/2502]  eta: 1:59:37  lr: 0.000073  min_lr: 0.000073  loss: 2.9690 (2.9690)  weight_decay: 0.0500 (0.0500)  time: 2.8685  data: 2.1249  max mem: 53905
Epoch: [276]  [ 200/2502]  eta: 0:23:13  lr: 0.000073  min_lr: 0.000073  loss: 2.7459 (2.6545)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5658 (1.5959)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [276]  [ 400/2502]  eta: 0:20:58  lr: 0.000072  min_lr: 0.000072  loss: 2.6741 (2.6162)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5272 (1.5801)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [276]  [ 600/2502]  eta: 0:18:55  lr: 0.000072  min_lr: 0.000072  loss: 2.7487 (2.6162)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5225 (1.5800)  time: 0.5908  data: 0.0004  max mem: 53905
Epoch: [276]  [ 800/2502]  eta: 0:16:54  lr: 0.000071  min_lr: 0.000071  loss: 2.5390 (2.6158)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5037 (1.5729)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [276]  [1000/2502]  eta: 0:14:54  lr: 0.000071  min_lr: 0.000071  loss: 2.4261 (2.6110)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5740 (1.5729)  time: 0.6021  data: 0.0005  max mem: 53905
Epoch: [276]  [1200/2502]  eta: 0:12:54  lr: 0.000070  min_lr: 0.000070  loss: 2.8601 (2.6143)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4672 (1.5883)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [276]  [1400/2502]  eta: 0:10:55  lr: 0.000070  min_lr: 0.000070  loss: 2.7174 (2.6177)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5533 (1.5956)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [276]  [1600/2502]  eta: 0:08:56  lr: 0.000069  min_lr: 0.000069  loss: 2.8115 (2.6131)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5432 (1.5899)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [276]  [1800/2502]  eta: 0:06:57  lr: 0.000069  min_lr: 0.000069  loss: 2.6446 (2.6123)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5881 (1.5894)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [276]  [2000/2502]  eta: 0:04:58  lr: 0.000068  min_lr: 0.000068  loss: 2.7883 (2.6157)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5626 (1.5880)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [276]  [2200/2502]  eta: 0:02:59  lr: 0.000068  min_lr: 0.000068  loss: 2.6455 (2.6142)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5330 (1.5920)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [276]  [2400/2502]  eta: 0:01:00  lr: 0.000067  min_lr: 0.000067  loss: 2.6150 (2.6178)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4194 (1.5933)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [276]  [2501/2502]  eta: 0:00:00  lr: 0.000067  min_lr: 0.000067  loss: 2.6371 (2.6172)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5591 (1.5904)  time: 0.5333  data: 0.0007  max mem: 53905
Epoch: [276] Total time: 0:24:44 (0.5935 s / it)
Averaged stats: lr: 0.000067  min_lr: 0.000067  loss: 2.6371 (2.6127)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5591 (1.5904)
Test:  [ 0/50]  eta: 0:02:31  loss: 0.5303 (0.5303)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.0363  data: 2.7334  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.7739 (0.7754)  acc1: 89.2000 (89.4546)  acc5: 98.8000 (98.3273)  time: 0.5356  data: 0.2489  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8025 (0.8110)  acc1: 86.0000 (88.3048)  acc5: 98.0000 (98.1143)  time: 0.2856  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9439 (0.8991)  acc1: 84.4000 (86.2194)  acc5: 97.2000 (97.3032)  time: 0.2857  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1094 (0.9488)  acc1: 81.2000 (84.9268)  acc5: 95.6000 (96.9854)  time: 0.2854  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0954 (0.9688)  acc1: 80.4000 (84.3360)  acc5: 96.0000 (96.8560)  time: 0.2851  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3429 s / it)
* Acc@1 84.428 Acc@5 96.986 loss 0.959
Accuracy of the model on the 50000 test images: 84.4%
Max accuracy: 84.52%
Epoch: [277]  [   0/2502]  eta: 1:57:26  lr: 0.000067  min_lr: 0.000067  loss: 1.8801 (1.8801)  weight_decay: 0.0500 (0.0500)  time: 2.8165  data: 1.7065  max mem: 53905
Epoch: [277]  [ 200/2502]  eta: 0:23:11  lr: 0.000067  min_lr: 0.000067  loss: 2.6077 (2.5776)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4640 (1.5225)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [277]  [ 400/2502]  eta: 0:20:59  lr: 0.000066  min_lr: 0.000066  loss: 2.6939 (2.5998)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5331 (1.5464)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [277]  [ 600/2502]  eta: 0:18:56  lr: 0.000066  min_lr: 0.000066  loss: 2.5535 (2.6026)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5703 (1.5578)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [277]  [ 800/2502]  eta: 0:16:55  lr: 0.000065  min_lr: 0.000065  loss: 2.7662 (2.6096)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4978 (1.5752)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [277]  [1000/2502]  eta: 0:14:55  lr: 0.000065  min_lr: 0.000065  loss: 2.7093 (2.6117)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6434 (1.5831)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [277]  [1200/2502]  eta: 0:12:55  lr: 0.000064  min_lr: 0.000064  loss: 2.4324 (2.6016)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4618 (1.5790)  time: 0.6006  data: 0.0004  max mem: 53905
Epoch: [277]  [1400/2502]  eta: 0:10:55  lr: 0.000064  min_lr: 0.000064  loss: 2.6519 (2.6005)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5960 (1.5876)  time: 0.5936  data: 0.0004  max mem: 53905
Epoch: [277]  [1600/2502]  eta: 0:08:56  lr: 0.000064  min_lr: 0.000064  loss: 2.7255 (2.6035)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5375 (nan)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [277]  [1800/2502]  eta: 0:06:57  lr: 0.000063  min_lr: 0.000063  loss: 2.5906 (2.6010)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4161 (nan)  time: 0.5935  data: 0.0004  max mem: 53905
Epoch: [277]  [2000/2502]  eta: 0:04:58  lr: 0.000063  min_lr: 0.000063  loss: 2.7441 (2.6045)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4904 (nan)  time: 0.5935  data: 0.0005  max mem: 53905
Epoch: [277]  [2200/2502]  eta: 0:02:59  lr: 0.000062  min_lr: 0.000062  loss: 2.6996 (2.6080)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6653 (nan)  time: 0.5940  data: 0.0005  max mem: 53905
Epoch: [277]  [2400/2502]  eta: 0:01:00  lr: 0.000062  min_lr: 0.000062  loss: 2.6903 (2.6145)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5476 (nan)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [277]  [2501/2502]  eta: 0:00:00  lr: 0.000062  min_lr: 0.000062  loss: 2.8557 (2.6127)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5470 (nan)  time: 0.5347  data: 0.0008  max mem: 53905
Epoch: [277] Total time: 0:24:47 (0.5946 s / it)
Averaged stats: lr: 0.000062  min_lr: 0.000062  loss: 2.8557 (2.6109)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5470 (nan)
Test:  [ 0/50]  eta: 0:03:07  loss: 0.4788 (0.4788)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.7567  data: 3.4246  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.7267 (0.7192)  acc1: 89.2000 (89.3091)  acc5: 98.8000 (98.2909)  time: 0.6086  data: 0.3206  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7630 (0.7537)  acc1: 86.0000 (88.1714)  acc5: 98.0000 (98.1524)  time: 0.2892  data: 0.0054  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.8936 (0.8421)  acc1: 84.4000 (86.3355)  acc5: 97.2000 (97.3677)  time: 0.2847  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0489 (0.8908)  acc1: 81.2000 (85.0244)  acc5: 95.6000 (97.0537)  time: 0.2843  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0239 (0.9102)  acc1: 80.8000 (84.5520)  acc5: 96.4000 (96.9600)  time: 0.2839  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3579 s / it)
* Acc@1 84.572 Acc@5 97.014 loss 0.901
Accuracy of the model on the 50000 test images: 84.6%
Max accuracy: 84.57%
Epoch: [278]  [   0/2502]  eta: 1:36:16  lr: 0.000062  min_lr: 0.000062  loss: 2.7082 (2.7082)  weight_decay: 0.0500 (0.0500)  time: 2.3086  data: 1.7009  max mem: 53905
Epoch: [278]  [ 200/2502]  eta: 0:23:08  lr: 0.000061  min_lr: 0.000061  loss: 2.7471 (2.5693)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5156 (inf)  time: 0.5934  data: 0.0007  max mem: 53905
Epoch: [278]  [ 400/2502]  eta: 0:20:56  lr: 0.000061  min_lr: 0.000061  loss: 2.4160 (2.5858)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5652 (inf)  time: 0.5937  data: 0.0004  max mem: 53905
Epoch: [278]  [ 600/2502]  eta: 0:18:54  lr: 0.000060  min_lr: 0.000060  loss: 2.7428 (2.6017)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6004 (inf)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [278]  [ 800/2502]  eta: 0:16:54  lr: 0.000060  min_lr: 0.000060  loss: 2.7664 (2.6080)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5381 (inf)  time: 0.5933  data: 0.0005  max mem: 53905
Epoch: [278]  [1000/2502]  eta: 0:14:54  lr: 0.000059  min_lr: 0.000059  loss: 2.5222 (2.6077)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5066 (inf)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [278]  [1200/2502]  eta: 0:12:54  lr: 0.000059  min_lr: 0.000059  loss: 2.8444 (2.6071)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4863 (inf)  time: 0.5983  data: 0.0005  max mem: 53905
Epoch: [278]  [1400/2502]  eta: 0:10:55  lr: 0.000059  min_lr: 0.000059  loss: 2.7368 (2.6104)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5166 (inf)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [278]  [1600/2502]  eta: 0:08:56  lr: 0.000058  min_lr: 0.000058  loss: 2.4031 (2.6011)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6184 (inf)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [278]  [1800/2502]  eta: 0:06:57  lr: 0.000058  min_lr: 0.000058  loss: 2.6921 (2.6020)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6329 (inf)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [278]  [2000/2502]  eta: 0:04:58  lr: 0.000057  min_lr: 0.000057  loss: 2.7385 (2.6026)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5143 (inf)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [278]  [2200/2502]  eta: 0:02:59  lr: 0.000057  min_lr: 0.000057  loss: 2.5708 (2.6054)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4927 (inf)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [278]  [2400/2502]  eta: 0:01:00  lr: 0.000056  min_lr: 0.000056  loss: 2.8615 (2.6075)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5325 (inf)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [278]  [2501/2502]  eta: 0:00:00  lr: 0.000056  min_lr: 0.000056  loss: 2.5638 (2.6063)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5680 (inf)  time: 0.5333  data: 0.0007  max mem: 53905
Epoch: [278] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.000056  min_lr: 0.000056  loss: 2.5638 (2.6004)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5680 (inf)
Test:  [ 0/50]  eta: 0:02:41  loss: 0.5358 (0.5358)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.2278  data: 2.8930  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7512 (0.7508)  acc1: 89.6000 (89.5636)  acc5: 98.4000 (98.2909)  time: 0.5556  data: 0.2665  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.7851 (0.7869)  acc1: 86.4000 (88.4952)  acc5: 98.4000 (98.1905)  time: 0.2869  data: 0.0022  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9244 (0.8743)  acc1: 85.2000 (86.3742)  acc5: 97.2000 (97.3806)  time: 0.2855  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1013 (0.9221)  acc1: 80.4000 (85.0342)  acc5: 95.2000 (97.0927)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0775 (0.9402)  acc1: 80.0000 (84.5040)  acc5: 96.0000 (96.9760)  time: 0.2851  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3468 s / it)
* Acc@1 84.614 Acc@5 97.068 loss 0.931
Accuracy of the model on the 50000 test images: 84.6%
Max accuracy: 84.61%
Epoch: [279]  [   0/2502]  eta: 1:43:55  lr: 0.000056  min_lr: 0.000056  loss: 2.5602 (2.5602)  weight_decay: 0.0500 (0.0500)  time: 2.4920  data: 1.8934  max mem: 53905
Epoch: [279]  [ 200/2502]  eta: 0:23:06  lr: 0.000056  min_lr: 0.000056  loss: 2.6411 (2.6114)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5641 (1.5821)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [279]  [ 400/2502]  eta: 0:20:57  lr: 0.000055  min_lr: 0.000055  loss: 2.7856 (2.5935)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4504 (1.5776)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [279]  [ 600/2502]  eta: 0:18:53  lr: 0.000055  min_lr: 0.000055  loss: 2.8450 (2.5960)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4781 (1.5671)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [279]  [ 800/2502]  eta: 0:16:52  lr: 0.000055  min_lr: 0.000055  loss: 2.6682 (2.6060)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5463 (1.5745)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [279]  [1000/2502]  eta: 0:14:53  lr: 0.000054  min_lr: 0.000054  loss: 2.4573 (2.6059)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6299 (1.5797)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [279]  [1200/2502]  eta: 0:12:53  lr: 0.000054  min_lr: 0.000054  loss: 2.7881 (2.6121)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5433 (1.5880)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [279]  [1400/2502]  eta: 0:10:54  lr: 0.000053  min_lr: 0.000053  loss: 2.7217 (2.6039)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5423 (1.5957)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [279]  [1600/2502]  eta: 0:08:55  lr: 0.000053  min_lr: 0.000053  loss: 2.7670 (2.6023)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4754 (1.5834)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [279]  [1800/2502]  eta: 0:06:56  lr: 0.000053  min_lr: 0.000053  loss: 2.4543 (2.6003)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4838 (1.5889)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [279]  [2000/2502]  eta: 0:04:57  lr: 0.000052  min_lr: 0.000052  loss: 2.6291 (2.5981)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5457 (1.5889)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [279]  [2200/2502]  eta: 0:02:59  lr: 0.000052  min_lr: 0.000052  loss: 2.8006 (2.6039)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5728 (1.5914)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [279]  [2400/2502]  eta: 0:01:00  lr: 0.000051  min_lr: 0.000051  loss: 2.5406 (2.6055)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5116 (1.5885)  time: 0.5983  data: 0.0005  max mem: 53905
Epoch: [279]  [2501/2502]  eta: 0:00:00  lr: 0.000051  min_lr: 0.000051  loss: 2.5483 (2.6068)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5875 (1.5904)  time: 0.5331  data: 0.0007  max mem: 53905
Epoch: [279] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.000051  min_lr: 0.000051  loss: 2.5483 (2.6053)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5875 (1.5904)
Test:  [ 0/50]  eta: 0:03:01  loss: 0.5185 (0.5185)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.6219  data: 3.2829  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7707 (0.7595)  acc1: 89.2000 (89.4909)  acc5: 98.4000 (98.2909)  time: 0.5967  data: 0.3076  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8019 (0.7927)  acc1: 86.0000 (88.2857)  acc5: 98.0000 (98.1905)  time: 0.2896  data: 0.0052  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9430 (0.8797)  acc1: 84.8000 (86.2323)  acc5: 97.2000 (97.3548)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0924 (0.9292)  acc1: 80.4000 (84.9366)  acc5: 95.2000 (96.9854)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0844 (0.9467)  acc1: 80.4000 (84.4240)  acc5: 96.0000 (96.8480)  time: 0.2850  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3563 s / it)
* Acc@1 84.590 Acc@5 96.982 loss 0.936
Accuracy of the model on the 50000 test images: 84.6%
Max accuracy: 84.61%
Epoch: [280]  [   0/2502]  eta: 1:50:12  lr: 0.000051  min_lr: 0.000051  loss: 2.3273 (2.3273)  weight_decay: 0.0500 (0.0500)  time: 2.6429  data: 2.0136  max mem: 53905
Epoch: [280]  [ 200/2502]  eta: 0:23:08  lr: 0.000051  min_lr: 0.000051  loss: 2.5419 (2.5851)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5076 (1.7002)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [280]  [ 400/2502]  eta: 0:20:56  lr: 0.000050  min_lr: 0.000050  loss: 2.7680 (2.6028)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5564 (1.6732)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [280]  [ 600/2502]  eta: 0:18:53  lr: 0.000050  min_lr: 0.000050  loss: 2.7899 (2.5843)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5619 (1.6700)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [280]  [ 800/2502]  eta: 0:16:52  lr: 0.000050  min_lr: 0.000050  loss: 2.7266 (2.5894)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6018 (1.6615)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [280]  [1000/2502]  eta: 0:14:53  lr: 0.000049  min_lr: 0.000049  loss: 2.6978 (2.5955)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5939 (1.6702)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [280]  [1200/2502]  eta: 0:12:53  lr: 0.000049  min_lr: 0.000049  loss: 2.6903 (2.5905)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5127 (1.6609)  time: 0.5951  data: 0.0004  max mem: 53905
Epoch: [280]  [1400/2502]  eta: 0:10:54  lr: 0.000048  min_lr: 0.000048  loss: 2.6381 (2.5831)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7367 (1.6595)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [280]  [1600/2502]  eta: 0:08:55  lr: 0.000048  min_lr: 0.000048  loss: 2.4138 (2.5868)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5177 (1.6482)  time: 0.5997  data: 0.0004  max mem: 53905
Epoch: [280]  [1800/2502]  eta: 0:06:56  lr: 0.000048  min_lr: 0.000048  loss: 2.4762 (2.5867)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4413 (1.6382)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [280]  [2000/2502]  eta: 0:04:57  lr: 0.000047  min_lr: 0.000047  loss: 2.7574 (2.5889)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4656 (1.6297)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [280]  [2200/2502]  eta: 0:02:59  lr: 0.000047  min_lr: 0.000047  loss: 2.7530 (2.5901)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5114 (1.6256)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [280]  [2400/2502]  eta: 0:01:00  lr: 0.000046  min_lr: 0.000046  loss: 2.7395 (2.5939)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5052 (1.6207)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [280]  [2501/2502]  eta: 0:00:00  lr: 0.000046  min_lr: 0.000046  loss: 2.7704 (2.5944)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4288 (1.6159)  time: 0.5328  data: 0.0007  max mem: 53905
Epoch: [280] Total time: 0:24:43 (0.5930 s / it)
Averaged stats: lr: 0.000046  min_lr: 0.000046  loss: 2.7704 (2.5978)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4288 (1.6159)
Test:  [ 0/50]  eta: 0:03:16  loss: 0.5466 (0.5466)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.9284  data: 3.5896  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.7976 (0.7872)  acc1: 88.8000 (89.6000)  acc5: 98.8000 (98.2909)  time: 0.6162  data: 0.3267  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8133 (0.8272)  acc1: 86.4000 (88.3048)  acc5: 98.4000 (98.2286)  time: 0.2851  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9552 (0.9106)  acc1: 84.0000 (86.4000)  acc5: 97.2000 (97.4452)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1167 (0.9591)  acc1: 81.6000 (85.0244)  acc5: 96.0000 (97.1707)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1107 (0.9770)  acc1: 80.0000 (84.4880)  acc5: 96.0000 (97.0320)  time: 0.2846  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3605 s / it)
* Acc@1 84.620 Acc@5 97.068 loss 0.967
Accuracy of the model on the 50000 test images: 84.6%
Max accuracy: 84.62%
Epoch: [281]  [   0/2502]  eta: 1:44:34  lr: 0.000046  min_lr: 0.000046  loss: 2.5467 (2.5467)  weight_decay: 0.0500 (0.0500)  time: 2.5079  data: 1.9172  max mem: 53905
Epoch: [281]  [ 200/2502]  eta: 0:23:08  lr: 0.000046  min_lr: 0.000046  loss: 2.8233 (2.5750)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4476 (1.5264)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [281]  [ 400/2502]  eta: 0:20:58  lr: 0.000046  min_lr: 0.000046  loss: 2.3719 (2.5961)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6107 (1.5999)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [281]  [ 600/2502]  eta: 0:18:55  lr: 0.000045  min_lr: 0.000045  loss: 2.6666 (2.5811)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5323 (1.5953)  time: 0.6016  data: 0.0004  max mem: 53905
Epoch: [281]  [ 800/2502]  eta: 0:16:54  lr: 0.000045  min_lr: 0.000045  loss: 2.7009 (2.5864)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4820 (1.6032)  time: 0.5916  data: 0.0005  max mem: 53905
Epoch: [281]  [1000/2502]  eta: 0:14:54  lr: 0.000044  min_lr: 0.000044  loss: 2.5685 (2.5916)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4998 (1.5963)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [281]  [1200/2502]  eta: 0:12:54  lr: 0.000044  min_lr: 0.000044  loss: 2.7087 (2.5967)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5185 (1.5863)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [281]  [1400/2502]  eta: 0:10:55  lr: 0.000044  min_lr: 0.000044  loss: 2.5585 (2.5869)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5411 (1.5802)  time: 0.5942  data: 0.0005  max mem: 53905
Epoch: [281]  [1600/2502]  eta: 0:08:56  lr: 0.000043  min_lr: 0.000043  loss: 2.6386 (2.5896)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5374 (1.5796)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [281]  [1800/2502]  eta: 0:06:57  lr: 0.000043  min_lr: 0.000043  loss: 2.7452 (2.5958)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5144 (1.5801)  time: 0.5994  data: 0.0005  max mem: 53905
Epoch: [281]  [2000/2502]  eta: 0:04:58  lr: 0.000043  min_lr: 0.000043  loss: 2.7103 (2.5953)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5293 (1.5910)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [281]  [2200/2502]  eta: 0:02:59  lr: 0.000042  min_lr: 0.000042  loss: 2.7656 (2.5967)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6067 (1.5921)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [281]  [2400/2502]  eta: 0:01:00  lr: 0.000042  min_lr: 0.000042  loss: 2.6003 (2.5946)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5664 (1.5971)  time: 0.5920  data: 0.0005  max mem: 53905
Epoch: [281]  [2501/2502]  eta: 0:00:00  lr: 0.000042  min_lr: 0.000042  loss: 2.6562 (2.5957)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5849 (1.5975)  time: 0.5333  data: 0.0007  max mem: 53905
Epoch: [281] Total time: 0:24:45 (0.5938 s / it)
Averaged stats: lr: 0.000042  min_lr: 0.000042  loss: 2.6562 (2.5944)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5849 (1.5975)
Test:  [ 0/50]  eta: 0:02:39  loss: 0.5270 (0.5270)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.1867  data: 2.8498  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7933 (0.7779)  acc1: 89.2000 (89.4909)  acc5: 98.4000 (98.2182)  time: 0.5711  data: 0.2826  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8100 (0.8164)  acc1: 86.0000 (88.2857)  acc5: 98.4000 (98.1524)  time: 0.2970  data: 0.0132  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9536 (0.9025)  acc1: 84.8000 (86.3613)  acc5: 97.2000 (97.3677)  time: 0.2845  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1051 (0.9521)  acc1: 81.2000 (85.0244)  acc5: 95.6000 (97.0927)  time: 0.2842  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1041 (0.9696)  acc1: 80.8000 (84.4960)  acc5: 95.6000 (96.9440)  time: 0.2839  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3500 s / it)
* Acc@1 84.594 Acc@5 97.060 loss 0.959
Accuracy of the model on the 50000 test images: 84.6%
Max accuracy: 84.62%
Epoch: [282]  [   0/2502]  eta: 1:44:58  lr: 0.000042  min_lr: 0.000042  loss: 2.8520 (2.8520)  weight_decay: 0.0500 (0.0500)  time: 2.5175  data: 1.6553  max mem: 53905
Epoch: [282]  [ 200/2502]  eta: 0:23:04  lr: 0.000041  min_lr: 0.000041  loss: 2.6831 (2.6109)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5265 (1.5698)  time: 0.5908  data: 0.0005  max mem: 53905
Epoch: [282]  [ 400/2502]  eta: 0:20:54  lr: 0.000041  min_lr: 0.000041  loss: 2.7352 (2.5899)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5625 (1.5746)  time: 0.5970  data: 0.0004  max mem: 53905
Epoch: [282]  [ 600/2502]  eta: 0:18:51  lr: 0.000041  min_lr: 0.000041  loss: 2.6991 (2.5795)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5128 (1.5701)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [282]  [ 800/2502]  eta: 0:16:52  lr: 0.000040  min_lr: 0.000040  loss: 2.7952 (2.5856)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5003 (1.5661)  time: 0.6000  data: 0.0006  max mem: 53905
Epoch: [282]  [1000/2502]  eta: 0:14:52  lr: 0.000040  min_lr: 0.000040  loss: 2.7110 (2.5891)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [282]  [1200/2502]  eta: 0:12:53  lr: 0.000040  min_lr: 0.000040  loss: 2.5533 (2.5858)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5548 (nan)  time: 0.5906  data: 0.0004  max mem: 53905
Epoch: [282]  [1400/2502]  eta: 0:10:54  lr: 0.000039  min_lr: 0.000039  loss: 2.7132 (2.5902)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4810 (nan)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [282]  [1600/2502]  eta: 0:08:55  lr: 0.000039  min_lr: 0.000039  loss: 2.7164 (2.5891)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6260 (nan)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [282]  [1800/2502]  eta: 0:06:56  lr: 0.000038  min_lr: 0.000038  loss: 2.6776 (2.5855)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5249 (nan)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [282]  [2000/2502]  eta: 0:04:57  lr: 0.000038  min_lr: 0.000038  loss: 2.5980 (2.5854)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5278 (nan)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [282]  [2200/2502]  eta: 0:02:59  lr: 0.000038  min_lr: 0.000038  loss: 2.5070 (2.5858)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5746 (nan)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [282]  [2400/2502]  eta: 0:01:00  lr: 0.000037  min_lr: 0.000037  loss: 2.8939 (2.5865)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6976 (nan)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [282]  [2501/2502]  eta: 0:00:00  lr: 0.000037  min_lr: 0.000037  loss: 2.7962 (2.5872)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5124 (nan)  time: 0.5330  data: 0.0007  max mem: 53905
Epoch: [282] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.000037  min_lr: 0.000037  loss: 2.7962 (2.5946)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5124 (nan)
Test:  [ 0/50]  eta: 0:02:45  loss: 0.5497 (0.5497)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.3164  data: 2.9942  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.7866 (0.7690)  acc1: 90.0000 (89.6364)  acc5: 98.8000 (98.2909)  time: 0.6049  data: 0.3174  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8044 (0.8093)  acc1: 86.0000 (88.4381)  acc5: 98.0000 (98.2095)  time: 0.3150  data: 0.0251  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9349 (0.8936)  acc1: 84.8000 (86.5161)  acc5: 97.2000 (97.4194)  time: 0.2913  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1009 (0.9400)  acc1: 81.2000 (85.1707)  acc5: 95.6000 (97.1415)  time: 0.2852  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0667 (0.9596)  acc1: 80.4000 (84.5200)  acc5: 96.0000 (97.0000)  time: 0.2840  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3596 s / it)
* Acc@1 84.636 Acc@5 97.098 loss 0.949
Accuracy of the model on the 50000 test images: 84.6%
Max accuracy: 84.64%
Epoch: [283]  [   0/2502]  eta: 1:43:20  lr: 0.000037  min_lr: 0.000037  loss: 2.3737 (2.3737)  weight_decay: 0.0500 (0.0500)  time: 2.4784  data: 1.8739  max mem: 53905
Epoch: [283]  [ 200/2502]  eta: 0:23:06  lr: 0.000037  min_lr: 0.000037  loss: 2.7267 (2.6149)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5751 (1.7447)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [283]  [ 400/2502]  eta: 0:20:58  lr: 0.000037  min_lr: 0.000037  loss: 2.6680 (2.5936)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4606 (1.6501)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [283]  [ 600/2502]  eta: 0:18:54  lr: 0.000036  min_lr: 0.000036  loss: 2.3507 (2.6008)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5823 (1.6493)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [283]  [ 800/2502]  eta: 0:16:53  lr: 0.000036  min_lr: 0.000036  loss: 2.6021 (2.5953)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5818 (1.6183)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [283]  [1000/2502]  eta: 0:14:53  lr: 0.000036  min_lr: 0.000036  loss: 2.5165 (2.5941)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4983 (1.6257)  time: 0.5931  data: 0.0004  max mem: 53905
Epoch: [283]  [1200/2502]  eta: 0:12:54  lr: 0.000035  min_lr: 0.000035  loss: 2.5823 (2.5951)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5096 (1.6217)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [283]  [1400/2502]  eta: 0:10:54  lr: 0.000035  min_lr: 0.000035  loss: 2.5677 (2.5935)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4272 (1.6182)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [283]  [1600/2502]  eta: 0:08:56  lr: 0.000035  min_lr: 0.000035  loss: 2.8360 (2.5947)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6638 (1.6266)  time: 0.5939  data: 0.0005  max mem: 53905
Epoch: [283]  [1800/2502]  eta: 0:06:57  lr: 0.000034  min_lr: 0.000034  loss: 2.6577 (2.5901)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5241 (1.6192)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [283]  [2000/2502]  eta: 0:04:58  lr: 0.000034  min_lr: 0.000034  loss: 2.6291 (2.5920)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4248 (1.6111)  time: 0.5960  data: 0.0004  max mem: 53905
Epoch: [283]  [2200/2502]  eta: 0:02:59  lr: 0.000034  min_lr: 0.000034  loss: 2.5633 (2.5898)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4904 (1.6201)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [283]  [2400/2502]  eta: 0:01:00  lr: 0.000033  min_lr: 0.000033  loss: 2.7384 (2.5915)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5571 (1.6148)  time: 0.5922  data: 0.0005  max mem: 53905
Epoch: [283]  [2501/2502]  eta: 0:00:00  lr: 0.000033  min_lr: 0.000033  loss: 2.7244 (2.5892)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4451 (1.6134)  time: 0.5320  data: 0.0007  max mem: 53905
Epoch: [283] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.000033  min_lr: 0.000033  loss: 2.7244 (2.5882)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4451 (1.6134)
Test:  [ 0/50]  eta: 0:02:35  loss: 0.5096 (0.5096)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.1081  data: 2.7909  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7415 (0.7341)  acc1: 89.6000 (89.6364)  acc5: 99.2000 (98.2546)  time: 0.5509  data: 0.2659  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.7832 (0.7751)  acc1: 86.4000 (88.3048)  acc5: 98.0000 (98.1143)  time: 0.2889  data: 0.0069  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9027 (0.8599)  acc1: 84.4000 (86.4258)  acc5: 97.2000 (97.4323)  time: 0.2827  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0756 (0.9076)  acc1: 81.2000 (85.1317)  acc5: 95.6000 (97.1220)  time: 0.2826  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0548 (0.9260)  acc1: 80.0000 (84.5600)  acc5: 96.4000 (96.9840)  time: 0.2824  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3442 s / it)
* Acc@1 84.578 Acc@5 97.108 loss 0.917
Accuracy of the model on the 50000 test images: 84.6%
Max accuracy: 84.64%
Epoch: [284]  [   0/2502]  eta: 1:52:04  lr: 0.000033  min_lr: 0.000033  loss: 2.4533 (2.4533)  weight_decay: 0.0500 (0.0500)  time: 2.6878  data: 1.6470  max mem: 53905
Epoch: [284]  [ 200/2502]  eta: 0:23:08  lr: 0.000033  min_lr: 0.000033  loss: 2.7507 (2.5366)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5438 (1.5471)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [284]  [ 400/2502]  eta: 0:20:56  lr: 0.000032  min_lr: 0.000032  loss: 2.6070 (2.5758)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4960 (1.5871)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [284]  [ 600/2502]  eta: 0:18:53  lr: 0.000032  min_lr: 0.000032  loss: 2.1980 (2.5602)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5423 (1.6003)  time: 0.5939  data: 0.0004  max mem: 53905
Epoch: [284]  [ 800/2502]  eta: 0:16:52  lr: 0.000032  min_lr: 0.000032  loss: 2.8218 (2.5684)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5282 (1.6156)  time: 0.5946  data: 0.0005  max mem: 53905
Epoch: [284]  [1000/2502]  eta: 0:14:53  lr: 0.000032  min_lr: 0.000032  loss: 2.5377 (2.5697)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5507 (1.6060)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [284]  [1200/2502]  eta: 0:12:54  lr: 0.000031  min_lr: 0.000031  loss: 2.5748 (2.5719)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6772 (1.6223)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [284]  [1400/2502]  eta: 0:10:54  lr: 0.000031  min_lr: 0.000031  loss: 2.7059 (2.5758)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5571 (1.6232)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [284]  [1600/2502]  eta: 0:08:55  lr: 0.000031  min_lr: 0.000031  loss: 2.8175 (2.5765)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6257 (1.6349)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [284]  [1800/2502]  eta: 0:06:56  lr: 0.000030  min_lr: 0.000030  loss: 2.6725 (2.5757)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5122 (1.6341)  time: 0.5929  data: 0.0005  max mem: 53905
Epoch: [284]  [2000/2502]  eta: 0:04:58  lr: 0.000030  min_lr: 0.000030  loss: 2.8711 (2.5756)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5389 (1.6388)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [284]  [2200/2502]  eta: 0:02:59  lr: 0.000030  min_lr: 0.000030  loss: 2.6098 (2.5787)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4894 (1.6332)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [284]  [2400/2502]  eta: 0:01:00  lr: 0.000029  min_lr: 0.000029  loss: 2.5536 (2.5793)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3957 (1.6319)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [284]  [2501/2502]  eta: 0:00:00  lr: 0.000029  min_lr: 0.000029  loss: 2.6941 (2.5783)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5933 (1.6309)  time: 0.5336  data: 0.0008  max mem: 53905
Epoch: [284] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.000029  min_lr: 0.000029  loss: 2.6941 (2.5893)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5933 (1.6309)
Test:  [ 0/50]  eta: 0:02:51  loss: 0.4982 (0.4982)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.4283  data: 3.1144  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7457 (0.7354)  acc1: 89.2000 (89.5636)  acc5: 98.8000 (98.2546)  time: 0.5708  data: 0.2835  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7685 (0.7740)  acc1: 86.8000 (88.4381)  acc5: 98.0000 (98.1714)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9150 (0.8624)  acc1: 84.8000 (86.4774)  acc5: 97.2000 (97.3290)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0797 (0.9093)  acc1: 81.2000 (85.1902)  acc5: 95.6000 (97.0342)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0421 (0.9267)  acc1: 80.8000 (84.6160)  acc5: 96.4000 (96.9200)  time: 0.2845  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3504 s / it)
* Acc@1 84.662 Acc@5 97.078 loss 0.917
Accuracy of the model on the 50000 test images: 84.7%
Max accuracy: 84.66%
Epoch: [285]  [   0/2502]  eta: 1:49:11  lr: 0.000029  min_lr: 0.000029  loss: 2.9029 (2.9029)  weight_decay: 0.0500 (0.0500)  time: 2.6187  data: 2.0100  max mem: 53905
Epoch: [285]  [ 200/2502]  eta: 0:23:08  lr: 0.000029  min_lr: 0.000029  loss: 2.8310 (2.5668)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5810 (1.6242)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [285]  [ 400/2502]  eta: 0:20:58  lr: 0.000029  min_lr: 0.000029  loss: 2.7167 (2.5590)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6644 (1.6405)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [285]  [ 600/2502]  eta: 0:18:54  lr: 0.000028  min_lr: 0.000028  loss: 2.8606 (2.5756)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5757 (1.6284)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [285]  [ 800/2502]  eta: 0:16:53  lr: 0.000028  min_lr: 0.000028  loss: 2.3832 (2.5741)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4996 (1.6431)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [285]  [1000/2502]  eta: 0:14:54  lr: 0.000028  min_lr: 0.000028  loss: 2.6256 (2.5717)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5326 (1.6268)  time: 0.5930  data: 0.0006  max mem: 53905
Epoch: [285]  [1200/2502]  eta: 0:12:54  lr: 0.000027  min_lr: 0.000027  loss: 2.5772 (2.5724)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6073 (1.6115)  time: 0.5909  data: 0.0005  max mem: 53905
Epoch: [285]  [1400/2502]  eta: 0:10:55  lr: 0.000027  min_lr: 0.000027  loss: 2.5612 (2.5728)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5638 (1.6097)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [285]  [1600/2502]  eta: 0:08:56  lr: 0.000027  min_lr: 0.000027  loss: 2.4935 (2.5718)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4875 (1.6150)  time: 0.5915  data: 0.0005  max mem: 53905
Epoch: [285]  [1800/2502]  eta: 0:06:57  lr: 0.000027  min_lr: 0.000027  loss: 2.6770 (2.5751)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6029 (1.6128)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [285]  [2000/2502]  eta: 0:04:58  lr: 0.000026  min_lr: 0.000026  loss: 2.6414 (2.5761)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5132 (1.6104)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [285]  [2200/2502]  eta: 0:02:59  lr: 0.000026  min_lr: 0.000026  loss: 2.6922 (2.5764)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4835 (1.6048)  time: 0.5944  data: 0.0005  max mem: 53905
Epoch: [285]  [2400/2502]  eta: 0:01:00  lr: 0.000026  min_lr: 0.000026  loss: 2.7992 (2.5785)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5580 (1.6132)  time: 0.5930  data: 0.0006  max mem: 53905
Epoch: [285]  [2501/2502]  eta: 0:00:00  lr: 0.000026  min_lr: 0.000026  loss: 2.5511 (2.5778)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5029 (1.6103)  time: 0.5329  data: 0.0008  max mem: 53905
Epoch: [285] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.000026  min_lr: 0.000026  loss: 2.5511 (2.5851)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5029 (1.6103)
Test:  [ 0/50]  eta: 0:02:34  loss: 0.4996 (0.4996)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.0959  data: 2.7766  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.7190 (0.7265)  acc1: 89.2000 (89.3818)  acc5: 99.2000 (98.4000)  time: 0.5499  data: 0.2655  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.7637 (0.7628)  acc1: 86.0000 (88.2286)  acc5: 98.0000 (98.1333)  time: 0.2884  data: 0.0074  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.8896 (0.8481)  acc1: 85.2000 (86.3613)  acc5: 96.8000 (97.3290)  time: 0.2816  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0403 (0.8950)  acc1: 80.8000 (85.0829)  acc5: 95.6000 (97.0537)  time: 0.2815  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0129 (0.9123)  acc1: 79.6000 (84.4880)  acc5: 96.0000 (96.9280)  time: 0.2813  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3433 s / it)
* Acc@1 84.654 Acc@5 97.058 loss 0.902
Accuracy of the model on the 50000 test images: 84.7%
Max accuracy: 84.66%
Epoch: [286]  [   0/2502]  eta: 1:49:57  lr: 0.000026  min_lr: 0.000026  loss: 1.9060 (1.9060)  weight_decay: 0.0500 (0.0500)  time: 2.6368  data: 1.8690  max mem: 53905
Epoch: [286]  [ 200/2502]  eta: 0:23:06  lr: 0.000025  min_lr: 0.000025  loss: 2.6470 (2.5829)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5610 (1.6409)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [286]  [ 400/2502]  eta: 0:20:57  lr: 0.000025  min_lr: 0.000025  loss: 2.6699 (2.5946)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6076 (1.6423)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [286]  [ 600/2502]  eta: 0:18:54  lr: 0.000025  min_lr: 0.000025  loss: 2.5575 (2.5904)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5181 (1.6228)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [286]  [ 800/2502]  eta: 0:16:52  lr: 0.000025  min_lr: 0.000025  loss: 2.6344 (2.5968)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5556 (1.6048)  time: 0.5905  data: 0.0004  max mem: 53905
Epoch: [286]  [1000/2502]  eta: 0:14:53  lr: 0.000024  min_lr: 0.000024  loss: 2.5359 (2.5934)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5014 (1.5917)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [286]  [1200/2502]  eta: 0:12:53  lr: 0.000024  min_lr: 0.000024  loss: 2.6027 (2.5885)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4549 (1.5933)  time: 0.5924  data: 0.0005  max mem: 53905
Epoch: [286]  [1400/2502]  eta: 0:10:54  lr: 0.000024  min_lr: 0.000024  loss: 2.6868 (2.5907)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4354 (1.5936)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [286]  [1600/2502]  eta: 0:08:55  lr: 0.000023  min_lr: 0.000023  loss: 2.6581 (2.5913)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4961 (1.5916)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [286]  [1800/2502]  eta: 0:06:56  lr: 0.000023  min_lr: 0.000023  loss: 2.3238 (2.5922)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6503 (1.5946)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [286]  [2000/2502]  eta: 0:04:58  lr: 0.000023  min_lr: 0.000023  loss: 2.5443 (2.5881)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5302 (1.6047)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [286]  [2200/2502]  eta: 0:02:59  lr: 0.000023  min_lr: 0.000023  loss: 2.6898 (2.5909)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5669 (1.6086)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [286]  [2400/2502]  eta: 0:01:00  lr: 0.000022  min_lr: 0.000022  loss: 2.6369 (2.5903)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5770 (1.6151)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [286]  [2501/2502]  eta: 0:00:00  lr: 0.000022  min_lr: 0.000022  loss: 2.5581 (2.5886)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5637 (1.6169)  time: 0.5333  data: 0.0007  max mem: 53905
Epoch: [286] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.000022  min_lr: 0.000022  loss: 2.5581 (2.5915)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5637 (1.6169)
Test:  [ 0/50]  eta: 0:03:26  loss: 0.4992 (0.4992)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 4.1257  data: 3.8144  max mem: 53905
Test:  [10/50]  eta: 0:00:25  loss: 0.7465 (0.7440)  acc1: 89.2000 (89.5273)  acc5: 98.8000 (98.1455)  time: 0.6339  data: 0.3472  max mem: 53905
Test:  [20/50]  eta: 0:00:14  loss: 0.7773 (0.7761)  acc1: 86.0000 (88.3238)  acc5: 98.0000 (98.0952)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:08  loss: 0.9128 (0.8632)  acc1: 84.8000 (86.4129)  acc5: 97.2000 (97.3419)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0601 (0.9124)  acc1: 80.8000 (85.1122)  acc5: 95.6000 (97.0146)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0559 (0.9293)  acc1: 80.4000 (84.5920)  acc5: 96.0000 (96.9120)  time: 0.2845  data: 0.0002  max mem: 53905
Test: Total time: 0:00:18 (0.3643 s / it)
* Acc@1 84.636 Acc@5 97.036 loss 0.919
Accuracy of the model on the 50000 test images: 84.6%
Max accuracy: 84.66%
Epoch: [287]  [   0/2502]  eta: 1:44:27  lr: 0.000022  min_lr: 0.000022  loss: 2.3379 (2.3379)  weight_decay: 0.0500 (0.0500)  time: 2.5049  data: 1.8953  max mem: 53905
Epoch: [287]  [ 200/2502]  eta: 0:23:06  lr: 0.000022  min_lr: 0.000022  loss: 2.6518 (2.5895)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5877 (1.6351)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [287]  [ 400/2502]  eta: 0:20:58  lr: 0.000022  min_lr: 0.000022  loss: 2.6860 (2.6050)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6425 (1.6420)  time: 0.5931  data: 0.0005  max mem: 53905
Epoch: [287]  [ 600/2502]  eta: 0:18:54  lr: 0.000021  min_lr: 0.000021  loss: 2.7597 (2.5990)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6453 (1.6299)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [287]  [ 800/2502]  eta: 0:16:53  lr: 0.000021  min_lr: 0.000021  loss: 2.6321 (2.6006)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5244 (1.6140)  time: 0.5974  data: 0.0004  max mem: 53905
Epoch: [287]  [1000/2502]  eta: 0:14:53  lr: 0.000021  min_lr: 0.000021  loss: 2.6069 (2.5917)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4977 (1.6177)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [287]  [1200/2502]  eta: 0:12:54  lr: 0.000021  min_lr: 0.000021  loss: 2.7091 (2.5926)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5438 (1.6190)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [287]  [1400/2502]  eta: 0:10:54  lr: 0.000020  min_lr: 0.000020  loss: 2.5741 (2.5905)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7268 (1.6284)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [287]  [1600/2502]  eta: 0:08:55  lr: 0.000020  min_lr: 0.000020  loss: 2.6184 (2.5910)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5192 (1.6222)  time: 0.5904  data: 0.0004  max mem: 53905
Epoch: [287]  [1800/2502]  eta: 0:06:56  lr: 0.000020  min_lr: 0.000020  loss: 2.8553 (2.5975)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5103 (1.6254)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [287]  [2000/2502]  eta: 0:04:58  lr: 0.000020  min_lr: 0.000020  loss: 2.6732 (2.5973)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6016 (1.6328)  time: 0.5917  data: 0.0004  max mem: 53905
Epoch: [287]  [2200/2502]  eta: 0:02:59  lr: 0.000019  min_lr: 0.000019  loss: 2.5386 (2.5944)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5187 (1.6295)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [287]  [2400/2502]  eta: 0:01:00  lr: 0.000019  min_lr: 0.000019  loss: 2.6878 (2.5962)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4871 (1.6258)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [287]  [2501/2502]  eta: 0:00:00  lr: 0.000019  min_lr: 0.000019  loss: 2.6741 (2.5924)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4671 (1.6238)  time: 0.5332  data: 0.0007  max mem: 53905
Epoch: [287] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.000019  min_lr: 0.000019  loss: 2.6741 (2.5828)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4671 (1.6238)
Test:  [ 0/50]  eta: 0:02:44  loss: 0.5111 (0.5111)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.2975  data: 2.9706  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7166 (0.7287)  acc1: 89.2000 (89.3455)  acc5: 99.2000 (98.4000)  time: 0.5587  data: 0.2705  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.7661 (0.7655)  acc1: 86.0000 (88.2476)  acc5: 98.4000 (98.1905)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.8873 (0.8486)  acc1: 84.8000 (86.4258)  acc5: 96.8000 (97.4452)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0520 (0.8964)  acc1: 81.6000 (85.1707)  acc5: 95.6000 (97.1220)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0356 (0.9150)  acc1: 80.0000 (84.6320)  acc5: 96.0000 (96.9920)  time: 0.2845  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3475 s / it)
* Acc@1 84.748 Acc@5 97.086 loss 0.905
Accuracy of the model on the 50000 test images: 84.7%
Max accuracy: 84.75%
Epoch: [288]  [   0/2502]  eta: 1:42:41  lr: 0.000019  min_lr: 0.000019  loss: 1.8548 (1.8548)  weight_decay: 0.0500 (0.0500)  time: 2.4628  data: 1.8677  max mem: 53905
Epoch: [288]  [ 200/2502]  eta: 0:23:05  lr: 0.000019  min_lr: 0.000019  loss: 2.5191 (2.5581)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5329 (1.6281)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [288]  [ 400/2502]  eta: 0:20:55  lr: 0.000019  min_lr: 0.000019  loss: 2.8124 (2.5629)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5884 (1.6534)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [288]  [ 600/2502]  eta: 0:18:53  lr: 0.000018  min_lr: 0.000018  loss: 2.7868 (2.5766)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5617 (1.6368)  time: 0.5907  data: 0.0004  max mem: 53905
Epoch: [288]  [ 800/2502]  eta: 0:16:52  lr: 0.000018  min_lr: 0.000018  loss: 2.6772 (2.5669)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5674 (1.6354)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [288]  [1000/2502]  eta: 0:14:53  lr: 0.000018  min_lr: 0.000018  loss: 2.7712 (2.5685)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6572 (1.6352)  time: 0.5925  data: 0.0004  max mem: 53905
Epoch: [288]  [1200/2502]  eta: 0:12:53  lr: 0.000018  min_lr: 0.000018  loss: 2.6692 (2.5795)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5292 (1.6287)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [288]  [1400/2502]  eta: 0:10:54  lr: 0.000017  min_lr: 0.000017  loss: 2.5328 (2.5778)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5612 (1.6243)  time: 0.5947  data: 0.0004  max mem: 53905
Epoch: [288]  [1600/2502]  eta: 0:08:55  lr: 0.000017  min_lr: 0.000017  loss: 2.6989 (2.5805)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5870 (1.6270)  time: 0.5989  data: 0.0004  max mem: 53905
Epoch: [288]  [1800/2502]  eta: 0:06:56  lr: 0.000017  min_lr: 0.000017  loss: 2.4851 (2.5801)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6463 (1.6445)  time: 0.5979  data: 0.0004  max mem: 53905
Epoch: [288]  [2000/2502]  eta: 0:04:57  lr: 0.000017  min_lr: 0.000017  loss: 2.4571 (2.5786)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5066 (1.6406)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [288]  [2200/2502]  eta: 0:02:59  lr: 0.000017  min_lr: 0.000017  loss: 2.7166 (2.5781)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6334 (inf)  time: 0.5973  data: 0.0005  max mem: 53905
Epoch: [288]  [2400/2502]  eta: 0:01:00  lr: 0.000016  min_lr: 0.000016  loss: 2.7723 (2.5806)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4897 (inf)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [288]  [2501/2502]  eta: 0:00:00  lr: 0.000016  min_lr: 0.000016  loss: 2.6241 (2.5794)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5445 (inf)  time: 0.5319  data: 0.0008  max mem: 53905
Epoch: [288] Total time: 0:24:44 (0.5932 s / it)
Averaged stats: lr: 0.000016  min_lr: 0.000016  loss: 2.6241 (2.5758)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5445 (inf)
Test:  [ 0/50]  eta: 0:02:43  loss: 0.5241 (0.5241)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.2791  data: 2.9566  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7601 (0.7644)  acc1: 89.2000 (89.3818)  acc5: 98.4000 (98.1818)  time: 0.5587  data: 0.2741  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8002 (0.8022)  acc1: 86.4000 (88.2286)  acc5: 98.0000 (98.0381)  time: 0.2842  data: 0.0032  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9214 (0.8866)  acc1: 84.8000 (86.4129)  acc5: 96.8000 (97.3419)  time: 0.2819  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1015 (0.9347)  acc1: 81.6000 (85.1512)  acc5: 96.0000 (97.0342)  time: 0.2817  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0747 (0.9542)  acc1: 80.4000 (84.5440)  acc5: 96.4000 (96.9280)  time: 0.2814  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3446 s / it)
* Acc@1 84.634 Acc@5 97.072 loss 0.944
Accuracy of the model on the 50000 test images: 84.6%
Max accuracy: 84.75%
Epoch: [289]  [   0/2502]  eta: 1:41:25  lr: 0.000016  min_lr: 0.000016  loss: 3.3106 (3.3106)  weight_decay: 0.0500 (0.0500)  time: 2.4323  data: 1.7043  max mem: 53905
Epoch: [289]  [ 200/2502]  eta: 0:23:06  lr: 0.000016  min_lr: 0.000016  loss: 2.6375 (2.6113)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5894 (nan)  time: 0.5972  data: 0.0004  max mem: 53905
Epoch: [289]  [ 400/2502]  eta: 0:20:57  lr: 0.000016  min_lr: 0.000016  loss: 2.7147 (2.6057)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4863 (nan)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [289]  [ 600/2502]  eta: 0:18:54  lr: 0.000016  min_lr: 0.000016  loss: 2.6641 (2.5980)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5126 (nan)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [289]  [ 800/2502]  eta: 0:16:54  lr: 0.000015  min_lr: 0.000015  loss: 2.5143 (2.5813)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4900 (nan)  time: 0.5996  data: 0.0006  max mem: 53905
Epoch: [289]  [1000/2502]  eta: 0:14:54  lr: 0.000015  min_lr: 0.000015  loss: 2.5030 (2.5730)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5337 (nan)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [289]  [1200/2502]  eta: 0:12:54  lr: 0.000015  min_lr: 0.000015  loss: 2.6521 (2.5732)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5817 (nan)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [289]  [1400/2502]  eta: 0:10:55  lr: 0.000015  min_lr: 0.000015  loss: 2.7088 (2.5816)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6211 (nan)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [289]  [1600/2502]  eta: 0:08:56  lr: 0.000014  min_lr: 0.000014  loss: 2.7662 (2.5761)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5686 (nan)  time: 0.5940  data: 0.0006  max mem: 53905
Epoch: [289]  [1800/2502]  eta: 0:06:57  lr: 0.000014  min_lr: 0.000014  loss: 2.7002 (2.5785)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4995 (nan)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [289]  [2000/2502]  eta: 0:04:58  lr: 0.000014  min_lr: 0.000014  loss: 2.6608 (2.5820)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5649 (nan)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [289]  [2200/2502]  eta: 0:02:59  lr: 0.000014  min_lr: 0.000014  loss: 2.7753 (2.5797)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6002 (nan)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [289]  [2400/2502]  eta: 0:01:00  lr: 0.000014  min_lr: 0.000014  loss: 2.6161 (2.5792)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5384 (nan)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [289]  [2501/2502]  eta: 0:00:00  lr: 0.000014  min_lr: 0.000014  loss: 2.6467 (2.5757)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5809 (nan)  time: 0.5330  data: 0.0008  max mem: 53905
Epoch: [289] Total time: 0:24:46 (0.5940 s / it)
Averaged stats: lr: 0.000014  min_lr: 0.000014  loss: 2.6467 (2.5760)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5809 (nan)
Test:  [ 0/50]  eta: 0:02:38  loss: 0.5407 (0.5407)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.1709  data: 2.8530  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.7830 (0.7732)  acc1: 89.6000 (89.4909)  acc5: 98.4000 (98.2909)  time: 0.5487  data: 0.2645  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8119 (0.8095)  acc1: 86.4000 (88.3238)  acc5: 98.0000 (98.1524)  time: 0.2841  data: 0.0031  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9421 (0.8967)  acc1: 84.8000 (86.4645)  acc5: 97.2000 (97.4323)  time: 0.2818  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1153 (0.9451)  acc1: 81.2000 (85.0927)  acc5: 95.6000 (97.1024)  time: 0.2815  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0805 (0.9646)  acc1: 81.2000 (84.6080)  acc5: 96.4000 (97.0000)  time: 0.2814  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3424 s / it)
* Acc@1 84.728 Acc@5 97.100 loss 0.955
Accuracy of the model on the 50000 test images: 84.7%
Max accuracy: 84.75%
Epoch: [290]  [   0/2502]  eta: 1:55:11  lr: 0.000014  min_lr: 0.000014  loss: 2.4728 (2.4728)  weight_decay: 0.0500 (0.0500)  time: 2.7624  data: 2.1611  max mem: 53905
Epoch: [290]  [ 200/2502]  eta: 0:23:05  lr: 0.000013  min_lr: 0.000013  loss: 2.7018 (2.5572)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5530 (1.5771)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [290]  [ 400/2502]  eta: 0:20:55  lr: 0.000013  min_lr: 0.000013  loss: 2.5588 (2.5669)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6960 (1.6388)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [290]  [ 600/2502]  eta: 0:18:54  lr: 0.000013  min_lr: 0.000013  loss: 2.6073 (2.5525)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4657 (1.6211)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [290]  [ 800/2502]  eta: 0:16:53  lr: 0.000013  min_lr: 0.000013  loss: 2.6787 (2.5632)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6115 (1.6206)  time: 0.5927  data: 0.0005  max mem: 53905
Epoch: [290]  [1000/2502]  eta: 0:14:53  lr: 0.000013  min_lr: 0.000013  loss: 2.6788 (2.5681)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5060 (1.6332)  time: 0.5922  data: 0.0007  max mem: 53905
Epoch: [290]  [1200/2502]  eta: 0:12:54  lr: 0.000012  min_lr: 0.000012  loss: 2.5036 (2.5714)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6140 (1.6302)  time: 0.5926  data: 0.0004  max mem: 53905
Epoch: [290]  [1400/2502]  eta: 0:10:54  lr: 0.000012  min_lr: 0.000012  loss: 2.6668 (2.5710)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7073 (1.6418)  time: 0.5903  data: 0.0004  max mem: 53905
Epoch: [290]  [1600/2502]  eta: 0:08:55  lr: 0.000012  min_lr: 0.000012  loss: 2.7579 (2.5759)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6319 (1.6492)  time: 0.5988  data: 0.0006  max mem: 53905
Epoch: [290]  [1800/2502]  eta: 0:06:56  lr: 0.000012  min_lr: 0.000012  loss: 2.6036 (2.5766)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6942 (1.6681)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [290]  [2000/2502]  eta: 0:04:58  lr: 0.000012  min_lr: 0.000012  loss: 2.5811 (2.5751)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5316 (1.6546)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [290]  [2200/2502]  eta: 0:02:59  lr: 0.000011  min_lr: 0.000011  loss: 2.6495 (2.5784)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6216 (1.6584)  time: 0.5919  data: 0.0005  max mem: 53905
Epoch: [290]  [2400/2502]  eta: 0:01:00  lr: 0.000011  min_lr: 0.000011  loss: 2.5712 (2.5773)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5542 (1.6544)  time: 0.5944  data: 0.0004  max mem: 53905
Epoch: [290]  [2501/2502]  eta: 0:00:00  lr: 0.000011  min_lr: 0.000011  loss: 2.5911 (2.5775)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4578 (1.6516)  time: 0.5409  data: 0.0008  max mem: 53905
Epoch: [290] Total time: 0:24:44 (0.5933 s / it)
Averaged stats: lr: 0.000011  min_lr: 0.000011  loss: 2.5911 (2.5786)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4578 (1.6516)
Test:  [ 0/50]  eta: 0:02:59  loss: 0.4970 (0.4970)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.5931  data: 3.2850  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7303 (0.7335)  acc1: 89.6000 (89.6000)  acc5: 98.8000 (98.2909)  time: 0.5855  data: 0.2991  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7812 (0.7745)  acc1: 86.0000 (88.3048)  acc5: 98.0000 (98.1333)  time: 0.2849  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9061 (0.8605)  acc1: 84.8000 (86.4387)  acc5: 96.8000 (97.3806)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0679 (0.9098)  acc1: 81.2000 (85.0829)  acc5: 95.6000 (97.1024)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0581 (0.9275)  acc1: 80.8000 (84.5360)  acc5: 96.4000 (96.9920)  time: 0.2846  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3532 s / it)
* Acc@1 84.690 Acc@5 97.100 loss 0.918
Accuracy of the model on the 50000 test images: 84.7%
Max accuracy: 84.75%
Epoch: [291]  [   0/2502]  eta: 1:46:00  lr: 0.000011  min_lr: 0.000011  loss: 2.5305 (2.5305)  weight_decay: 0.0500 (0.0500)  time: 2.5423  data: 1.7264  max mem: 53905
Epoch: [291]  [ 200/2502]  eta: 0:23:11  lr: 0.000011  min_lr: 0.000011  loss: 2.4774 (2.5784)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5497 (1.6991)  time: 0.5985  data: 0.0005  max mem: 53905
Epoch: [291]  [ 400/2502]  eta: 0:20:57  lr: 0.000011  min_lr: 0.000011  loss: 2.3832 (2.5795)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5358 (1.6528)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [291]  [ 600/2502]  eta: 0:18:55  lr: 0.000011  min_lr: 0.000011  loss: 2.8512 (2.6039)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6674 (1.6758)  time: 0.5929  data: 0.0008  max mem: 53905
Epoch: [291]  [ 800/2502]  eta: 0:16:54  lr: 0.000010  min_lr: 0.000010  loss: 2.5484 (2.5996)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5811 (1.6532)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [291]  [1000/2502]  eta: 0:14:54  lr: 0.000010  min_lr: 0.000010  loss: 2.7105 (2.5893)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5301 (1.6408)  time: 0.5914  data: 0.0006  max mem: 53905
Epoch: [291]  [1200/2502]  eta: 0:12:54  lr: 0.000010  min_lr: 0.000010  loss: 2.6482 (2.5940)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5407 (1.6347)  time: 0.5910  data: 0.0005  max mem: 53905
Epoch: [291]  [1400/2502]  eta: 0:10:55  lr: 0.000010  min_lr: 0.000010  loss: 2.4273 (2.5923)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6342 (1.6520)  time: 0.5923  data: 0.0005  max mem: 53905
Epoch: [291]  [1600/2502]  eta: 0:08:55  lr: 0.000010  min_lr: 0.000010  loss: 2.6299 (2.5906)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5670 (1.6536)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [291]  [1800/2502]  eta: 0:06:56  lr: 0.000010  min_lr: 0.000010  loss: 2.6149 (2.5868)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5183 (1.6611)  time: 0.5914  data: 0.0005  max mem: 53905
Epoch: [291]  [2000/2502]  eta: 0:04:58  lr: 0.000009  min_lr: 0.000009  loss: 2.8430 (2.5869)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6259 (1.6579)  time: 0.5919  data: 0.0008  max mem: 53905
Epoch: [291]  [2200/2502]  eta: 0:02:59  lr: 0.000009  min_lr: 0.000009  loss: 2.6662 (2.5856)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5744 (1.6534)  time: 0.5963  data: 0.0008  max mem: 53905
Epoch: [291]  [2400/2502]  eta: 0:01:00  lr: 0.000009  min_lr: 0.000009  loss: 2.6460 (2.5888)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6202 (1.6501)  time: 0.5922  data: 0.0006  max mem: 53905
Epoch: [291]  [2501/2502]  eta: 0:00:00  lr: 0.000009  min_lr: 0.000009  loss: 2.5771 (2.5867)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5404 (1.6499)  time: 0.5331  data: 0.0008  max mem: 53905
Epoch: [291] Total time: 0:24:44 (0.5935 s / it)
Averaged stats: lr: 0.000009  min_lr: 0.000009  loss: 2.5771 (2.5794)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5404 (1.6499)
Test:  [ 0/50]  eta: 0:02:48  loss: 0.4811 (0.4811)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.3635  data: 3.0270  max mem: 53905
Test:  [10/50]  eta: 0:00:22  loss: 0.7155 (0.7211)  acc1: 89.2000 (89.4909)  acc5: 98.8000 (98.3273)  time: 0.5647  data: 0.2757  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.7593 (0.7586)  acc1: 86.0000 (88.2476)  acc5: 98.0000 (98.1905)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.8823 (0.8434)  acc1: 84.8000 (86.3871)  acc5: 96.8000 (97.4194)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0551 (0.8912)  acc1: 81.2000 (85.0342)  acc5: 95.6000 (97.0829)  time: 0.2847  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0268 (0.9084)  acc1: 81.2000 (84.5280)  acc5: 96.4000 (96.9760)  time: 0.2843  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3487 s / it)
* Acc@1 84.692 Acc@5 97.070 loss 0.899
Accuracy of the model on the 50000 test images: 84.7%
Max accuracy: 84.75%
Epoch: [292]  [   0/2502]  eta: 1:38:40  lr: 0.000009  min_lr: 0.000009  loss: 2.8048 (2.8048)  weight_decay: 0.0500 (0.0500)  time: 2.3661  data: 1.7305  max mem: 53905
Epoch: [292]  [ 200/2502]  eta: 0:23:02  lr: 0.000009  min_lr: 0.000009  loss: 2.8291 (2.5748)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5381 (1.7305)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [292]  [ 400/2502]  eta: 0:20:55  lr: 0.000009  min_lr: 0.000009  loss: 2.6154 (2.5899)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4543 (1.6416)  time: 0.5918  data: 0.0007  max mem: 53905
Epoch: [292]  [ 600/2502]  eta: 0:18:54  lr: 0.000009  min_lr: 0.000009  loss: 2.7243 (2.5804)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5204 (1.6268)  time: 0.5937  data: 0.0006  max mem: 53905
Epoch: [292]  [ 800/2502]  eta: 0:16:53  lr: 0.000008  min_lr: 0.000008  loss: 2.5287 (2.5712)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5675 (1.6458)  time: 0.5918  data: 0.0006  max mem: 53905
Epoch: [292]  [1000/2502]  eta: 0:14:53  lr: 0.000008  min_lr: 0.000008  loss: 2.5553 (2.5662)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5298 (1.6241)  time: 0.5926  data: 0.0007  max mem: 53905
Epoch: [292]  [1200/2502]  eta: 0:12:54  lr: 0.000008  min_lr: 0.000008  loss: 2.5374 (2.5738)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4531 (1.6225)  time: 0.5909  data: 0.0008  max mem: 53905
Epoch: [292]  [1400/2502]  eta: 0:10:54  lr: 0.000008  min_lr: 0.000008  loss: 2.6375 (2.5791)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4995 (1.6143)  time: 0.5921  data: 0.0008  max mem: 53905
Epoch: [292]  [1600/2502]  eta: 0:08:55  lr: 0.000008  min_lr: 0.000008  loss: 2.6996 (2.5779)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5389 (inf)  time: 0.5918  data: 0.0009  max mem: 53905
Epoch: [292]  [1800/2502]  eta: 0:06:56  lr: 0.000008  min_lr: 0.000008  loss: 2.8002 (2.5794)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4913 (inf)  time: 0.5912  data: 0.0005  max mem: 53905
Epoch: [292]  [2000/2502]  eta: 0:04:58  lr: 0.000008  min_lr: 0.000008  loss: 2.5583 (2.5813)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4693 (inf)  time: 0.5911  data: 0.0006  max mem: 53905
Epoch: [292]  [2200/2502]  eta: 0:02:59  lr: 0.000007  min_lr: 0.000007  loss: 2.5906 (2.5797)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5498 (inf)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [292]  [2400/2502]  eta: 0:01:00  lr: 0.000007  min_lr: 0.000007  loss: 2.8602 (2.5804)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6401 (inf)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [292]  [2501/2502]  eta: 0:00:00  lr: 0.000007  min_lr: 0.000007  loss: 2.8414 (2.5837)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5194 (inf)  time: 0.5332  data: 0.0007  max mem: 53905
Epoch: [292] Total time: 0:24:44 (0.5934 s / it)
Averaged stats: lr: 0.000007  min_lr: 0.000007  loss: 2.8414 (2.5815)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5194 (inf)
Test:  [ 0/50]  eta: 0:03:00  loss: 0.5492 (0.5492)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.6010  data: 3.2792  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7754 (0.7823)  acc1: 89.2000 (89.4909)  acc5: 98.4000 (98.2546)  time: 0.5864  data: 0.2986  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8264 (0.8200)  acc1: 85.6000 (88.1524)  acc5: 98.0000 (98.0952)  time: 0.2850  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9608 (0.9101)  acc1: 84.8000 (86.3742)  acc5: 96.8000 (97.2774)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1241 (0.9599)  acc1: 81.2000 (85.0732)  acc5: 95.6000 (96.9854)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1038 (0.9789)  acc1: 80.4000 (84.5520)  acc5: 96.4000 (96.9120)  time: 0.2847  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3535 s / it)
* Acc@1 84.706 Acc@5 97.056 loss 0.969
Accuracy of the model on the 50000 test images: 84.7%
Max accuracy: 84.75%
Epoch: [293]  [   0/2502]  eta: 1:52:20  lr: 0.000007  min_lr: 0.000007  loss: 2.1389 (2.1389)  weight_decay: 0.0500 (0.0500)  time: 2.6942  data: 1.6589  max mem: 53905
Epoch: [293]  [ 200/2502]  eta: 0:23:12  lr: 0.000007  min_lr: 0.000007  loss: 2.7389 (2.5287)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5759 (1.5916)  time: 0.5917  data: 0.0007  max mem: 53905
Epoch: [293]  [ 400/2502]  eta: 0:20:58  lr: 0.000007  min_lr: 0.000007  loss: 2.4199 (2.5473)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4609 (1.6169)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [293]  [ 600/2502]  eta: 0:18:55  lr: 0.000007  min_lr: 0.000007  loss: 2.5228 (2.5532)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5186 (1.6040)  time: 0.5925  data: 0.0006  max mem: 53905
Epoch: [293]  [ 800/2502]  eta: 0:16:54  lr: 0.000007  min_lr: 0.000007  loss: 2.5900 (2.5510)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5178 (1.5991)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [293]  [1000/2502]  eta: 0:14:54  lr: 0.000006  min_lr: 0.000006  loss: 2.6259 (2.5629)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6070 (1.5977)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [293]  [1200/2502]  eta: 0:12:54  lr: 0.000006  min_lr: 0.000006  loss: 2.8449 (2.5705)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6595 (1.6120)  time: 0.5930  data: 0.0007  max mem: 53905
Epoch: [293]  [1400/2502]  eta: 0:10:55  lr: 0.000006  min_lr: 0.000006  loss: 2.5148 (2.5673)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5501 (1.6265)  time: 0.5930  data: 0.0006  max mem: 53905
Epoch: [293]  [1600/2502]  eta: 0:08:56  lr: 0.000006  min_lr: 0.000006  loss: 2.8018 (2.5708)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6448 (1.6314)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [293]  [1800/2502]  eta: 0:06:57  lr: 0.000006  min_lr: 0.000006  loss: 2.7012 (2.5653)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4294 (1.6303)  time: 0.5962  data: 0.0007  max mem: 53905
Epoch: [293]  [2000/2502]  eta: 0:04:58  lr: 0.000006  min_lr: 0.000006  loss: 2.3231 (2.5708)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5245 (1.6308)  time: 0.5921  data: 0.0006  max mem: 53905
Epoch: [293]  [2200/2502]  eta: 0:02:59  lr: 0.000006  min_lr: 0.000006  loss: 2.5168 (2.5718)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5452 (1.6288)  time: 0.5934  data: 0.0006  max mem: 53905
Epoch: [293]  [2400/2502]  eta: 0:01:00  lr: 0.000006  min_lr: 0.000006  loss: 2.5355 (2.5710)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4919 (1.6267)  time: 0.5925  data: 0.0005  max mem: 53905
Epoch: [293]  [2501/2502]  eta: 0:00:00  lr: 0.000006  min_lr: 0.000006  loss: 2.4395 (2.5710)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6312 (1.6289)  time: 0.5328  data: 0.0007  max mem: 53905
Epoch: [293] Total time: 0:24:45 (0.5937 s / it)
Averaged stats: lr: 0.000006  min_lr: 0.000006  loss: 2.4395 (2.5791)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6312 (1.6289)
Test:  [ 0/50]  eta: 0:02:35  loss: 0.5403 (0.5403)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.1085  data: 2.7907  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.7813 (0.7754)  acc1: 89.2000 (89.4545)  acc5: 99.2000 (98.3273)  time: 0.5417  data: 0.2542  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8220 (0.8155)  acc1: 86.0000 (88.2286)  acc5: 98.0000 (98.2095)  time: 0.2852  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9509 (0.9004)  acc1: 84.8000 (86.3355)  acc5: 97.2000 (97.4581)  time: 0.2853  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1103 (0.9485)  acc1: 81.6000 (84.9756)  acc5: 95.6000 (97.1220)  time: 0.2849  data: 0.0004  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0840 (0.9680)  acc1: 81.2000 (84.5040)  acc5: 96.0000 (97.0000)  time: 0.2846  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3440 s / it)
* Acc@1 84.702 Acc@5 97.084 loss 0.958
Accuracy of the model on the 50000 test images: 84.7%
Max accuracy: 84.75%
Epoch: [294]  [   0/2502]  eta: 1:51:46  lr: 0.000006  min_lr: 0.000006  loss: 2.7892 (2.7892)  weight_decay: 0.0500 (0.0500)  time: 2.6803  data: 1.6043  max mem: 53905
Epoch: [294]  [ 200/2502]  eta: 0:23:09  lr: 0.000005  min_lr: 0.000005  loss: 2.6336 (2.5559)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4996 (1.6902)  time: 0.5928  data: 0.0005  max mem: 53905
Epoch: [294]  [ 400/2502]  eta: 0:20:59  lr: 0.000005  min_lr: 0.000005  loss: 2.6524 (2.5725)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7286 (1.6815)  time: 0.5995  data: 0.0004  max mem: 53905
Epoch: [294]  [ 600/2502]  eta: 0:18:56  lr: 0.000005  min_lr: 0.000005  loss: 2.6372 (2.5779)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5332 (1.6399)  time: 0.5926  data: 0.0006  max mem: 53905
Epoch: [294]  [ 800/2502]  eta: 0:16:55  lr: 0.000005  min_lr: 0.000005  loss: 2.6746 (2.5801)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6351 (1.6261)  time: 0.5929  data: 0.0004  max mem: 53905
Epoch: [294]  [1000/2502]  eta: 0:14:55  lr: 0.000005  min_lr: 0.000005  loss: 2.6895 (2.5798)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5247 (1.6397)  time: 0.5936  data: 0.0004  max mem: 53905
Epoch: [294]  [1200/2502]  eta: 0:12:55  lr: 0.000005  min_lr: 0.000005  loss: 2.4292 (2.5700)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5517 (1.6370)  time: 0.5937  data: 0.0004  max mem: 53905
Epoch: [294]  [1400/2502]  eta: 0:10:56  lr: 0.000005  min_lr: 0.000005  loss: 2.7784 (2.5707)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5016 (1.6400)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [294]  [1600/2502]  eta: 0:08:56  lr: 0.000005  min_lr: 0.000005  loss: 2.7810 (2.5736)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6164 (1.6464)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [294]  [1800/2502]  eta: 0:06:57  lr: 0.000005  min_lr: 0.000005  loss: 2.6464 (2.5724)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4202 (1.6426)  time: 0.5928  data: 0.0006  max mem: 53905
Epoch: [294]  [2000/2502]  eta: 0:04:58  lr: 0.000004  min_lr: 0.000004  loss: 2.6458 (2.5715)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5088 (1.6404)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [294]  [2200/2502]  eta: 0:02:59  lr: 0.000004  min_lr: 0.000004  loss: 2.7167 (2.5701)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6269 (1.6414)  time: 0.5912  data: 0.0004  max mem: 53905
Epoch: [294]  [2400/2502]  eta: 0:01:00  lr: 0.000004  min_lr: 0.000004  loss: 2.6667 (2.5700)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5050 (1.6378)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [294]  [2501/2502]  eta: 0:00:00  lr: 0.000004  min_lr: 0.000004  loss: 2.7384 (2.5708)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6373 (1.6388)  time: 0.5343  data: 0.0007  max mem: 53905
Epoch: [294] Total time: 0:24:46 (0.5941 s / it)
Averaged stats: lr: 0.000004  min_lr: 0.000004  loss: 2.7384 (2.5753)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6373 (1.6388)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.5074 (0.5074)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.0571  data: 2.7177  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.7462 (0.7419)  acc1: 89.2000 (89.6727)  acc5: 98.4000 (98.1818)  time: 0.5367  data: 0.2475  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.7837 (0.7835)  acc1: 86.4000 (88.3619)  acc5: 98.0000 (98.0762)  time: 0.2848  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9123 (0.8688)  acc1: 84.8000 (86.5677)  acc5: 96.8000 (97.3419)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0718 (0.9158)  acc1: 81.6000 (85.2683)  acc5: 95.6000 (97.0634)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0514 (0.9346)  acc1: 80.0000 (84.6880)  acc5: 96.0000 (96.9680)  time: 0.2844  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3425 s / it)
* Acc@1 84.760 Acc@5 97.092 loss 0.924
Accuracy of the model on the 50000 test images: 84.8%
Max accuracy: 84.76%
Epoch: [295]  [   0/2502]  eta: 1:49:10  lr: 0.000004  min_lr: 0.000004  loss: 2.5804 (2.5804)  weight_decay: 0.0500 (0.0500)  time: 2.6182  data: 2.0154  max mem: 53905
Epoch: [295]  [ 200/2502]  eta: 0:23:08  lr: 0.000004  min_lr: 0.000004  loss: 2.7571 (2.6148)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5118 (1.5604)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [295]  [ 400/2502]  eta: 0:20:56  lr: 0.000004  min_lr: 0.000004  loss: 2.7431 (2.5754)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4765 (1.5868)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [295]  [ 600/2502]  eta: 0:18:53  lr: 0.000004  min_lr: 0.000004  loss: 2.7555 (2.5813)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6103 (1.5884)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [295]  [ 800/2502]  eta: 0:16:53  lr: 0.000004  min_lr: 0.000004  loss: 2.4221 (2.5689)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5179 (1.5938)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [295]  [1000/2502]  eta: 0:14:53  lr: 0.000004  min_lr: 0.000004  loss: 2.4960 (2.5643)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5266 (1.5971)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [295]  [1200/2502]  eta: 0:12:54  lr: 0.000004  min_lr: 0.000004  loss: 2.7187 (2.5644)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6415 (1.6105)  time: 0.5932  data: 0.0005  max mem: 53905
Epoch: [295]  [1400/2502]  eta: 0:10:55  lr: 0.000003  min_lr: 0.000003  loss: 2.8105 (2.5658)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5068 (1.6133)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [295]  [1600/2502]  eta: 0:08:56  lr: 0.000003  min_lr: 0.000003  loss: 2.3344 (2.5698)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6710 (1.6206)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [295]  [1800/2502]  eta: 0:06:57  lr: 0.000003  min_lr: 0.000003  loss: 2.6665 (2.5717)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5838 (1.6264)  time: 0.6023  data: 0.0005  max mem: 53905
Epoch: [295]  [2000/2502]  eta: 0:04:58  lr: 0.000003  min_lr: 0.000003  loss: 2.4611 (2.5725)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6788 (1.6435)  time: 0.5938  data: 0.0005  max mem: 53905
Epoch: [295]  [2200/2502]  eta: 0:02:59  lr: 0.000003  min_lr: 0.000003  loss: 2.7980 (2.5726)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6030 (1.6465)  time: 0.5930  data: 0.0005  max mem: 53905
Epoch: [295]  [2400/2502]  eta: 0:01:00  lr: 0.000003  min_lr: 0.000003  loss: 2.8364 (2.5739)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6097 (1.6442)  time: 0.5960  data: 0.0007  max mem: 53905
Epoch: [295]  [2501/2502]  eta: 0:00:00  lr: 0.000003  min_lr: 0.000003  loss: 2.6234 (2.5750)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4816 (1.6422)  time: 0.5329  data: 0.0008  max mem: 53905
Epoch: [295] Total time: 0:24:46 (0.5941 s / it)
Averaged stats: lr: 0.000003  min_lr: 0.000003  loss: 2.6234 (2.5768)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4816 (1.6422)
Test:  [ 0/50]  eta: 0:02:54  loss: 0.5172 (0.5172)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.4920  data: 3.1631  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7624 (0.7607)  acc1: 89.2000 (89.5273)  acc5: 98.4000 (98.2546)  time: 0.5762  data: 0.2880  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8030 (0.8015)  acc1: 86.0000 (88.2286)  acc5: 98.0000 (98.1143)  time: 0.2849  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9364 (0.8871)  acc1: 84.8000 (86.4258)  acc5: 96.8000 (97.3419)  time: 0.2851  data: 0.0006  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0852 (0.9354)  acc1: 82.0000 (85.1317)  acc5: 95.2000 (97.0732)  time: 0.2847  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0828 (0.9539)  acc1: 80.4000 (84.6000)  acc5: 96.4000 (96.9520)  time: 0.2844  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3511 s / it)
* Acc@1 84.690 Acc@5 97.100 loss 0.945
Accuracy of the model on the 50000 test images: 84.7%
Max accuracy: 84.76%
Epoch: [296]  [   0/2502]  eta: 1:57:53  lr: 0.000003  min_lr: 0.000003  loss: 1.8457 (1.8457)  weight_decay: 0.0500 (0.0500)  time: 2.8272  data: 1.8289  max mem: 53905
Epoch: [296]  [ 200/2502]  eta: 0:23:08  lr: 0.000003  min_lr: 0.000003  loss: 2.6607 (2.5608)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6213 (1.6786)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [296]  [ 400/2502]  eta: 0:20:57  lr: 0.000003  min_lr: 0.000003  loss: 2.4977 (2.5609)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6110 (1.6342)  time: 0.5983  data: 0.0004  max mem: 53905
Epoch: [296]  [ 600/2502]  eta: 0:18:53  lr: 0.000003  min_lr: 0.000003  loss: 2.5571 (2.5576)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6717 (1.6442)  time: 0.5921  data: 0.0005  max mem: 53905
Epoch: [296]  [ 800/2502]  eta: 0:16:52  lr: 0.000003  min_lr: 0.000003  loss: 2.6343 (2.5649)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5828 (1.6432)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [296]  [1000/2502]  eta: 0:14:52  lr: 0.000003  min_lr: 0.000003  loss: 2.7100 (2.5674)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5886 (1.6350)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [296]  [1200/2502]  eta: 0:12:53  lr: 0.000003  min_lr: 0.000003  loss: 2.5433 (2.5623)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5445 (1.6444)  time: 0.5911  data: 0.0004  max mem: 53905
Epoch: [296]  [1400/2502]  eta: 0:10:54  lr: 0.000002  min_lr: 0.000002  loss: 2.2754 (2.5642)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5640 (1.6394)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [296]  [1600/2502]  eta: 0:08:55  lr: 0.000002  min_lr: 0.000002  loss: 2.7254 (2.5638)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5539 (1.6383)  time: 0.5903  data: 0.0005  max mem: 53905
Epoch: [296]  [1800/2502]  eta: 0:06:56  lr: 0.000002  min_lr: 0.000002  loss: 2.6917 (2.5667)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5546 (1.6319)  time: 0.5941  data: 0.0005  max mem: 53905
Epoch: [296]  [2000/2502]  eta: 0:04:57  lr: 0.000002  min_lr: 0.000002  loss: 2.6935 (2.5702)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4862 (1.6226)  time: 0.5910  data: 0.0004  max mem: 53905
Epoch: [296]  [2200/2502]  eta: 0:02:59  lr: 0.000002  min_lr: 0.000002  loss: 2.7746 (2.5680)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5900 (1.6218)  time: 0.5977  data: 0.0004  max mem: 53905
Epoch: [296]  [2400/2502]  eta: 0:01:00  lr: 0.000002  min_lr: 0.000002  loss: 2.6854 (2.5681)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6051 (1.6296)  time: 0.5954  data: 0.0004  max mem: 53905
Epoch: [296]  [2501/2502]  eta: 0:00:00  lr: 0.000002  min_lr: 0.000002  loss: 2.5543 (2.5657)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5849 (1.6286)  time: 0.5331  data: 0.0007  max mem: 53905
Epoch: [296] Total time: 0:24:43 (0.5928 s / it)
Averaged stats: lr: 0.000002  min_lr: 0.000002  loss: 2.5543 (2.5776)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5849 (1.6286)
Test:  [ 0/50]  eta: 0:02:46  loss: 0.4891 (0.4891)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.3220  data: 2.9973  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.7269 (0.7246)  acc1: 89.2000 (89.5273)  acc5: 98.8000 (98.3273)  time: 0.6010  data: 0.3135  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7616 (0.7618)  acc1: 86.4000 (88.3810)  acc5: 98.0000 (98.1905)  time: 0.3068  data: 0.0228  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.8915 (0.8485)  acc1: 84.8000 (86.4774)  acc5: 97.2000 (97.3936)  time: 0.2872  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0563 (0.8991)  acc1: 81.2000 (85.1610)  acc5: 95.6000 (97.0537)  time: 0.2900  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0469 (0.9171)  acc1: 80.4000 (84.6240)  acc5: 96.0000 (96.9520)  time: 0.2880  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3584 s / it)
* Acc@1 84.754 Acc@5 97.068 loss 0.907
Accuracy of the model on the 50000 test images: 84.8%
Max accuracy: 84.76%
Epoch: [297]  [   0/2502]  eta: 1:52:27  lr: 0.000002  min_lr: 0.000002  loss: 1.9652 (1.9652)  weight_decay: 0.0500 (0.0500)  time: 2.6968  data: 1.5597  max mem: 53905
Epoch: [297]  [ 200/2502]  eta: 0:23:12  lr: 0.000002  min_lr: 0.000002  loss: 2.7341 (2.5601)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5472 (1.6063)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [297]  [ 400/2502]  eta: 0:20:58  lr: 0.000002  min_lr: 0.000002  loss: 2.7893 (2.5890)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5257 (1.6356)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [297]  [ 600/2502]  eta: 0:18:55  lr: 0.000002  min_lr: 0.000002  loss: 2.7990 (2.5937)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5805 (1.6163)  time: 0.5928  data: 0.0004  max mem: 53905
Epoch: [297]  [ 800/2502]  eta: 0:16:54  lr: 0.000002  min_lr: 0.000002  loss: 2.5588 (2.5925)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5075 (1.6360)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [297]  [1000/2502]  eta: 0:14:54  lr: 0.000002  min_lr: 0.000002  loss: 2.6456 (2.5864)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5524 (1.6300)  time: 0.5968  data: 0.0005  max mem: 53905
Epoch: [297]  [1200/2502]  eta: 0:12:55  lr: 0.000002  min_lr: 0.000002  loss: 2.6205 (2.5859)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5130 (1.6483)  time: 0.5993  data: 0.0004  max mem: 53905
Epoch: [297]  [1400/2502]  eta: 0:10:55  lr: 0.000002  min_lr: 0.000002  loss: 2.6610 (2.5791)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5799 (1.6396)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [297]  [1600/2502]  eta: 0:08:56  lr: 0.000002  min_lr: 0.000002  loss: 2.7455 (2.5815)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5412 (1.6312)  time: 0.5922  data: 0.0004  max mem: 53905
Epoch: [297]  [1800/2502]  eta: 0:06:57  lr: 0.000002  min_lr: 0.000002  loss: 2.7887 (2.5781)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6171 (1.6214)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [297]  [2000/2502]  eta: 0:04:58  lr: 0.000002  min_lr: 0.000002  loss: 2.4456 (2.5777)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5271 (1.6138)  time: 0.5927  data: 0.0004  max mem: 53905
Epoch: [297]  [2200/2502]  eta: 0:02:59  lr: 0.000002  min_lr: 0.000002  loss: 2.6798 (2.5748)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5200 (inf)  time: 0.5937  data: 0.0004  max mem: 53905
Epoch: [297]  [2400/2502]  eta: 0:01:00  lr: 0.000002  min_lr: 0.000002  loss: 2.4717 (2.5729)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6117 (inf)  time: 0.5979  data: 0.0004  max mem: 53905
Epoch: [297]  [2501/2502]  eta: 0:00:00  lr: 0.000002  min_lr: 0.000002  loss: 2.6620 (2.5716)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4574 (inf)  time: 0.5333  data: 0.0007  max mem: 53905
Epoch: [297] Total time: 0:24:46 (0.5941 s / it)
Averaged stats: lr: 0.000002  min_lr: 0.000002  loss: 2.6620 (2.5660)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4574 (inf)
Test:  [ 0/50]  eta: 0:03:08  loss: 0.5311 (0.5311)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.7752  data: 3.4670  max mem: 53905
Test:  [10/50]  eta: 0:00:24  loss: 0.7790 (0.7702)  acc1: 89.2000 (89.5273)  acc5: 98.8000 (98.2909)  time: 0.6021  data: 0.3156  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.8158 (0.8097)  acc1: 86.0000 (88.3429)  acc5: 98.0000 (98.1524)  time: 0.2849  data: 0.0004  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9382 (0.8940)  acc1: 84.8000 (86.4387)  acc5: 97.2000 (97.3677)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0928 (0.9420)  acc1: 81.2000 (85.0927)  acc5: 95.6000 (97.0537)  time: 0.2848  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0882 (0.9596)  acc1: 80.8000 (84.5520)  acc5: 96.0000 (96.9760)  time: 0.2846  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3568 s / it)
* Acc@1 84.738 Acc@5 97.060 loss 0.950
Accuracy of the model on the 50000 test images: 84.7%
Max accuracy: 84.76%
Epoch: [298]  [   0/2502]  eta: 1:42:03  lr: 0.000002  min_lr: 0.000002  loss: 2.5419 (2.5419)  weight_decay: 0.0500 (0.0500)  time: 2.4475  data: 1.8344  max mem: 53905
Epoch: [298]  [ 200/2502]  eta: 0:23:05  lr: 0.000001  min_lr: 0.000001  loss: 2.7927 (2.5671)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5955 (1.6314)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [298]  [ 400/2502]  eta: 0:20:57  lr: 0.000001  min_lr: 0.000001  loss: 2.7332 (2.5624)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5701 (1.5993)  time: 0.5930  data: 0.0004  max mem: 53905
Epoch: [298]  [ 600/2502]  eta: 0:18:53  lr: 0.000001  min_lr: 0.000001  loss: 2.7135 (2.5700)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6039 (1.6053)  time: 0.5918  data: 0.0005  max mem: 53905
Epoch: [298]  [ 800/2502]  eta: 0:16:53  lr: 0.000001  min_lr: 0.000001  loss: 2.3305 (2.5590)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5075 (1.5968)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [298]  [1000/2502]  eta: 0:14:53  lr: 0.000001  min_lr: 0.000001  loss: 2.7757 (2.5687)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5600 (1.6055)  time: 0.5904  data: 0.0005  max mem: 53905
Epoch: [298]  [1200/2502]  eta: 0:12:53  lr: 0.000001  min_lr: 0.000001  loss: 2.5769 (2.5657)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5468 (1.6108)  time: 0.5911  data: 0.0005  max mem: 53905
Epoch: [298]  [1400/2502]  eta: 0:10:54  lr: 0.000001  min_lr: 0.000001  loss: 2.7189 (2.5664)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4836 (1.6243)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [298]  [1600/2502]  eta: 0:08:55  lr: 0.000001  min_lr: 0.000001  loss: 2.3476 (2.5632)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5125 (1.6195)  time: 0.5917  data: 0.0005  max mem: 53905
Epoch: [298]  [1800/2502]  eta: 0:06:56  lr: 0.000001  min_lr: 0.000001  loss: 2.6685 (2.5712)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5457 (1.6249)  time: 0.5923  data: 0.0004  max mem: 53905
Epoch: [298]  [2000/2502]  eta: 0:04:57  lr: 0.000001  min_lr: 0.000001  loss: 2.5632 (2.5717)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5686 (1.6265)  time: 0.5921  data: 0.0004  max mem: 53905
Epoch: [298]  [2200/2502]  eta: 0:02:59  lr: 0.000001  min_lr: 0.000001  loss: 2.5862 (2.5660)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5004 (1.6308)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [298]  [2400/2502]  eta: 0:01:00  lr: 0.000001  min_lr: 0.000001  loss: 2.5739 (2.5625)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5394 (1.6330)  time: 0.5919  data: 0.0004  max mem: 53905
Epoch: [298]  [2501/2502]  eta: 0:00:00  lr: 0.000001  min_lr: 0.000001  loss: 2.7040 (2.5630)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5793 (1.6321)  time: 0.5331  data: 0.0008  max mem: 53905
Epoch: [298] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.000001  min_lr: 0.000001  loss: 2.7040 (2.5735)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5793 (1.6321)
Test:  [ 0/50]  eta: 0:03:04  loss: 0.4852 (0.4852)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.6818  data: 3.3628  max mem: 53905
Test:  [10/50]  eta: 0:00:23  loss: 0.7231 (0.7252)  acc1: 89.2000 (89.5636)  acc5: 98.8000 (98.2546)  time: 0.5937  data: 0.3061  max mem: 53905
Test:  [20/50]  eta: 0:00:13  loss: 0.7607 (0.7666)  acc1: 86.0000 (88.3810)  acc5: 98.0000 (98.0952)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 0.9076 (0.8519)  acc1: 84.8000 (86.6065)  acc5: 97.2000 (97.3419)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.0656 (0.9001)  acc1: 81.6000 (85.2488)  acc5: 95.6000 (97.0244)  time: 0.2849  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.0266 (0.9170)  acc1: 80.4000 (84.6400)  acc5: 96.0000 (96.9280)  time: 0.2846  data: 0.0002  max mem: 53905
Test: Total time: 0:00:17 (0.3551 s / it)
* Acc@1 84.696 Acc@5 97.050 loss 0.907
Accuracy of the model on the 50000 test images: 84.7%
Max accuracy: 84.76%
Epoch: [299]  [   0/2502]  eta: 1:37:57  lr: 0.000001  min_lr: 0.000001  loss: 2.6488 (2.6488)  weight_decay: 0.0500 (0.0500)  time: 2.3492  data: 1.4804  max mem: 53905
Epoch: [299]  [ 200/2502]  eta: 0:23:07  lr: 0.000001  min_lr: 0.000001  loss: 2.8060 (2.5896)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4758 (1.5824)  time: 0.5924  data: 0.0004  max mem: 53905
Epoch: [299]  [ 400/2502]  eta: 0:20:55  lr: 0.000001  min_lr: 0.000001  loss: 2.7493 (2.6020)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6367 (1.6274)  time: 0.5920  data: 0.0004  max mem: 53905
Epoch: [299]  [ 600/2502]  eta: 0:18:52  lr: 0.000001  min_lr: 0.000001  loss: 2.3660 (2.5885)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5474 (1.6336)  time: 0.5950  data: 0.0004  max mem: 53905
Epoch: [299]  [ 800/2502]  eta: 0:16:52  lr: 0.000001  min_lr: 0.000001  loss: 2.7197 (2.5886)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5439 (1.6903)  time: 0.5935  data: 0.0005  max mem: 53905
Epoch: [299]  [1000/2502]  eta: 0:14:53  lr: 0.000001  min_lr: 0.000001  loss: 2.7882 (2.5860)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5054 (1.6818)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [299]  [1200/2502]  eta: 0:12:54  lr: 0.000001  min_lr: 0.000001  loss: 2.6855 (2.5753)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5256 (1.6641)  time: 0.5915  data: 0.0004  max mem: 53905
Epoch: [299]  [1400/2502]  eta: 0:10:54  lr: 0.000001  min_lr: 0.000001  loss: 2.6811 (2.5825)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6273 (1.6545)  time: 0.5918  data: 0.0004  max mem: 53905
Epoch: [299]  [1600/2502]  eta: 0:08:55  lr: 0.000001  min_lr: 0.000001  loss: 2.7003 (2.5838)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4996 (1.6453)  time: 0.5916  data: 0.0004  max mem: 53905
Epoch: [299]  [1800/2502]  eta: 0:06:57  lr: 0.000001  min_lr: 0.000001  loss: 2.7769 (2.5862)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5263 (1.6317)  time: 0.5913  data: 0.0004  max mem: 53905
Epoch: [299]  [2000/2502]  eta: 0:04:58  lr: 0.000001  min_lr: 0.000001  loss: 2.6872 (2.5855)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6148 (1.6433)  time: 0.5901  data: 0.0004  max mem: 53905
Epoch: [299]  [2200/2502]  eta: 0:02:59  lr: 0.000001  min_lr: 0.000001  loss: 2.5770 (2.5825)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5727 (1.6428)  time: 0.5909  data: 0.0004  max mem: 53905
Epoch: [299]  [2400/2502]  eta: 0:01:00  lr: 0.000001  min_lr: 0.000001  loss: 2.1923 (2.5799)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6607 (1.6569)  time: 0.5914  data: 0.0004  max mem: 53905
Epoch: [299]  [2501/2502]  eta: 0:00:00  lr: 0.000001  min_lr: 0.000001  loss: 2.7995 (2.5799)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5686 (1.6565)  time: 0.5329  data: 0.0007  max mem: 53905
Epoch: [299] Total time: 0:24:43 (0.5931 s / it)
Averaged stats: lr: 0.000001  min_lr: 0.000001  loss: 2.7995 (2.5731)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5686 (1.6565)
Test:  [ 0/50]  eta: 0:02:36  loss: 0.6268 (0.6268)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.1347  data: 2.7980  max mem: 53905
Test:  [10/50]  eta: 0:00:21  loss: 0.8694 (0.8590)  acc1: 89.2000 (89.4909)  acc5: 98.4000 (98.1818)  time: 0.5439  data: 0.2549  max mem: 53905
Test:  [20/50]  eta: 0:00:12  loss: 0.8901 (0.8941)  acc1: 86.0000 (88.3048)  acc5: 98.0000 (98.0952)  time: 0.2851  data: 0.0005  max mem: 53905
Test:  [30/50]  eta: 0:00:07  loss: 1.0363 (0.9804)  acc1: 84.8000 (86.4645)  acc5: 97.2000 (97.3032)  time: 0.2853  data: 0.0005  max mem: 53905
Test:  [40/50]  eta: 0:00:03  loss: 1.1882 (1.0310)  acc1: 82.0000 (85.0732)  acc5: 95.2000 (97.0342)  time: 0.2850  data: 0.0003  max mem: 53905
Test:  [49/50]  eta: 0:00:00  loss: 1.1882 (1.0512)  acc1: 80.0000 (84.4640)  acc5: 96.0000 (96.9200)  time: 0.2847  data: 0.0001  max mem: 53905
Test: Total time: 0:00:17 (0.3441 s / it)
* Acc@1 84.600 Acc@5 97.042 loss 1.041
Accuracy of the model on the 50000 test images: 84.6%
Max accuracy: 84.76%
Training time 5 days, 5:21:26
