| distributed init (rank 0): env://, gpu 0
| distributed init (rank 4): env://, gpu 4
| distributed init (rank 6): env://, gpu 6
| distributed init (rank 5): env://, gpu 5
| distributed init (rank 2): env://, gpu 2
| distributed init (rank 7): env://, gpu 7
| distributed init (rank 3): env://, gpu 3
| distributed init (rank 1): env://, gpu 1
Namespace(batch_size=128, epochs=300, update_freq=4, model='pico', drop_path=0, input_size=224, layer_scale_init_value=1e-06, model_ema=False, model_ema_decay=0.9999, model_ema_force_cpu=False, model_ema_eval=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=5.0, momentum=0.9, weight_decay=0.05, weight_decay_end=None, lr=0.004, layer_decay=1.0, min_lr=1e-06, warmup_epochs=20, warmup_steps=-1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', crop_pct=None, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.1, cutmix=0.2, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', head_init_scale=1.0, model_key='model|module', model_prefix='', data_path='/dev/shm/imagenet', eval_data_path=None, nb_classes=1000, imagenet_default_mean_and_std=True, data_set='IMNET', output_dir='./checkpoint_nano_0.7G', log_dir=None, device='cuda', seed=0, resume='', auto_resume=True, save_ckpt=True, save_ckpt_freq=1, save_ckpt_num=3, start_epoch=0, eval=False, dist_eval=True, disable_eval=False, num_workers=10, pin_mem=True, world_size=8, local_rank=-1, dist_on_itp=False, dist_url='env://', use_amp=True, enable_wandb=False, project='convnext', wandb_ckpt=False, rank=0, gpu=0, distributed=True, dist_backend='nccl')
Transform = 
RandomResizedCropAndInterpolation(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=bicubic)
RandomHorizontalFlip(p=0.5)
RandAugment(n=2, ops=
	AugmentOp(name=AutoContrast, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=Equalize, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=Invert, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=Rotate, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=PosterizeIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=SolarizeIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=SolarizeAdd, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=ColorIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=ContrastIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=BrightnessIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=SharpnessIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=ShearX, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=ShearY, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=TranslateXRel, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=TranslateYRel, p=0.5, m=9, mstd=0.5))
ToTensor()
Normalize(mean=tensor([0.4850, 0.4560, 0.4060]), std=tensor([0.2290, 0.2240, 0.2250]))
RandomErasing(p=0.25, mode=pixel, count=(1, 1))
---------------------------
reading from datapath /dev/shm/imagenet
Number of the class = 1000
Transform = 
Resize(size=256, interpolation=bicubic, max_size=None, antialias=True)
CenterCrop(size=(224, 224))
ToTensor()
Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
---------------------------
reading from datapath /dev/shm/imagenet
Number of the class = 1000
Sampler_train = <torch.utils.data.distributed.DistributedSampler object at 0x7f75f3ac9350>
Mixup is activated!
Model = SFCNN(
  (first_conv): ConvX(
    (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (norm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): Act(
      (act): SiLU(inplace=True)
    )
  )
  (layer1): Sequential(
    (0): DropBottleNeck(
      (ln): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
      (le): ConvX(
        (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
        (norm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(16, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Sequential(
        (0): ConvX(
          (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
          (norm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
        (1): ConvX(
          (conv): Conv2d(16, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
      )
      (drop_path): Identity()
    )
    (1): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
    (2): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
  )
  (layer2): Sequential(
    (0): DropBottleNeck(
      (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
      (le): ConvX(
        (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(32, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Sequential(
        (0): ConvX(
          (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=32, bias=False)
          (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
        (1): ConvX(
          (conv): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
      )
      (drop_path): Identity()
    )
    (1): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
    (2): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
    (3): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
  )
  (layer3): Sequential(
    (0): DropBottleNeck(
      (ln): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (le): ConvX(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(64, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Sequential(
        (0): ConvX(
          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
          (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
        (1): ConvX(
          (conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
      )
      (drop_path): Identity()
    )
    (1): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
    (2): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
    (3): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
    (4): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
    (5): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
    (6): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
    (7): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
    (8): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
    (9): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
    (10): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
    (11): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
  )
  (layer4): Sequential(
    (0): DropBottleNeck(
      (ln): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=1024, bias=False)
        (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Sequential(
        (0): ConvX(
          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=128, bias=False)
          (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
        (1): ConvX(
          (conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
      )
      (drop_path): Identity()
    )
    (1): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024, bias=False)
        (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
    (2): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024, bias=False)
        (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): Identity()
    )
  )
  (head): ConvX(
    (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): Act(
      (act): SiLU(inplace=True)
    )
  )
  (gap): AdaptiveAvgPool2d(output_size=1)
  (classifier): MlpHead(
    (fc1): Linear(in_features=1024, out_features=2048, bias=False)
    (norm): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): Act(
      (act): SiLU(inplace=True)
    )
    (drop): Dropout(p=0.2, inplace=False)
    (fc2): Linear(in_features=2048, out_features=1000, bias=False)
  )
)
number of params: 7758800
LR = 0.00400000
Batch size = 4096
Update frequent = 4
Number of training examples = 1281167
Number of training training per epoch = 312
Param groups = {
  "decay": {
    "weight_decay": 0.05,
    "params": [
      "first_conv.conv.weight",
      "layer1.0.le.conv.weight",
      "layer1.0.conv_in.conv.weight",
      "layer1.0.conv.conv.weight",
      "layer1.0.conv_out.conv.weight",
      "layer1.0.skip.0.conv.weight",
      "layer1.0.skip.1.conv.weight",
      "layer1.1.le.conv.weight",
      "layer1.1.conv_in.conv.weight",
      "layer1.1.conv.conv.weight",
      "layer1.1.conv_out.conv.weight",
      "layer1.2.le.conv.weight",
      "layer1.2.conv_in.conv.weight",
      "layer1.2.conv.conv.weight",
      "layer1.2.conv_out.conv.weight",
      "layer2.0.le.conv.weight",
      "layer2.0.conv_in.conv.weight",
      "layer2.0.conv.conv.weight",
      "layer2.0.conv_out.conv.weight",
      "layer2.0.skip.0.conv.weight",
      "layer2.0.skip.1.conv.weight",
      "layer2.1.le.conv.weight",
      "layer2.1.conv_in.conv.weight",
      "layer2.1.conv.conv.weight",
      "layer2.1.conv_out.conv.weight",
      "layer2.2.le.conv.weight",
      "layer2.2.conv_in.conv.weight",
      "layer2.2.conv.conv.weight",
      "layer2.2.conv_out.conv.weight",
      "layer2.3.le.conv.weight",
      "layer2.3.conv_in.conv.weight",
      "layer2.3.conv.conv.weight",
      "layer2.3.conv_out.conv.weight",
      "layer3.0.le.conv.weight",
      "layer3.0.conv_in.conv.weight",
      "layer3.0.conv.conv.weight",
      "layer3.0.conv_out.conv.weight",
      "layer3.0.skip.0.conv.weight",
      "layer3.0.skip.1.conv.weight",
      "layer3.1.le.conv.weight",
      "layer3.1.conv_in.conv.weight",
      "layer3.1.conv.conv.weight",
      "layer3.1.conv_out.conv.weight",
      "layer3.2.le.conv.weight",
      "layer3.2.conv_in.conv.weight",
      "layer3.2.conv.conv.weight",
      "layer3.2.conv_out.conv.weight",
      "layer3.3.le.conv.weight",
      "layer3.3.conv_in.conv.weight",
      "layer3.3.conv.conv.weight",
      "layer3.3.conv_out.conv.weight",
      "layer3.4.le.conv.weight",
      "layer3.4.conv_in.conv.weight",
      "layer3.4.conv.conv.weight",
      "layer3.4.conv_out.conv.weight",
      "layer3.5.le.conv.weight",
      "layer3.5.conv_in.conv.weight",
      "layer3.5.conv.conv.weight",
      "layer3.5.conv_out.conv.weight",
      "layer3.6.le.conv.weight",
      "layer3.6.conv_in.conv.weight",
      "layer3.6.conv.conv.weight",
      "layer3.6.conv_out.conv.weight",
      "layer3.7.le.conv.weight",
      "layer3.7.conv_in.conv.weight",
      "layer3.7.conv.conv.weight",
      "layer3.7.conv_out.conv.weight",
      "layer3.8.le.conv.weight",
      "layer3.8.conv_in.conv.weight",
      "layer3.8.conv.conv.weight",
      "layer3.8.conv_out.conv.weight",
      "layer3.9.le.conv.weight",
      "layer3.9.conv_in.conv.weight",
      "layer3.9.conv.conv.weight",
      "layer3.9.conv_out.conv.weight",
      "layer3.10.le.conv.weight",
      "layer3.10.conv_in.conv.weight",
      "layer3.10.conv.conv.weight",
      "layer3.10.conv_out.conv.weight",
      "layer3.11.le.conv.weight",
      "layer3.11.conv_in.conv.weight",
      "layer3.11.conv.conv.weight",
      "layer3.11.conv_out.conv.weight",
      "layer4.0.le.conv.weight",
      "layer4.0.conv_in.conv.weight",
      "layer4.0.conv.conv.weight",
      "layer4.0.conv_out.conv.weight",
      "layer4.0.skip.0.conv.weight",
      "layer4.0.skip.1.conv.weight",
      "layer4.1.le.conv.weight",
      "layer4.1.conv_in.conv.weight",
      "layer4.1.conv.conv.weight",
      "layer4.1.conv_out.conv.weight",
      "layer4.2.le.conv.weight",
      "layer4.2.conv_in.conv.weight",
      "layer4.2.conv.conv.weight",
      "layer4.2.conv_out.conv.weight",
      "head.conv.weight",
      "classifier.fc1.weight",
      "classifier.fc2.weight"
    ],
    "lr_scale": 1.0
  },
  "no_decay": {
    "weight_decay": 0.0,
    "params": [
      "first_conv.norm.weight",
      "first_conv.norm.bias",
      "layer1.0.ln.weight",
      "layer1.0.ln.bias",
      "layer1.0.le.norm.weight",
      "layer1.0.le.norm.bias",
      "layer1.0.conv_in.norm.weight",
      "layer1.0.conv_in.norm.bias",
      "layer1.0.conv.norm.weight",
      "layer1.0.conv.norm.bias",
      "layer1.0.conv_out.norm.weight",
      "layer1.0.conv_out.norm.bias",
      "layer1.0.skip.0.norm.weight",
      "layer1.0.skip.0.norm.bias",
      "layer1.0.skip.1.norm.weight",
      "layer1.0.skip.1.norm.bias",
      "layer1.1.le.norm.weight",
      "layer1.1.le.norm.bias",
      "layer1.1.conv_in.norm.weight",
      "layer1.1.conv_in.norm.bias",
      "layer1.1.conv.norm.weight",
      "layer1.1.conv.norm.bias",
      "layer1.1.conv_out.norm.weight",
      "layer1.1.conv_out.norm.bias",
      "layer1.2.le.norm.weight",
      "layer1.2.le.norm.bias",
      "layer1.2.conv_in.norm.weight",
      "layer1.2.conv_in.norm.bias",
      "layer1.2.conv.norm.weight",
      "layer1.2.conv.norm.bias",
      "layer1.2.conv_out.norm.weight",
      "layer1.2.conv_out.norm.bias",
      "layer2.0.ln.weight",
      "layer2.0.ln.bias",
      "layer2.0.le.norm.weight",
      "layer2.0.le.norm.bias",
      "layer2.0.conv_in.norm.weight",
      "layer2.0.conv_in.norm.bias",
      "layer2.0.conv.norm.weight",
      "layer2.0.conv.norm.bias",
      "layer2.0.conv_out.norm.weight",
      "layer2.0.conv_out.norm.bias",
      "layer2.0.skip.0.norm.weight",
      "layer2.0.skip.0.norm.bias",
      "layer2.0.skip.1.norm.weight",
      "layer2.0.skip.1.norm.bias",
      "layer2.1.le.norm.weight",
      "layer2.1.le.norm.bias",
      "layer2.1.conv_in.norm.weight",
      "layer2.1.conv_in.norm.bias",
      "layer2.1.conv.norm.weight",
      "layer2.1.conv.norm.bias",
      "layer2.1.conv_out.norm.weight",
      "layer2.1.conv_out.norm.bias",
      "layer2.2.le.norm.weight",
      "layer2.2.le.norm.bias",
      "layer2.2.conv_in.norm.weight",
      "layer2.2.conv_in.norm.bias",
      "layer2.2.conv.norm.weight",
      "layer2.2.conv.norm.bias",
      "layer2.2.conv_out.norm.weight",
      "layer2.2.conv_out.norm.bias",
      "layer2.3.le.norm.weight",
      "layer2.3.le.norm.bias",
      "layer2.3.conv_in.norm.weight",
      "layer2.3.conv_in.norm.bias",
      "layer2.3.conv.norm.weight",
      "layer2.3.conv.norm.bias",
      "layer2.3.conv_out.norm.weight",
      "layer2.3.conv_out.norm.bias",
      "layer3.0.ln.weight",
      "layer3.0.ln.bias",
      "layer3.0.le.norm.weight",
      "layer3.0.le.norm.bias",
      "layer3.0.conv_in.norm.weight",
      "layer3.0.conv_in.norm.bias",
      "layer3.0.conv.norm.weight",
      "layer3.0.conv.norm.bias",
      "layer3.0.conv_out.norm.weight",
      "layer3.0.conv_out.norm.bias",
      "layer3.0.skip.0.norm.weight",
      "layer3.0.skip.0.norm.bias",
      "layer3.0.skip.1.norm.weight",
      "layer3.0.skip.1.norm.bias",
      "layer3.1.le.norm.weight",
      "layer3.1.le.norm.bias",
      "layer3.1.conv_in.norm.weight",
      "layer3.1.conv_in.norm.bias",
      "layer3.1.conv.norm.weight",
      "layer3.1.conv.norm.bias",
      "layer3.1.conv_out.norm.weight",
      "layer3.1.conv_out.norm.bias",
      "layer3.2.le.norm.weight",
      "layer3.2.le.norm.bias",
      "layer3.2.conv_in.norm.weight",
      "layer3.2.conv_in.norm.bias",
      "layer3.2.conv.norm.weight",
      "layer3.2.conv.norm.bias",
      "layer3.2.conv_out.norm.weight",
      "layer3.2.conv_out.norm.bias",
      "layer3.3.le.norm.weight",
      "layer3.3.le.norm.bias",
      "layer3.3.conv_in.norm.weight",
      "layer3.3.conv_in.norm.bias",
      "layer3.3.conv.norm.weight",
      "layer3.3.conv.norm.bias",
      "layer3.3.conv_out.norm.weight",
      "layer3.3.conv_out.norm.bias",
      "layer3.4.le.norm.weight",
      "layer3.4.le.norm.bias",
      "layer3.4.conv_in.norm.weight",
      "layer3.4.conv_in.norm.bias",
      "layer3.4.conv.norm.weight",
      "layer3.4.conv.norm.bias",
      "layer3.4.conv_out.norm.weight",
      "layer3.4.conv_out.norm.bias",
      "layer3.5.le.norm.weight",
      "layer3.5.le.norm.bias",
      "layer3.5.conv_in.norm.weight",
      "layer3.5.conv_in.norm.bias",
      "layer3.5.conv.norm.weight",
      "layer3.5.conv.norm.bias",
      "layer3.5.conv_out.norm.weight",
      "layer3.5.conv_out.norm.bias",
      "layer3.6.le.norm.weight",
      "layer3.6.le.norm.bias",
      "layer3.6.conv_in.norm.weight",
      "layer3.6.conv_in.norm.bias",
      "layer3.6.conv.norm.weight",
      "layer3.6.conv.norm.bias",
      "layer3.6.conv_out.norm.weight",
      "layer3.6.conv_out.norm.bias",
      "layer3.7.le.norm.weight",
      "layer3.7.le.norm.bias",
      "layer3.7.conv_in.norm.weight",
      "layer3.7.conv_in.norm.bias",
      "layer3.7.conv.norm.weight",
      "layer3.7.conv.norm.bias",
      "layer3.7.conv_out.norm.weight",
      "layer3.7.conv_out.norm.bias",
      "layer3.8.le.norm.weight",
      "layer3.8.le.norm.bias",
      "layer3.8.conv_in.norm.weight",
      "layer3.8.conv_in.norm.bias",
      "layer3.8.conv.norm.weight",
      "layer3.8.conv.norm.bias",
      "layer3.8.conv_out.norm.weight",
      "layer3.8.conv_out.norm.bias",
      "layer3.9.le.norm.weight",
      "layer3.9.le.norm.bias",
      "layer3.9.conv_in.norm.weight",
      "layer3.9.conv_in.norm.bias",
      "layer3.9.conv.norm.weight",
      "layer3.9.conv.norm.bias",
      "layer3.9.conv_out.norm.weight",
      "layer3.9.conv_out.norm.bias",
      "layer3.10.le.norm.weight",
      "layer3.10.le.norm.bias",
      "layer3.10.conv_in.norm.weight",
      "layer3.10.conv_in.norm.bias",
      "layer3.10.conv.norm.weight",
      "layer3.10.conv.norm.bias",
      "layer3.10.conv_out.norm.weight",
      "layer3.10.conv_out.norm.bias",
      "layer3.11.le.norm.weight",
      "layer3.11.le.norm.bias",
      "layer3.11.conv_in.norm.weight",
      "layer3.11.conv_in.norm.bias",
      "layer3.11.conv.norm.weight",
      "layer3.11.conv.norm.bias",
      "layer3.11.conv_out.norm.weight",
      "layer3.11.conv_out.norm.bias",
      "layer4.0.ln.weight",
      "layer4.0.ln.bias",
      "layer4.0.le.norm.weight",
      "layer4.0.le.norm.bias",
      "layer4.0.conv_in.norm.weight",
      "layer4.0.conv_in.norm.bias",
      "layer4.0.conv.norm.weight",
      "layer4.0.conv.norm.bias",
      "layer4.0.conv_out.norm.weight",
      "layer4.0.conv_out.norm.bias",
      "layer4.0.skip.0.norm.weight",
      "layer4.0.skip.0.norm.bias",
      "layer4.0.skip.1.norm.weight",
      "layer4.0.skip.1.norm.bias",
      "layer4.1.le.norm.weight",
      "layer4.1.le.norm.bias",
      "layer4.1.conv_in.norm.weight",
      "layer4.1.conv_in.norm.bias",
      "layer4.1.conv.norm.weight",
      "layer4.1.conv.norm.bias",
      "layer4.1.conv_out.norm.weight",
      "layer4.1.conv_out.norm.bias",
      "layer4.2.le.norm.weight",
      "layer4.2.le.norm.bias",
      "layer4.2.conv_in.norm.weight",
      "layer4.2.conv_in.norm.bias",
      "layer4.2.conv.norm.weight",
      "layer4.2.conv.norm.bias",
      "layer4.2.conv_out.norm.weight",
      "layer4.2.conv_out.norm.bias",
      "head.norm.weight",
      "head.norm.bias",
      "classifier.norm.weight",
      "classifier.norm.bias"
    ],
    "lr_scale": 1.0
  }
}
Use Cosine LR scheduler
Set warmup steps = 6240
Set warmup steps = 0
Max WD = 0.0500000, Min WD = 0.0500000
criterion = SoftTargetCrossEntropy()
Auto resume checkpoint: 
Start training for 300 epochs
Epoch: [0]  [   0/1251]  eta: 3:38:41  lr: 0.000000  min_lr: 0.000000  loss: 6.9539 (6.9539)  weight_decay: 0.0500 (0.0500)  time: 10.4885  data: 4.1392  max mem: 9147
Epoch: [0]  [ 200/1251]  eta: 0:04:03  lr: 0.000032  min_lr: 0.000032  loss: 6.8601 (6.9133)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.6600 (nan)  time: 0.1937  data: 0.0014  max mem: 9147
Epoch: [0]  [ 400/1251]  eta: 0:03:06  lr: 0.000064  min_lr: 0.000064  loss: 6.6803 (6.8449)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.4648 (nan)  time: 0.2248  data: 0.0012  max mem: 9147
Epoch: [0]  [ 600/1251]  eta: 0:02:17  lr: 0.000096  min_lr: 0.000096  loss: 6.5438 (6.7799)  weight_decay: 0.0500 (0.0500)  grad_norm: 10.1868 (nan)  time: 0.2000  data: 0.0009  max mem: 9147
Epoch: [0]  [ 800/1251]  eta: 0:01:32  lr: 0.000128  min_lr: 0.000128  loss: 6.4922 (6.7263)  weight_decay: 0.0500 (0.0500)  grad_norm: 11.2467 (nan)  time: 0.1843  data: 0.0008  max mem: 9147
Epoch: [0]  [1000/1251]  eta: 0:00:50  lr: 0.000160  min_lr: 0.000160  loss: 6.3738 (6.6747)  weight_decay: 0.0500 (0.0500)  grad_norm: 14.6872 (nan)  time: 0.2004  data: 0.0010  max mem: 9147
Epoch: [0]  [1200/1251]  eta: 0:00:10  lr: 0.000192  min_lr: 0.000192  loss: 6.2331 (6.6278)  weight_decay: 0.0500 (0.0500)  grad_norm: 13.2347 (nan)  time: 0.2136  data: 0.0100  max mem: 9147
Epoch: [0]  [1250/1251]  eta: 0:00:00  lr: 0.000199  min_lr: 0.000199  loss: 6.3255 (6.6179)  weight_decay: 0.0500 (0.0500)  grad_norm: 12.3349 (nan)  time: 0.1435  data: 0.0011  max mem: 9147
Epoch: [0] Total time: 0:04:15 (0.2040 s / it)
Averaged stats: lr: 0.000199  min_lr: 0.000199  loss: 6.3255 (6.6161)  weight_decay: 0.0500 (0.0500)  grad_norm: 12.3349 (nan)
Test:  [ 0/25]  eta: 0:04:02  loss: 5.6754 (5.6754)  acc1: 6.8000 (6.8000)  acc5: 16.4000 (16.4000)  time: 9.6937  data: 6.6195  max mem: 9147
Test:  [10/25]  eta: 0:00:14  loss: 5.6754 (5.6669)  acc1: 4.8000 (4.8000)  acc5: 12.4000 (13.1636)  time: 0.9442  data: 0.6019  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 5.7046 (5.7015)  acc1: 4.4000 (4.8571)  acc5: 12.4000 (13.4286)  time: 0.0693  data: 0.0002  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 5.6618 (5.6573)  acc1: 4.8000 (5.3600)  acc5: 12.8000 (14.9760)  time: 0.0923  data: 0.0228  max mem: 9147
Test: Total time: 0:00:11 (0.4765 s / it)
* Acc@1 4.806 Acc@5 14.436 loss 5.660
Accuracy of the model on the 50000 test images: 4.8%
Max accuracy: 4.81%
Epoch: [1]  [   0/1251]  eta: 1:00:18  lr: 0.000200  min_lr: 0.000200  loss: 5.9791 (5.9791)  weight_decay: 0.0500 (0.0500)  time: 2.8922  data: 2.2694  max mem: 9147
Epoch: [1]  [ 200/1251]  eta: 0:03:52  lr: 0.000232  min_lr: 0.000232  loss: 6.4694 (6.3023)  weight_decay: 0.0500 (0.0500)  grad_norm: 14.6634 (17.1518)  time: 0.2099  data: 0.0006  max mem: 9147
Epoch: [1]  [ 400/1251]  eta: 0:03:01  lr: 0.000264  min_lr: 0.000264  loss: 6.1687 (6.2531)  weight_decay: 0.0500 (0.0500)  grad_norm: 15.4451 (17.6069)  time: 0.1999  data: 0.0006  max mem: 9147
Epoch: [1]  [ 600/1251]  eta: 0:02:14  lr: 0.000296  min_lr: 0.000296  loss: 5.8785 (6.2174)  weight_decay: 0.0500 (0.0500)  grad_norm: 17.3692 (17.5645)  time: 0.1900  data: 0.0006  max mem: 9147
Epoch: [1]  [ 800/1251]  eta: 0:01:32  lr: 0.000328  min_lr: 0.000328  loss: 6.2667 (6.2010)  weight_decay: 0.0500 (0.0500)  grad_norm: 17.5559 (17.6478)  time: 0.2191  data: 0.0006  max mem: 9147
Epoch: [1]  [1000/1251]  eta: 0:00:51  lr: 0.000360  min_lr: 0.000360  loss: 5.8419 (6.1625)  weight_decay: 0.0500 (0.0500)  grad_norm: 17.8709 (17.9688)  time: 0.2164  data: 0.0006  max mem: 9147
Epoch: [1]  [1200/1251]  eta: 0:00:10  lr: 0.000392  min_lr: 0.000392  loss: 5.8675 (6.1260)  weight_decay: 0.0500 (0.0500)  grad_norm: 15.5151 (18.0472)  time: 0.2248  data: 0.0181  max mem: 9147
Epoch: [1]  [1250/1251]  eta: 0:00:00  lr: 0.000399  min_lr: 0.000399  loss: 6.1684 (6.1236)  weight_decay: 0.0500 (0.0500)  grad_norm: 15.4566 (18.0161)  time: 0.1483  data: 0.0082  max mem: 9147
Epoch: [1] Total time: 0:04:20 (0.2079 s / it)
Averaged stats: lr: 0.000399  min_lr: 0.000399  loss: 6.1684 (6.1095)  weight_decay: 0.0500 (0.0500)  grad_norm: 15.4566 (18.0161)
Test:  [ 0/25]  eta: 0:02:17  loss: 4.7131 (4.7131)  acc1: 13.6000 (13.6000)  acc5: 32.0000 (32.0000)  time: 5.5040  data: 5.4035  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 4.6803 (4.6574)  acc1: 11.2000 (11.7455)  acc5: 32.0000 (31.0545)  time: 0.7710  data: 0.6915  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 4.7948 (4.8027)  acc1: 11.2000 (11.6952)  acc5: 27.6000 (28.2095)  time: 0.2383  data: 0.1645  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 4.8705 (4.7639)  acc1: 11.6000 (12.3040)  acc5: 27.6000 (29.3440)  time: 0.2372  data: 0.1644  max mem: 9147
Test: Total time: 0:00:10 (0.4248 s / it)
* Acc@1 12.148 Acc@5 29.408 loss 4.770
Accuracy of the model on the 50000 test images: 12.1%
Max accuracy: 12.15%
Epoch: [2]  [   0/1251]  eta: 0:57:20  lr: 0.000400  min_lr: 0.000400  loss: 5.4607 (5.4607)  weight_decay: 0.0500 (0.0500)  time: 2.7504  data: 2.5424  max mem: 9147
Epoch: [2]  [ 200/1251]  eta: 0:03:36  lr: 0.000432  min_lr: 0.000432  loss: 6.0181 (5.9018)  weight_decay: 0.0500 (0.0500)  grad_norm: 16.3402 (19.7292)  time: 0.2002  data: 0.0005  max mem: 9147
Epoch: [2]  [ 400/1251]  eta: 0:02:57  lr: 0.000464  min_lr: 0.000464  loss: 5.7025 (5.8643)  weight_decay: 0.0500 (0.0500)  grad_norm: 17.1262 (18.5589)  time: 0.2163  data: 0.0006  max mem: 9147
Epoch: [2]  [ 600/1251]  eta: 0:02:17  lr: 0.000496  min_lr: 0.000496  loss: 5.9106 (5.8529)  weight_decay: 0.0500 (0.0500)  grad_norm: 19.2347 (18.3035)  time: 0.2208  data: 0.0019  max mem: 9147
Epoch: [2]  [ 800/1251]  eta: 0:01:35  lr: 0.000528  min_lr: 0.000528  loss: 5.7277 (5.8198)  weight_decay: 0.0500 (0.0500)  grad_norm: 13.2543 (17.7844)  time: 0.2151  data: 0.0125  max mem: 9147
Epoch: [2]  [1000/1251]  eta: 0:00:53  lr: 0.000560  min_lr: 0.000560  loss: 5.4588 (5.8040)  weight_decay: 0.0500 (0.0500)  grad_norm: 21.3929 (17.8136)  time: 0.2018  data: 0.0008  max mem: 9147
Epoch: [2]  [1200/1251]  eta: 0:00:10  lr: 0.000592  min_lr: 0.000592  loss: 5.4203 (5.7746)  weight_decay: 0.0500 (0.0500)  grad_norm: 13.6491 (17.4422)  time: 0.2391  data: 0.0006  max mem: 9147
Epoch: [2]  [1250/1251]  eta: 0:00:00  lr: 0.000599  min_lr: 0.000599  loss: 5.2543 (5.7672)  weight_decay: 0.0500 (0.0500)  grad_norm: 15.4682 (17.3965)  time: 0.1385  data: 0.0010  max mem: 9147
Epoch: [2] Total time: 0:04:26 (0.2129 s / it)
Averaged stats: lr: 0.000599  min_lr: 0.000599  loss: 5.2543 (5.7677)  weight_decay: 0.0500 (0.0500)  grad_norm: 15.4682 (17.3965)
Test:  [ 0/25]  eta: 0:02:20  loss: 3.8734 (3.8734)  acc1: 24.4000 (24.4000)  acc5: 47.2000 (47.2000)  time: 5.6068  data: 5.5219  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 3.6993 (3.8419)  acc1: 24.4000 (22.9091)  acc5: 48.4000 (47.2364)  time: 0.7483  data: 0.6520  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 4.2774 (4.0968)  acc1: 18.8000 (20.9143)  acc5: 40.4000 (43.0095)  time: 0.2024  data: 0.1161  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 4.2775 (4.0570)  acc1: 19.6000 (21.6640)  acc5: 40.4000 (43.9200)  time: 0.1989  data: 0.1186  max mem: 9147
Test: Total time: 0:00:10 (0.4048 s / it)
* Acc@1 21.492 Acc@5 43.730 loss 4.049
Accuracy of the model on the 50000 test images: 21.5%
Max accuracy: 21.49%
Epoch: [3]  [   0/1251]  eta: 1:11:02  lr: 0.000600  min_lr: 0.000600  loss: 5.6243 (5.6243)  weight_decay: 0.0500 (0.0500)  time: 3.4072  data: 3.2147  max mem: 9147
Epoch: [3]  [ 200/1251]  eta: 0:03:40  lr: 0.000632  min_lr: 0.000632  loss: 5.5600 (5.5707)  weight_decay: 0.0500 (0.0500)  grad_norm: 15.4585 (16.1224)  time: 0.1996  data: 0.0006  max mem: 9147
Epoch: [3]  [ 400/1251]  eta: 0:02:59  lr: 0.000664  min_lr: 0.000664  loss: 5.1731 (5.5039)  weight_decay: 0.0500 (0.0500)  grad_norm: 14.9073 (15.5414)  time: 0.1939  data: 0.0005  max mem: 9147
Epoch: [3]  [ 600/1251]  eta: 0:02:14  lr: 0.000696  min_lr: 0.000696  loss: 5.1690 (5.4843)  weight_decay: 0.0500 (0.0500)  grad_norm: 13.3352 (15.2105)  time: 0.2045  data: 0.0006  max mem: 9147
Epoch: [3]  [ 800/1251]  eta: 0:01:33  lr: 0.000728  min_lr: 0.000728  loss: 5.1660 (5.4845)  weight_decay: 0.0500 (0.0500)  grad_norm: 11.3995 (14.7564)  time: 0.1950  data: 0.0006  max mem: 9147
Epoch: [3]  [1000/1251]  eta: 0:00:51  lr: 0.000760  min_lr: 0.000760  loss: 5.1137 (5.4572)  weight_decay: 0.0500 (0.0500)  grad_norm: 13.7296 (14.7205)  time: 0.1900  data: 0.0011  max mem: 9147
Epoch: [3]  [1200/1251]  eta: 0:00:10  lr: 0.000792  min_lr: 0.000792  loss: 5.0544 (5.4440)  weight_decay: 0.0500 (0.0500)  grad_norm: 11.3556 (14.5216)  time: 0.1998  data: 0.0007  max mem: 9147
Epoch: [3]  [1250/1251]  eta: 0:00:00  lr: 0.000799  min_lr: 0.000799  loss: 5.9645 (5.4453)  weight_decay: 0.0500 (0.0500)  grad_norm: 13.3284 (14.5754)  time: 0.1408  data: 0.0012  max mem: 9147
Epoch: [3] Total time: 0:04:13 (0.2023 s / it)
Averaged stats: lr: 0.000799  min_lr: 0.000799  loss: 5.9645 (5.4702)  weight_decay: 0.0500 (0.0500)  grad_norm: 13.3284 (14.5754)
Test:  [ 0/25]  eta: 0:02:07  loss: 3.6187 (3.6187)  acc1: 25.2000 (25.2000)  acc5: 54.4000 (54.4000)  time: 5.1006  data: 5.0191  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 3.5299 (3.5369)  acc1: 25.2000 (26.9091)  acc5: 54.4000 (52.1091)  time: 0.7244  data: 0.6428  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 3.9272 (3.7957)  acc1: 20.8000 (24.6476)  acc5: 46.0000 (48.2095)  time: 0.2310  data: 0.1524  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 3.9272 (3.7643)  acc1: 21.2000 (25.3920)  acc5: 46.0000 (48.9120)  time: 0.2148  data: 0.1403  max mem: 9147
Test: Total time: 0:00:10 (0.4059 s / it)
* Acc@1 25.664 Acc@5 49.702 loss 3.741
Accuracy of the model on the 50000 test images: 25.7%
Max accuracy: 25.66%
Epoch: [4]  [   0/1251]  eta: 0:56:36  lr: 0.000800  min_lr: 0.000800  loss: 4.8839 (4.8839)  weight_decay: 0.0500 (0.0500)  time: 2.7151  data: 1.9514  max mem: 9147
Epoch: [4]  [ 200/1251]  eta: 0:03:59  lr: 0.000832  min_lr: 0.000832  loss: 4.8923 (5.3763)  weight_decay: 0.0500 (0.0500)  grad_norm: 11.9439 (13.8765)  time: 0.2336  data: 0.0242  max mem: 9147
Epoch: [4]  [ 400/1251]  eta: 0:03:07  lr: 0.000864  min_lr: 0.000864  loss: 4.9205 (5.3284)  weight_decay: 0.0500 (0.0500)  grad_norm: 11.5339 (13.5038)  time: 0.2044  data: 0.0008  max mem: 9147
Epoch: [4]  [ 600/1251]  eta: 0:02:21  lr: 0.000896  min_lr: 0.000896  loss: 5.0428 (5.2891)  weight_decay: 0.0500 (0.0500)  grad_norm: 8.8831 (12.7271)  time: 0.2013  data: 0.0007  max mem: 9147
Epoch: [4]  [ 800/1251]  eta: 0:01:37  lr: 0.000928  min_lr: 0.000928  loss: 4.8966 (5.2737)  weight_decay: 0.0500 (0.0500)  grad_norm: 8.8137 (12.3183)  time: 0.2206  data: 0.0006  max mem: 9147
Epoch: [4]  [1000/1251]  eta: 0:00:53  lr: 0.000960  min_lr: 0.000960  loss: 5.4226 (5.2654)  weight_decay: 0.0500 (0.0500)  grad_norm: 8.8454 (11.8533)  time: 0.2004  data: 0.0006  max mem: 9147
Epoch: [4]  [1200/1251]  eta: 0:00:10  lr: 0.000992  min_lr: 0.000992  loss: 4.9466 (5.2537)  weight_decay: 0.0500 (0.0500)  grad_norm: 9.0061 (11.6184)  time: 0.2196  data: 0.0185  max mem: 9147
Epoch: [4]  [1250/1251]  eta: 0:00:00  lr: 0.001000  min_lr: 0.001000  loss: 4.6793 (5.2446)  weight_decay: 0.0500 (0.0500)  grad_norm: 8.4399 (11.4985)  time: 0.1403  data: 0.0010  max mem: 9147
Epoch: [4] Total time: 0:04:27 (0.2135 s / it)
Averaged stats: lr: 0.001000  min_lr: 0.001000  loss: 4.6793 (5.2370)  weight_decay: 0.0500 (0.0500)  grad_norm: 8.4399 (11.4985)
Test:  [ 0/25]  eta: 0:02:15  loss: 3.1011 (3.1011)  acc1: 35.2000 (35.2000)  acc5: 63.6000 (63.6000)  time: 5.4357  data: 5.3275  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 2.9673 (3.0795)  acc1: 35.2000 (34.8364)  acc5: 63.6000 (60.6909)  time: 0.6512  data: 0.5746  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 3.4756 (3.3457)  acc1: 28.4000 (31.6571)  acc5: 55.2000 (56.1143)  time: 0.1763  data: 0.1038  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 3.5163 (3.3155)  acc1: 28.4000 (32.1440)  acc5: 55.2000 (57.0880)  time: 0.2081  data: 0.1366  max mem: 9147
Test: Total time: 0:00:09 (0.3996 s / it)
* Acc@1 31.988 Acc@5 57.254 loss 3.321
Accuracy of the model on the 50000 test images: 32.0%
Max accuracy: 31.99%
Epoch: [5]  [   0/1251]  eta: 0:56:29  lr: 0.001000  min_lr: 0.001000  loss: 5.5241 (5.5241)  weight_decay: 0.0500 (0.0500)  time: 2.7097  data: 2.3316  max mem: 9147
Epoch: [5]  [ 200/1251]  eta: 0:03:54  lr: 0.001032  min_lr: 0.001032  loss: 4.5778 (5.1351)  weight_decay: 0.0500 (0.0500)  grad_norm: 9.9541 (9.6561)  time: 0.2244  data: 0.0005  max mem: 9147
Epoch: [5]  [ 400/1251]  eta: 0:02:57  lr: 0.001064  min_lr: 0.001064  loss: 4.8251 (5.0720)  weight_decay: 0.0500 (0.0500)  grad_norm: 10.7049 (9.8544)  time: 0.2020  data: 0.0007  max mem: 9147
Epoch: [5]  [ 600/1251]  eta: 0:02:17  lr: 0.001096  min_lr: 0.001096  loss: 4.9702 (5.0888)  weight_decay: 0.0500 (0.0500)  grad_norm: 6.7957 (9.5552)  time: 0.2047  data: 0.0008  max mem: 9147
Epoch: [5]  [ 800/1251]  eta: 0:01:35  lr: 0.001128  min_lr: 0.001128  loss: 4.7370 (5.0588)  weight_decay: 0.0500 (0.0500)  grad_norm: 8.3569 (9.3129)  time: 0.2296  data: 0.0006  max mem: 9147
Epoch: [5]  [1000/1251]  eta: 0:00:52  lr: 0.001160  min_lr: 0.001160  loss: 4.4783 (5.0321)  weight_decay: 0.0500 (0.0500)  grad_norm: 7.5838 (9.0420)  time: 0.1951  data: 0.0006  max mem: 9147
Epoch: [5]  [1200/1251]  eta: 0:00:10  lr: 0.001192  min_lr: 0.001192  loss: 4.6508 (5.0158)  weight_decay: 0.0500 (0.0500)  grad_norm: 7.6418 (8.8920)  time: 0.2277  data: 0.0007  max mem: 9147
Epoch: [5]  [1250/1251]  eta: 0:00:00  lr: 0.001200  min_lr: 0.001200  loss: 4.4936 (5.0132)  weight_decay: 0.0500 (0.0500)  grad_norm: 7.3014 (8.9768)  time: 0.1411  data: 0.0013  max mem: 9147
Epoch: [5] Total time: 0:04:23 (0.2108 s / it)
Averaged stats: lr: 0.001200  min_lr: 0.001200  loss: 4.4936 (5.0255)  weight_decay: 0.0500 (0.0500)  grad_norm: 7.3014 (8.9768)
Test:  [ 0/25]  eta: 0:02:17  loss: 2.4723 (2.4723)  acc1: 50.8000 (50.8000)  acc5: 74.4000 (74.4000)  time: 5.5063  data: 5.3722  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 2.4723 (2.5675)  acc1: 48.8000 (44.6545)  acc5: 72.4000 (71.2000)  time: 0.7326  data: 0.6355  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 3.1033 (2.9165)  acc1: 35.6000 (39.8476)  acc5: 62.0000 (64.7238)  time: 0.2054  data: 0.1241  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 3.1433 (2.9049)  acc1: 35.6000 (40.1280)  acc5: 60.4000 (64.9920)  time: 0.1993  data: 0.1240  max mem: 9147
Test: Total time: 0:00:09 (0.3997 s / it)
* Acc@1 39.676 Acc@5 65.062 loss 2.905
Accuracy of the model on the 50000 test images: 39.7%
Max accuracy: 39.68%
Epoch: [6]  [   0/1251]  eta: 1:03:04  lr: 0.001200  min_lr: 0.001200  loss: 5.8989 (5.8989)  weight_decay: 0.0500 (0.0500)  time: 3.0249  data: 2.8326  max mem: 9147
Epoch: [6]  [ 200/1251]  eta: 0:03:35  lr: 0.001232  min_lr: 0.001232  loss: 4.6727 (4.9203)  weight_decay: 0.0500 (0.0500)  grad_norm: 6.8302 (7.2403)  time: 0.2005  data: 0.0006  max mem: 9147
Epoch: [6]  [ 400/1251]  eta: 0:02:58  lr: 0.001264  min_lr: 0.001264  loss: 4.3468 (4.9250)  weight_decay: 0.0500 (0.0500)  grad_norm: 6.3587 (7.2848)  time: 0.2048  data: 0.0006  max mem: 9147
Epoch: [6]  [ 600/1251]  eta: 0:02:16  lr: 0.001296  min_lr: 0.001296  loss: 4.9915 (4.9041)  weight_decay: 0.0500 (0.0500)  grad_norm: 6.2456 (7.3872)  time: 0.2050  data: 0.0006  max mem: 9147
Epoch: [6]  [ 800/1251]  eta: 0:01:34  lr: 0.001328  min_lr: 0.001328  loss: 4.7336 (4.8993)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.8492 (7.3385)  time: 0.2046  data: 0.0006  max mem: 9147
Epoch: [6]  [1000/1251]  eta: 0:00:52  lr: 0.001360  min_lr: 0.001360  loss: 4.5555 (4.8775)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.9697 (7.2398)  time: 0.1941  data: 0.0006  max mem: 9147
Epoch: [6]  [1200/1251]  eta: 0:00:10  lr: 0.001393  min_lr: 0.001393  loss: 4.2522 (4.8600)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.2547 (7.0696)  time: 0.1962  data: 0.0006  max mem: 9147
Epoch: [6]  [1250/1251]  eta: 0:00:00  lr: 0.001400  min_lr: 0.001400  loss: 4.2026 (4.8570)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.7206 (7.0542)  time: 0.1388  data: 0.0011  max mem: 9147
Epoch: [6] Total time: 0:04:19 (0.2070 s / it)
Averaged stats: lr: 0.001400  min_lr: 0.001400  loss: 4.2026 (4.8420)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.7206 (7.0542)
Test:  [ 0/25]  eta: 0:01:21  loss: 2.3407 (2.3407)  acc1: 50.8000 (50.8000)  acc5: 75.2000 (75.2000)  time: 3.2514  data: 3.1705  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 2.3407 (2.3877)  acc1: 50.8000 (48.6909)  acc5: 77.2000 (74.9455)  time: 0.5682  data: 0.4925  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 2.8587 (2.7266)  acc1: 38.4000 (42.9905)  acc5: 63.6000 (68.0191)  time: 0.2839  data: 0.2096  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 3.0139 (2.7252)  acc1: 37.2000 (43.0400)  acc5: 62.8000 (68.0160)  time: 0.2091  data: 0.1372  max mem: 9147
Test: Total time: 0:00:10 (0.4035 s / it)
* Acc@1 43.258 Acc@5 68.864 loss 2.706
Accuracy of the model on the 50000 test images: 43.3%
Max accuracy: 43.26%
Epoch: [7]  [   0/1251]  eta: 0:56:53  lr: 0.001400  min_lr: 0.001400  loss: 5.7619 (5.7619)  weight_decay: 0.0500 (0.0500)  time: 2.7290  data: 2.5222  max mem: 9147
Epoch: [7]  [ 200/1251]  eta: 0:03:52  lr: 0.001432  min_lr: 0.001432  loss: 4.2868 (4.7336)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.2707 (5.8801)  time: 0.1939  data: 0.0005  max mem: 9147
Epoch: [7]  [ 400/1251]  eta: 0:02:57  lr: 0.001464  min_lr: 0.001464  loss: 4.1764 (4.7454)  weight_decay: 0.0500 (0.0500)  grad_norm: 6.8061 (5.8943)  time: 0.1984  data: 0.0005  max mem: 9147
Epoch: [7]  [ 600/1251]  eta: 0:02:16  lr: 0.001496  min_lr: 0.001496  loss: 4.0734 (4.7206)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.1611 (5.6858)  time: 0.2008  data: 0.0007  max mem: 9147
Epoch: [7]  [ 800/1251]  eta: 0:01:35  lr: 0.001528  min_lr: 0.001528  loss: 4.4081 (4.7167)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.2890 (5.6032)  time: 0.2287  data: 0.0006  max mem: 9147
Epoch: [7]  [1000/1251]  eta: 0:00:53  lr: 0.001561  min_lr: 0.001561  loss: 4.4019 (4.6960)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.6726 (5.4794)  time: 0.2108  data: 0.0007  max mem: 9147
Epoch: [7]  [1200/1251]  eta: 0:00:10  lr: 0.001593  min_lr: 0.001593  loss: 5.2933 (4.6797)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.6558 (5.3099)  time: 0.2155  data: 0.0006  max mem: 9147
Epoch: [7]  [1250/1251]  eta: 0:00:00  lr: 0.001600  min_lr: 0.001600  loss: 4.3199 (4.6770)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.7539 (5.2525)  time: 0.1539  data: 0.0011  max mem: 9147
Epoch: [7] Total time: 0:04:25 (0.2125 s / it)
Averaged stats: lr: 0.001600  min_lr: 0.001600  loss: 4.3199 (4.6789)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.7539 (5.2525)
Test:  [ 0/25]  eta: 0:02:24  loss: 2.1545 (2.1545)  acc1: 55.2000 (55.2000)  acc5: 78.4000 (78.4000)  time: 5.7926  data: 5.7123  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 2.1545 (2.2674)  acc1: 51.2000 (50.8364)  acc5: 78.4000 (77.0182)  time: 0.7872  data: 0.7100  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 2.7829 (2.6117)  acc1: 42.0000 (45.9429)  acc5: 65.2000 (70.4191)  time: 0.2046  data: 0.1311  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 2.8683 (2.6001)  acc1: 42.0000 (46.2560)  acc5: 65.2000 (70.3520)  time: 0.2022  data: 0.1311  max mem: 9147
Test: Total time: 0:00:10 (0.4115 s / it)
* Acc@1 46.006 Acc@5 71.086 loss 2.583
Accuracy of the model on the 50000 test images: 46.0%
Max accuracy: 46.01%
Epoch: [8]  [   0/1251]  eta: 0:52:57  lr: 0.001600  min_lr: 0.001600  loss: 4.1424 (4.1424)  weight_decay: 0.0500 (0.0500)  time: 2.5402  data: 1.9592  max mem: 9147
Epoch: [8]  [ 200/1251]  eta: 0:03:42  lr: 0.001632  min_lr: 0.001632  loss: 4.9252 (4.6658)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.9330 (4.4404)  time: 0.2004  data: 0.0006  max mem: 9147
Epoch: [8]  [ 400/1251]  eta: 0:03:01  lr: 0.001664  min_lr: 0.001664  loss: 4.2895 (4.6149)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.1346 (4.0826)  time: 0.1747  data: 0.0006  max mem: 9147
Epoch: [8]  [ 600/1251]  eta: 0:02:13  lr: 0.001696  min_lr: 0.001696  loss: 4.1385 (4.6103)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.1315 (3.8802)  time: 0.1824  data: 0.0006  max mem: 9147
Epoch: [8]  [ 800/1251]  eta: 0:01:31  lr: 0.001728  min_lr: 0.001728  loss: 4.0028 (4.5966)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.8601 (3.6882)  time: 0.1863  data: 0.0019  max mem: 9147
Epoch: [8]  [1000/1251]  eta: 0:00:49  lr: 0.001761  min_lr: 0.001761  loss: 4.0865 (4.5815)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.1558 (3.6307)  time: 0.1900  data: 0.0006  max mem: 9147
Epoch: [8]  [1200/1251]  eta: 0:00:10  lr: 0.001793  min_lr: 0.001793  loss: 3.9790 (4.5718)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.5438 (3.4731)  time: 0.2458  data: 0.0022  max mem: 9147
Epoch: [8]  [1250/1251]  eta: 0:00:00  lr: 0.001800  min_lr: 0.001800  loss: 3.9438 (4.5642)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.4523 (3.4516)  time: 0.1427  data: 0.0010  max mem: 9147
Epoch: [8] Total time: 0:04:10 (0.2005 s / it)
Averaged stats: lr: 0.001800  min_lr: 0.001800  loss: 3.9438 (4.5379)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.4523 (3.4516)
Test:  [ 0/25]  eta: 0:02:23  loss: 2.0202 (2.0202)  acc1: 59.6000 (59.6000)  acc5: 81.6000 (81.6000)  time: 5.7383  data: 5.6354  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.9959 (2.0629)  acc1: 56.4000 (54.7273)  acc5: 82.8000 (80.3273)  time: 0.7470  data: 0.6732  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 2.4669 (2.4030)  acc1: 44.4000 (49.1619)  acc5: 70.0000 (73.9619)  time: 0.2052  data: 0.1345  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 2.6297 (2.3899)  acc1: 44.4000 (49.5360)  acc5: 68.8000 (74.1600)  time: 0.2063  data: 0.1364  max mem: 9147
Test: Total time: 0:00:10 (0.4091 s / it)
* Acc@1 49.568 Acc@5 74.458 loss 2.379
Accuracy of the model on the 50000 test images: 49.6%
Max accuracy: 49.57%
Epoch: [9]  [   0/1251]  eta: 1:08:17  lr: 0.001800  min_lr: 0.001800  loss: 4.2919 (4.2919)  weight_decay: 0.0500 (0.0500)  time: 3.2750  data: 3.0540  max mem: 9147
Epoch: [9]  [ 200/1251]  eta: 0:03:49  lr: 0.001832  min_lr: 0.001832  loss: 4.0204 (4.5232)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.6041 (2.9243)  time: 0.2248  data: 0.0007  max mem: 9147
Epoch: [9]  [ 400/1251]  eta: 0:03:05  lr: 0.001864  min_lr: 0.001864  loss: 4.0662 (4.4990)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.3147 (2.7498)  time: 0.2425  data: 0.0005  max mem: 9147
Epoch: [9]  [ 600/1251]  eta: 0:02:20  lr: 0.001896  min_lr: 0.001896  loss: 3.6686 (4.4569)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.5443 (2.6695)  time: 0.2243  data: 0.0006  max mem: 9147
Epoch: [9]  [ 800/1251]  eta: 0:01:36  lr: 0.001929  min_lr: 0.001929  loss: 3.9722 (4.4499)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9861 (2.5532)  time: 0.2193  data: 0.0082  max mem: 9147
Epoch: [9]  [1000/1251]  eta: 0:00:53  lr: 0.001961  min_lr: 0.001961  loss: 4.6400 (4.4416)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.3611 (2.4817)  time: 0.1880  data: 0.0014  max mem: 9147
Epoch: [9]  [1200/1251]  eta: 0:00:10  lr: 0.001993  min_lr: 0.001993  loss: 4.6587 (4.4337)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.2433 (2.4665)  time: 0.1999  data: 0.0006  max mem: 9147
Epoch: [9]  [1250/1251]  eta: 0:00:00  lr: 0.002000  min_lr: 0.002000  loss: 3.7690 (4.4208)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9444 (2.4419)  time: 0.1398  data: 0.0008  max mem: 9147
Epoch: [9] Total time: 0:04:20 (0.2084 s / it)
Averaged stats: lr: 0.002000  min_lr: 0.002000  loss: 3.7690 (4.4247)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9444 (2.4419)
Test:  [ 0/25]  eta: 0:02:21  loss: 1.8248 (1.8248)  acc1: 62.4000 (62.4000)  acc5: 84.4000 (84.4000)  time: 5.6402  data: 5.5406  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.8248 (1.9324)  acc1: 59.6000 (56.5091)  acc5: 84.4000 (82.2909)  time: 0.7572  data: 0.6679  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 2.3652 (2.2557)  acc1: 48.4000 (51.2191)  acc5: 73.6000 (76.6095)  time: 0.2041  data: 0.1250  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 2.4688 (2.2488)  acc1: 48.0000 (51.4880)  acc5: 72.0000 (76.5440)  time: 0.1980  data: 0.1249  max mem: 9147
Test: Total time: 0:00:10 (0.4038 s / it)
* Acc@1 52.326 Acc@5 76.898 loss 2.222
Accuracy of the model on the 50000 test images: 52.3%
Max accuracy: 52.33%
Epoch: [10]  [   0/1251]  eta: 1:02:38  lr: 0.002000  min_lr: 0.002000  loss: 5.4109 (5.4109)  weight_decay: 0.0500 (0.0500)  time: 3.0041  data: 2.8008  max mem: 9147
Epoch: [10]  [ 200/1251]  eta: 0:03:36  lr: 0.002032  min_lr: 0.002032  loss: 4.4801 (4.4499)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8570 (2.0579)  time: 0.1812  data: 0.0004  max mem: 9147
Epoch: [10]  [ 400/1251]  eta: 0:02:50  lr: 0.002064  min_lr: 0.002064  loss: 4.1409 (4.4356)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9012 (2.0630)  time: 0.2052  data: 0.0007  max mem: 9147
Epoch: [10]  [ 600/1251]  eta: 0:02:11  lr: 0.002096  min_lr: 0.002096  loss: 4.2198 (4.3968)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9064 (2.0460)  time: 0.1999  data: 0.0006  max mem: 9147
Epoch: [10]  [ 800/1251]  eta: 0:01:32  lr: 0.002129  min_lr: 0.002129  loss: 4.0636 (4.3921)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6514 (2.0365)  time: 0.2053  data: 0.0007  max mem: 9147
Epoch: [10]  [1000/1251]  eta: 0:00:51  lr: 0.002161  min_lr: 0.002161  loss: 3.7490 (4.3885)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9060 (2.0512)  time: 0.2105  data: 0.0006  max mem: 9147
Epoch: [10]  [1200/1251]  eta: 0:00:10  lr: 0.002193  min_lr: 0.002193  loss: 3.7377 (4.3728)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5634 (2.0234)  time: 0.2147  data: 0.0006  max mem: 9147
Epoch: [10]  [1250/1251]  eta: 0:00:00  lr: 0.002200  min_lr: 0.002200  loss: 3.8854 (4.3760)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4924 (2.0069)  time: 0.1405  data: 0.0011  max mem: 9147
Epoch: [10] Total time: 0:04:19 (0.2078 s / it)
Averaged stats: lr: 0.002200  min_lr: 0.002200  loss: 3.8854 (4.3257)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4924 (2.0069)
Test:  [ 0/25]  eta: 0:02:17  loss: 1.8204 (1.8204)  acc1: 62.8000 (62.8000)  acc5: 83.6000 (83.6000)  time: 5.5173  data: 5.4131  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.7570 (1.7998)  acc1: 62.8000 (60.9818)  acc5: 87.6000 (84.4727)  time: 0.6722  data: 0.5959  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 2.2826 (2.1340)  acc1: 52.0000 (54.8191)  acc5: 74.8000 (78.5524)  time: 0.1948  data: 0.1234  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 2.3658 (2.1413)  acc1: 50.4000 (54.6880)  acc5: 73.2000 (78.2880)  time: 0.1997  data: 0.1293  max mem: 9147
Test: Total time: 0:00:09 (0.3966 s / it)
* Acc@1 54.542 Acc@5 78.530 loss 2.138
Accuracy of the model on the 50000 test images: 54.5%
Max accuracy: 54.54%
Epoch: [11]  [   0/1251]  eta: 1:03:04  lr: 0.002200  min_lr: 0.002200  loss: 3.2442 (3.2442)  weight_decay: 0.0500 (0.0500)  time: 3.0251  data: 2.8280  max mem: 9147
Epoch: [11]  [ 200/1251]  eta: 0:03:54  lr: 0.002232  min_lr: 0.002232  loss: 3.6248 (4.2219)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7274 (1.7778)  time: 0.2153  data: 0.0007  max mem: 9147
Epoch: [11]  [ 400/1251]  eta: 0:03:01  lr: 0.002264  min_lr: 0.002264  loss: 3.7731 (4.2868)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8829 (1.9284)  time: 0.1849  data: 0.0005  max mem: 9147
Epoch: [11]  [ 600/1251]  eta: 0:02:18  lr: 0.002297  min_lr: 0.002297  loss: 3.9963 (4.2535)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7690 (1.8636)  time: 0.2011  data: 0.0006  max mem: 9147
Epoch: [11]  [ 800/1251]  eta: 0:01:35  lr: 0.002329  min_lr: 0.002329  loss: 3.7525 (4.2400)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5642 (1.8058)  time: 0.2004  data: 0.0007  max mem: 9147
Epoch: [11]  [1000/1251]  eta: 0:00:53  lr: 0.002361  min_lr: 0.002361  loss: 3.5882 (4.2270)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4024 (1.7745)  time: 0.2291  data: 0.0007  max mem: 9147
Epoch: [11]  [1200/1251]  eta: 0:00:10  lr: 0.002393  min_lr: 0.002393  loss: 3.5402 (4.2290)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4462 (1.7479)  time: 0.2059  data: 0.0006  max mem: 9147
Epoch: [11]  [1250/1251]  eta: 0:00:00  lr: 0.002400  min_lr: 0.002400  loss: 5.1060 (4.2431)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9676 (1.7577)  time: 0.1477  data: 0.0015  max mem: 9147
Epoch: [11] Total time: 0:04:27 (0.2135 s / it)
Averaged stats: lr: 0.002400  min_lr: 0.002400  loss: 5.1060 (4.2492)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9676 (1.7577)
Test:  [ 0/25]  eta: 0:01:20  loss: 1.6858 (1.6858)  acc1: 68.0000 (68.0000)  acc5: 85.2000 (85.2000)  time: 3.2076  data: 3.1273  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 1.7209 (1.8333)  acc1: 60.8000 (60.9091)  acc5: 86.8000 (84.2182)  time: 0.5937  data: 0.5181  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 2.2927 (2.1562)  acc1: 50.4000 (54.9524)  acc5: 74.8000 (78.8381)  time: 0.2725  data: 0.1994  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 2.4454 (2.1578)  acc1: 50.0000 (54.8960)  acc5: 74.4000 (78.8000)  time: 0.2339  data: 0.1626  max mem: 9147
Test: Total time: 0:00:10 (0.4086 s / it)
* Acc@1 55.322 Acc@5 79.090 loss 2.133
Accuracy of the model on the 50000 test images: 55.3%
Max accuracy: 55.32%
Epoch: [12]  [   0/1251]  eta: 1:07:21  lr: 0.002400  min_lr: 0.002400  loss: 3.7863 (3.7863)  weight_decay: 0.0500 (0.0500)  time: 3.2309  data: 3.0421  max mem: 9147
Epoch: [12]  [ 200/1251]  eta: 0:03:36  lr: 0.002432  min_lr: 0.002432  loss: 4.3855 (4.2584)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6537 (1.6629)  time: 0.1907  data: 0.0004  max mem: 9147
Epoch: [12]  [ 400/1251]  eta: 0:02:49  lr: 0.002464  min_lr: 0.002464  loss: 3.8591 (4.1941)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3865 (1.6557)  time: 0.1931  data: 0.0006  max mem: 9147
Epoch: [12]  [ 600/1251]  eta: 0:02:09  lr: 0.002497  min_lr: 0.002497  loss: 3.6128 (4.2116)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4951 (1.6590)  time: 0.2102  data: 0.0008  max mem: 9147
Epoch: [12]  [ 800/1251]  eta: 0:01:31  lr: 0.002529  min_lr: 0.002529  loss: 3.5733 (4.2359)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7517 (1.6848)  time: 0.2336  data: 0.0019  max mem: 9147
Epoch: [12]  [1000/1251]  eta: 0:00:51  lr: 0.002561  min_lr: 0.002561  loss: 3.5626 (4.2209)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3490 (1.6342)  time: 0.2171  data: 0.0007  max mem: 9147
Epoch: [12]  [1200/1251]  eta: 0:00:10  lr: 0.002593  min_lr: 0.002593  loss: 3.5837 (4.2037)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2364 (1.6146)  time: 0.2096  data: 0.0006  max mem: 9147
Epoch: [12]  [1250/1251]  eta: 0:00:00  lr: 0.002600  min_lr: 0.002600  loss: 3.6474 (4.2037)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3890 (1.6114)  time: 0.1480  data: 0.0011  max mem: 9147
Epoch: [12] Total time: 0:04:14 (0.2038 s / it)
Averaged stats: lr: 0.002600  min_lr: 0.002600  loss: 3.6474 (4.1880)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3890 (1.6114)
Test:  [ 0/25]  eta: 0:02:20  loss: 1.4629 (1.4629)  acc1: 72.0000 (72.0000)  acc5: 89.6000 (89.6000)  time: 5.6046  data: 5.5242  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.5268 (1.6695)  acc1: 64.0000 (63.6000)  acc5: 88.4000 (86.6182)  time: 0.7445  data: 0.6702  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 2.0460 (1.9913)  acc1: 53.2000 (57.5238)  acc5: 78.0000 (80.5333)  time: 0.2045  data: 0.1323  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 2.2556 (2.0020)  acc1: 53.2000 (57.1040)  acc5: 76.0000 (80.4000)  time: 0.2037  data: 0.1323  max mem: 9147
Test: Total time: 0:00:10 (0.4029 s / it)
* Acc@1 57.118 Acc@5 80.700 loss 1.994
Accuracy of the model on the 50000 test images: 57.1%
Max accuracy: 57.12%
Epoch: [13]  [   0/1251]  eta: 0:58:05  lr: 0.002600  min_lr: 0.002600  loss: 5.0611 (5.0611)  weight_decay: 0.0500 (0.0500)  time: 2.7858  data: 2.5696  max mem: 9147
Epoch: [13]  [ 200/1251]  eta: 0:03:52  lr: 0.002632  min_lr: 0.002632  loss: 3.5791 (4.1918)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5265 (1.8724)  time: 0.2060  data: 0.0058  max mem: 9147
Epoch: [13]  [ 400/1251]  eta: 0:03:06  lr: 0.002665  min_lr: 0.002665  loss: 4.7084 (4.1779)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4278 (1.6703)  time: 0.2053  data: 0.0007  max mem: 9147
Epoch: [13]  [ 600/1251]  eta: 0:02:20  lr: 0.002697  min_lr: 0.002697  loss: 3.5375 (4.1727)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4181 (1.6060)  time: 0.2250  data: 0.0007  max mem: 9147
Epoch: [13]  [ 800/1251]  eta: 0:01:36  lr: 0.002729  min_lr: 0.002729  loss: 3.6092 (4.1663)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6073 (1.5924)  time: 0.2024  data: 0.0005  max mem: 9147
Epoch: [13]  [1000/1251]  eta: 0:00:53  lr: 0.002761  min_lr: 0.002761  loss: 4.2627 (4.1799)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5801 (1.5770)  time: 0.2204  data: 0.0007  max mem: 9147
Epoch: [13]  [1200/1251]  eta: 0:00:10  lr: 0.002793  min_lr: 0.002793  loss: 3.4948 (4.1513)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3953 (1.5637)  time: 0.2139  data: 0.0006  max mem: 9147
Epoch: [13]  [1250/1251]  eta: 0:00:00  lr: 0.002800  min_lr: 0.002800  loss: 3.6001 (4.1487)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1796 (1.5504)  time: 0.1614  data: 0.0012  max mem: 9147
Epoch: [13] Total time: 0:04:27 (0.2141 s / it)
Averaged stats: lr: 0.002800  min_lr: 0.002800  loss: 3.6001 (4.1455)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1796 (1.5504)
Test:  [ 0/25]  eta: 0:02:25  loss: 1.4905 (1.4905)  acc1: 68.8000 (68.8000)  acc5: 86.4000 (86.4000)  time: 5.8313  data: 5.7509  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.4905 (1.5694)  acc1: 62.0000 (64.4727)  acc5: 88.4000 (87.0909)  time: 0.7525  data: 0.6687  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.9402 (1.9147)  acc1: 54.4000 (58.2476)  acc5: 79.6000 (81.3524)  time: 0.1904  data: 0.1137  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 2.2041 (1.9294)  acc1: 51.6000 (58.0960)  acc5: 76.8000 (81.1840)  time: 0.1878  data: 0.1168  max mem: 9147
Test: Total time: 0:00:10 (0.4021 s / it)
* Acc@1 57.886 Acc@5 81.200 loss 1.927
Accuracy of the model on the 50000 test images: 57.9%
Max accuracy: 57.89%
Epoch: [14]  [   0/1251]  eta: 1:06:51  lr: 0.002800  min_lr: 0.002800  loss: 4.8049 (4.8049)  weight_decay: 0.0500 (0.0500)  time: 3.2065  data: 3.0405  max mem: 9147
Epoch: [14]  [ 200/1251]  eta: 0:03:36  lr: 0.002833  min_lr: 0.002833  loss: 3.8286 (4.0400)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3588 (1.4193)  time: 0.1853  data: 0.0006  max mem: 9147
Epoch: [14]  [ 400/1251]  eta: 0:02:50  lr: 0.002865  min_lr: 0.002865  loss: 3.6524 (4.0532)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6908 (1.4781)  time: 0.2246  data: 0.0146  max mem: 9147
Epoch: [14]  [ 600/1251]  eta: 0:02:14  lr: 0.002897  min_lr: 0.002897  loss: 3.6124 (4.0606)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3050 (1.4585)  time: 0.2242  data: 0.0005  max mem: 9147
Epoch: [14]  [ 800/1251]  eta: 0:01:33  lr: 0.002929  min_lr: 0.002929  loss: 3.8728 (4.0365)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2366 (1.4234)  time: 0.2272  data: 0.0006  max mem: 9147
Epoch: [14]  [1000/1251]  eta: 0:00:52  lr: 0.002961  min_lr: 0.002961  loss: 3.7156 (4.0422)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1975 (1.4148)  time: 0.2242  data: 0.0008  max mem: 9147
Epoch: [14]  [1200/1251]  eta: 0:00:10  lr: 0.002993  min_lr: 0.002993  loss: 3.4539 (4.0389)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0592 (1.3860)  time: 0.2014  data: 0.0006  max mem: 9147
Epoch: [14]  [1250/1251]  eta: 0:00:00  lr: 0.003000  min_lr: 0.003000  loss: 3.7360 (4.0388)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2882 (1.3970)  time: 0.1444  data: 0.0009  max mem: 9147
Epoch: [14] Total time: 0:04:22 (0.2095 s / it)
Averaged stats: lr: 0.003000  min_lr: 0.003000  loss: 3.7360 (4.0756)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2882 (1.3970)
Test:  [ 0/25]  eta: 0:02:25  loss: 1.4951 (1.4951)  acc1: 72.8000 (72.8000)  acc5: 88.4000 (88.4000)  time: 5.8032  data: 5.7228  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.5655 (1.6350)  acc1: 62.4000 (64.0364)  acc5: 88.4000 (87.6000)  time: 0.7628  data: 0.6862  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 2.0437 (1.9310)  acc1: 53.6000 (58.8191)  acc5: 78.4000 (81.8857)  time: 0.1997  data: 0.1270  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 2.1904 (1.9457)  acc1: 53.6000 (58.4160)  acc5: 77.6000 (81.4560)  time: 0.2014  data: 0.1308  max mem: 9147
Test: Total time: 0:00:10 (0.4095 s / it)
* Acc@1 58.306 Acc@5 81.680 loss 1.954
Accuracy of the model on the 50000 test images: 58.3%
Max accuracy: 58.31%
Epoch: [15]  [   0/1251]  eta: 1:03:09  lr: 0.003000  min_lr: 0.003000  loss: 3.2801 (3.2801)  weight_decay: 0.0500 (0.0500)  time: 3.0289  data: 2.8254  max mem: 9147
Epoch: [15]  [ 200/1251]  eta: 0:03:36  lr: 0.003033  min_lr: 0.003033  loss: 3.6133 (3.9711)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5183 (1.4169)  time: 0.1937  data: 0.0005  max mem: 9147
Epoch: [15]  [ 400/1251]  eta: 0:02:51  lr: 0.003065  min_lr: 0.003065  loss: 3.6988 (4.0329)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2092 (1.3801)  time: 0.2095  data: 0.0040  max mem: 9147
Epoch: [15]  [ 600/1251]  eta: 0:02:13  lr: 0.003097  min_lr: 0.003097  loss: 3.6558 (4.0679)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0729 (1.3672)  time: 0.1954  data: 0.0009  max mem: 9147
Epoch: [15]  [ 800/1251]  eta: 0:01:33  lr: 0.003129  min_lr: 0.003129  loss: 3.6097 (4.0529)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2426 (1.3392)  time: 0.1991  data: 0.0007  max mem: 9147
Epoch: [15]  [1000/1251]  eta: 0:00:51  lr: 0.003161  min_lr: 0.003161  loss: 3.6644 (4.0636)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1210 (1.3180)  time: 0.2000  data: 0.0007  max mem: 9147
Epoch: [15]  [1200/1251]  eta: 0:00:10  lr: 0.003193  min_lr: 0.003193  loss: 4.3990 (4.0640)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2643 (1.3235)  time: 0.2005  data: 0.0007  max mem: 9147
Epoch: [15]  [1250/1251]  eta: 0:00:00  lr: 0.003200  min_lr: 0.003200  loss: 3.5430 (4.0628)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3195 (1.3285)  time: 0.1477  data: 0.0009  max mem: 9147
Epoch: [15] Total time: 0:04:14 (0.2031 s / it)
Averaged stats: lr: 0.003200  min_lr: 0.003200  loss: 3.5430 (4.0413)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3195 (1.3285)
Test:  [ 0/25]  eta: 0:02:20  loss: 1.4573 (1.4573)  acc1: 69.2000 (69.2000)  acc5: 87.6000 (87.6000)  time: 5.6080  data: 5.5276  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.5284 (1.6150)  acc1: 63.2000 (63.8545)  acc5: 90.0000 (87.8182)  time: 0.7418  data: 0.6602  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 2.0010 (1.9104)  acc1: 54.4000 (58.6476)  acc5: 79.6000 (82.4000)  time: 0.2093  data: 0.1317  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 2.1589 (1.9135)  acc1: 54.4000 (58.5760)  acc5: 78.8000 (82.3200)  time: 0.2048  data: 0.1317  max mem: 9147
Test: Total time: 0:00:10 (0.4072 s / it)
* Acc@1 58.874 Acc@5 82.236 loss 1.912
Accuracy of the model on the 50000 test images: 58.9%
Max accuracy: 58.87%
Epoch: [16]  [   0/1251]  eta: 1:08:13  lr: 0.003201  min_lr: 0.003201  loss: 3.1908 (3.1908)  weight_decay: 0.0500 (0.0500)  time: 3.2723  data: 3.0729  max mem: 9147
Epoch: [16]  [ 200/1251]  eta: 0:03:28  lr: 0.003233  min_lr: 0.003233  loss: 3.4549 (3.9510)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0900 (1.2607)  time: 0.1900  data: 0.0005  max mem: 9147
Epoch: [16]  [ 400/1251]  eta: 0:02:44  lr: 0.003265  min_lr: 0.003265  loss: 3.4840 (4.0124)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3144 (1.3571)  time: 0.1948  data: 0.0005  max mem: 9147
Epoch: [16]  [ 600/1251]  eta: 0:02:09  lr: 0.003297  min_lr: 0.003297  loss: 3.4789 (4.0230)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1161 (1.3077)  time: 0.2144  data: 0.0006  max mem: 9147
Epoch: [16]  [ 800/1251]  eta: 0:01:31  lr: 0.003329  min_lr: 0.003329  loss: 3.4249 (3.9979)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1184 (1.3299)  time: 0.2098  data: 0.0023  max mem: 9147
Epoch: [16]  [1000/1251]  eta: 0:00:51  lr: 0.003361  min_lr: 0.003361  loss: 3.9659 (4.0037)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9337 (1.2897)  time: 0.2051  data: 0.0006  max mem: 9147
Epoch: [16]  [1200/1251]  eta: 0:00:10  lr: 0.003393  min_lr: 0.003393  loss: 3.6729 (4.0148)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9773 (1.2773)  time: 0.2327  data: 0.0214  max mem: 9147
Epoch: [16]  [1250/1251]  eta: 0:00:00  lr: 0.003400  min_lr: 0.003400  loss: 3.3638 (3.9994)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0624 (1.2736)  time: 0.1390  data: 0.0008  max mem: 9147
Epoch: [16] Total time: 0:04:20 (0.2079 s / it)
Averaged stats: lr: 0.003400  min_lr: 0.003400  loss: 3.3638 (4.0248)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0624 (1.2736)
Test:  [ 0/25]  eta: 0:02:20  loss: 1.3561 (1.3561)  acc1: 70.8000 (70.8000)  acc5: 89.2000 (89.2000)  time: 5.6216  data: 5.5303  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.4688 (1.5299)  acc1: 68.4000 (66.0000)  acc5: 88.4000 (88.0000)  time: 0.7751  data: 0.6782  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.9238 (1.8356)  acc1: 56.4000 (59.9429)  acc5: 79.2000 (82.4191)  time: 0.2129  data: 0.1287  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 2.0865 (1.8462)  acc1: 54.8000 (59.6000)  acc5: 78.4000 (82.3680)  time: 0.2071  data: 0.1286  max mem: 9147
Test: Total time: 0:00:10 (0.4097 s / it)
* Acc@1 59.228 Acc@5 82.568 loss 1.852
Accuracy of the model on the 50000 test images: 59.2%
Max accuracy: 59.23%
Epoch: [17]  [   0/1251]  eta: 0:59:13  lr: 0.003401  min_lr: 0.003401  loss: 4.6581 (4.6581)  weight_decay: 0.0500 (0.0500)  time: 2.8404  data: 2.5519  max mem: 9147
Epoch: [17]  [ 200/1251]  eta: 0:03:43  lr: 0.003433  min_lr: 0.003433  loss: 3.7616 (4.0680)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1762 (1.1758)  time: 0.2098  data: 0.0008  max mem: 9147
Epoch: [17]  [ 400/1251]  eta: 0:03:01  lr: 0.003465  min_lr: 0.003465  loss: 3.6245 (4.0609)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0611 (1.2275)  time: 0.2154  data: 0.0006  max mem: 9147
Epoch: [17]  [ 600/1251]  eta: 0:02:19  lr: 0.003497  min_lr: 0.003497  loss: 4.1737 (4.0117)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0212 (1.1881)  time: 0.2101  data: 0.0006  max mem: 9147
Epoch: [17]  [ 800/1251]  eta: 0:01:35  lr: 0.003529  min_lr: 0.003529  loss: 3.3473 (3.9966)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0467 (1.1938)  time: 0.2108  data: 0.0007  max mem: 9147
Epoch: [17]  [1000/1251]  eta: 0:00:53  lr: 0.003561  min_lr: 0.003561  loss: 4.2695 (3.9975)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0281 (1.1899)  time: 0.2292  data: 0.0008  max mem: 9147
Epoch: [17]  [1200/1251]  eta: 0:00:10  lr: 0.003593  min_lr: 0.003593  loss: 4.7284 (4.0058)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1355 (1.1816)  time: 0.1933  data: 0.0006  max mem: 9147
Epoch: [17]  [1250/1251]  eta: 0:00:00  lr: 0.003600  min_lr: 0.003600  loss: 3.6190 (4.0102)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9229 (1.1738)  time: 0.1382  data: 0.0008  max mem: 9147
Epoch: [17] Total time: 0:04:22 (0.2101 s / it)
Averaged stats: lr: 0.003600  min_lr: 0.003600  loss: 3.6190 (3.9895)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9229 (1.1738)
Test:  [ 0/25]  eta: 0:02:23  loss: 1.3862 (1.3862)  acc1: 70.4000 (70.4000)  acc5: 86.4000 (86.4000)  time: 5.7298  data: 5.6496  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.3913 (1.4986)  acc1: 69.6000 (66.6909)  acc5: 88.4000 (88.1091)  time: 0.7321  data: 0.6471  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.8026 (1.7996)  acc1: 58.8000 (60.5333)  acc5: 82.0000 (83.1238)  time: 0.1997  data: 0.1224  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 2.0094 (1.8139)  acc1: 55.6000 (60.2720)  acc5: 78.8000 (82.8960)  time: 0.2025  data: 0.1290  max mem: 9147
Test: Total time: 0:00:10 (0.4104 s / it)
* Acc@1 60.584 Acc@5 83.530 loss 1.798
Accuracy of the model on the 50000 test images: 60.6%
Max accuracy: 60.58%
Epoch: [18]  [   0/1251]  eta: 0:57:40  lr: 0.003601  min_lr: 0.003601  loss: 3.2241 (3.2241)  weight_decay: 0.0500 (0.0500)  time: 2.7666  data: 2.3886  max mem: 9147
Epoch: [18]  [ 200/1251]  eta: 0:03:50  lr: 0.003633  min_lr: 0.003633  loss: 3.3129 (3.8737)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1519 (1.1422)  time: 0.2203  data: 0.0007  max mem: 9147
Epoch: [18]  [ 400/1251]  eta: 0:03:05  lr: 0.003665  min_lr: 0.003665  loss: 4.4159 (3.8994)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0574 (1.1788)  time: 0.2013  data: 0.0007  max mem: 9147
Epoch: [18]  [ 600/1251]  eta: 0:02:21  lr: 0.003697  min_lr: 0.003697  loss: 3.7857 (3.9036)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1689 (1.1946)  time: 0.2006  data: 0.0007  max mem: 9147
Epoch: [18]  [ 800/1251]  eta: 0:01:37  lr: 0.003729  min_lr: 0.003729  loss: 3.5442 (3.8867)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9961 (1.1731)  time: 0.2113  data: 0.0007  max mem: 9147
Epoch: [18]  [1000/1251]  eta: 0:00:54  lr: 0.003761  min_lr: 0.003761  loss: 4.0299 (3.9204)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2202 (1.1806)  time: 0.2397  data: 0.0006  max mem: 9147
Epoch: [18]  [1200/1251]  eta: 0:00:10  lr: 0.003793  min_lr: 0.003793  loss: 3.2666 (3.9117)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0363 (1.1697)  time: 0.1864  data: 0.0011  max mem: 9147
Epoch: [18]  [1250/1251]  eta: 0:00:00  lr: 0.003800  min_lr: 0.003800  loss: 4.1200 (3.9070)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0025 (1.1632)  time: 0.1379  data: 0.0007  max mem: 9147
Epoch: [18] Total time: 0:04:26 (0.2131 s / it)
Averaged stats: lr: 0.003800  min_lr: 0.003800  loss: 4.1200 (3.9368)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0025 (1.1632)
Test:  [ 0/25]  eta: 0:02:21  loss: 1.4734 (1.4734)  acc1: 68.0000 (68.0000)  acc5: 88.8000 (88.8000)  time: 5.6792  data: 5.5988  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.4734 (1.6059)  acc1: 67.6000 (65.3091)  acc5: 88.8000 (88.6909)  time: 0.7442  data: 0.6702  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.9577 (1.8855)  acc1: 55.2000 (60.2095)  acc5: 83.6000 (83.7714)  time: 0.2041  data: 0.1328  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 2.1216 (1.9010)  acc1: 56.8000 (60.2080)  acc5: 79.2000 (83.5200)  time: 0.2027  data: 0.1328  max mem: 9147
Test: Total time: 0:00:10 (0.4046 s / it)
* Acc@1 60.376 Acc@5 83.164 loss 1.898
Accuracy of the model on the 50000 test images: 60.4%
Max accuracy: 60.58%
Epoch: [19]  [   0/1251]  eta: 1:02:30  lr: 0.003801  min_lr: 0.003801  loss: 3.1980 (3.1980)  weight_decay: 0.0500 (0.0500)  time: 2.9981  data: 1.5337  max mem: 9147
Epoch: [19]  [ 200/1251]  eta: 0:03:57  lr: 0.003833  min_lr: 0.003833  loss: 3.2221 (3.9471)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1131 (1.0580)  time: 0.2248  data: 0.0020  max mem: 9147
Epoch: [19]  [ 400/1251]  eta: 0:03:01  lr: 0.003865  min_lr: 0.003865  loss: 4.0689 (3.9312)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2702 (1.1215)  time: 0.2116  data: 0.0008  max mem: 9147
Epoch: [19]  [ 600/1251]  eta: 0:02:20  lr: 0.003897  min_lr: 0.003897  loss: 3.4001 (3.9197)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0192 (1.1246)  time: 0.2100  data: 0.0006  max mem: 9147
Epoch: [19]  [ 800/1251]  eta: 0:01:35  lr: 0.003929  min_lr: 0.003929  loss: 4.3395 (3.9095)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8628 (1.0817)  time: 0.2051  data: 0.0007  max mem: 9147
Epoch: [19]  [1000/1251]  eta: 0:00:53  lr: 0.003961  min_lr: 0.003961  loss: 4.0174 (3.9374)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9780 (1.0898)  time: 0.2030  data: 0.0006  max mem: 9147
Epoch: [19]  [1200/1251]  eta: 0:00:10  lr: 0.003993  min_lr: 0.003993  loss: 3.4829 (3.9264)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8981 (1.0857)  time: 0.2000  data: 0.0006  max mem: 9147
Epoch: [19]  [1250/1251]  eta: 0:00:00  lr: 0.004000  min_lr: 0.004000  loss: 3.4199 (3.9319)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9044 (1.0806)  time: 0.1415  data: 0.0012  max mem: 9147
Epoch: [19] Total time: 0:04:25 (0.2120 s / it)
Averaged stats: lr: 0.004000  min_lr: 0.004000  loss: 3.4199 (3.9220)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9044 (1.0806)
Test:  [ 0/25]  eta: 0:02:21  loss: 1.2842 (1.2842)  acc1: 71.6000 (71.6000)  acc5: 90.8000 (90.8000)  time: 5.6510  data: 5.5705  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.3841 (1.4742)  acc1: 67.2000 (67.3455)  acc5: 90.8000 (88.6546)  time: 0.7104  data: 0.6361  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.8485 (1.7920)  acc1: 58.8000 (61.2381)  acc5: 81.6000 (83.6571)  time: 0.1847  data: 0.1133  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 2.0132 (1.7985)  acc1: 58.4000 (61.2480)  acc5: 80.0000 (83.6000)  time: 0.1854  data: 0.1148  max mem: 9147
Test: Total time: 0:00:09 (0.3940 s / it)
* Acc@1 60.746 Acc@5 83.750 loss 1.806
Accuracy of the model on the 50000 test images: 60.7%
Max accuracy: 60.75%
Epoch: [20]  [   0/1251]  eta: 0:59:45  lr: 0.004000  min_lr: 0.004000  loss: 4.9427 (4.9427)  weight_decay: 0.0500 (0.0500)  time: 2.8663  data: 2.6179  max mem: 9147
Epoch: [20]  [ 200/1251]  eta: 0:03:57  lr: 0.004000  min_lr: 0.004000  loss: 3.6639 (3.8432)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9638 (1.0720)  time: 0.2006  data: 0.0006  max mem: 9147
Epoch: [20]  [ 400/1251]  eta: 0:03:06  lr: 0.004000  min_lr: 0.004000  loss: 3.8125 (3.8692)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9260 (1.0578)  time: 0.2167  data: 0.0006  max mem: 9147
Epoch: [20]  [ 600/1251]  eta: 0:02:17  lr: 0.004000  min_lr: 0.004000  loss: 3.5521 (3.8572)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8853 (1.0461)  time: 0.1881  data: 0.0005  max mem: 9147
Epoch: [20]  [ 800/1251]  eta: 0:01:34  lr: 0.004000  min_lr: 0.004000  loss: 3.3149 (3.8465)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9775 (1.0385)  time: 0.2296  data: 0.0124  max mem: 9147
Epoch: [20]  [1000/1251]  eta: 0:00:53  lr: 0.004000  min_lr: 0.004000  loss: 3.6390 (3.8692)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9341 (nan)  time: 0.2241  data: 0.0320  max mem: 9147
Epoch: [20]  [1200/1251]  eta: 0:00:10  lr: 0.004000  min_lr: 0.004000  loss: 3.1829 (3.8687)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0773 (nan)  time: 0.1966  data: 0.0017  max mem: 9147
Epoch: [20]  [1250/1251]  eta: 0:00:00  lr: 0.004000  min_lr: 0.004000  loss: 3.5410 (3.8735)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0297 (nan)  time: 0.1380  data: 0.0015  max mem: 9147
Epoch: [20] Total time: 0:04:19 (0.2076 s / it)
Averaged stats: lr: 0.004000  min_lr: 0.004000  loss: 3.5410 (3.8902)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0297 (nan)
Test:  [ 0/25]  eta: 0:02:14  loss: 1.1966 (1.1966)  acc1: 74.8000 (74.8000)  acc5: 91.6000 (91.6000)  time: 5.3807  data: 5.2765  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.4312 (1.4579)  acc1: 68.4000 (68.2909)  acc5: 91.2000 (89.4909)  time: 0.7502  data: 0.6575  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.8059 (1.7672)  acc1: 59.2000 (62.1714)  acc5: 82.0000 (84.1333)  time: 0.2150  data: 0.1338  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.9892 (1.7775)  acc1: 57.2000 (61.7280)  acc5: 80.4000 (84.0160)  time: 0.2146  data: 0.1337  max mem: 9147
Test: Total time: 0:00:10 (0.4060 s / it)
* Acc@1 61.798 Acc@5 84.312 loss 1.767
Accuracy of the model on the 50000 test images: 61.8%
Max accuracy: 61.80%
Epoch: [21]  [   0/1251]  eta: 0:59:54  lr: 0.004000  min_lr: 0.004000  loss: 2.9423 (2.9423)  weight_decay: 0.0500 (0.0500)  time: 2.8729  data: 2.3279  max mem: 9147
Epoch: [21]  [ 200/1251]  eta: 0:03:38  lr: 0.004000  min_lr: 0.004000  loss: 3.3983 (3.9018)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0507 (1.0748)  time: 0.2007  data: 0.0006  max mem: 9147
Epoch: [21]  [ 400/1251]  eta: 0:03:00  lr: 0.004000  min_lr: 0.004000  loss: 3.9738 (3.9468)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0454 (inf)  time: 0.2197  data: 0.0149  max mem: 9147
Epoch: [21]  [ 600/1251]  eta: 0:02:19  lr: 0.004000  min_lr: 0.004000  loss: 3.4484 (3.8993)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8846 (inf)  time: 0.2393  data: 0.0007  max mem: 9147
Epoch: [21]  [ 800/1251]  eta: 0:01:36  lr: 0.004000  min_lr: 0.004000  loss: 3.3150 (3.8780)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8777 (inf)  time: 0.2199  data: 0.0007  max mem: 9147
Epoch: [21]  [1000/1251]  eta: 0:00:53  lr: 0.004000  min_lr: 0.004000  loss: 3.1874 (3.8680)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0126 (inf)  time: 0.2056  data: 0.0007  max mem: 9147
Epoch: [21]  [1200/1251]  eta: 0:00:10  lr: 0.004000  min_lr: 0.004000  loss: 3.5247 (3.8615)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9067 (inf)  time: 0.2005  data: 0.0007  max mem: 9147
Epoch: [21]  [1250/1251]  eta: 0:00:00  lr: 0.003999  min_lr: 0.003999  loss: 3.1968 (3.8518)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9173 (inf)  time: 0.1495  data: 0.0030  max mem: 9147
Epoch: [21] Total time: 0:04:26 (0.2131 s / it)
Averaged stats: lr: 0.003999  min_lr: 0.003999  loss: 3.1968 (3.8341)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9173 (inf)
Test:  [ 0/25]  eta: 0:02:27  loss: 1.2799 (1.2799)  acc1: 72.8000 (72.8000)  acc5: 91.2000 (91.2000)  time: 5.9147  data: 5.8344  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.3338 (1.3998)  acc1: 70.4000 (68.7273)  acc5: 91.2000 (90.1455)  time: 0.7692  data: 0.6948  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.7318 (1.6984)  acc1: 58.8000 (62.7619)  acc5: 84.0000 (85.3143)  time: 0.2006  data: 0.1276  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.8959 (1.7140)  acc1: 57.2000 (62.3200)  acc5: 81.6000 (85.0880)  time: 0.1997  data: 0.1276  max mem: 9147
Test: Total time: 0:00:10 (0.4114 s / it)
* Acc@1 62.388 Acc@5 85.062 loss 1.702
Accuracy of the model on the 50000 test images: 62.4%
Max accuracy: 62.39%
Epoch: [22]  [   0/1251]  eta: 1:01:23  lr: 0.003999  min_lr: 0.003999  loss: 4.8362 (4.8362)  weight_decay: 0.0500 (0.0500)  time: 2.9448  data: 2.7452  max mem: 9147
Epoch: [22]  [ 200/1251]  eta: 0:03:48  lr: 0.003999  min_lr: 0.003999  loss: 3.5280 (3.8446)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1181 (1.2171)  time: 0.2004  data: 0.0006  max mem: 9147
Epoch: [22]  [ 400/1251]  eta: 0:03:04  lr: 0.003999  min_lr: 0.003999  loss: 3.2860 (3.7993)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8856 (1.1317)  time: 0.2245  data: 0.0178  max mem: 9147
Epoch: [22]  [ 600/1251]  eta: 0:02:21  lr: 0.003999  min_lr: 0.003999  loss: 3.6953 (3.8252)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8549 (1.1123)  time: 0.2105  data: 0.0006  max mem: 9147
Epoch: [22]  [ 800/1251]  eta: 0:01:37  lr: 0.003999  min_lr: 0.003999  loss: 4.5498 (3.8518)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9011 (1.0639)  time: 0.2005  data: 0.0005  max mem: 9147
Epoch: [22]  [1000/1251]  eta: 0:00:54  lr: 0.003999  min_lr: 0.003999  loss: 3.2091 (3.8519)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7991 (1.0265)  time: 0.2000  data: 0.0006  max mem: 9147
Epoch: [22]  [1200/1251]  eta: 0:00:10  lr: 0.003999  min_lr: 0.003999  loss: 3.1777 (3.8426)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0768 (1.0155)  time: 0.2154  data: 0.0007  max mem: 9147
Epoch: [22]  [1250/1251]  eta: 0:00:00  lr: 0.003999  min_lr: 0.003999  loss: 3.3023 (3.8387)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8543 (1.0097)  time: 0.1486  data: 0.0021  max mem: 9147
Epoch: [22] Total time: 0:04:28 (0.2150 s / it)
Averaged stats: lr: 0.003999  min_lr: 0.003999  loss: 3.3023 (3.8169)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8543 (1.0097)
Test:  [ 0/25]  eta: 0:02:12  loss: 1.2556 (1.2556)  acc1: 72.0000 (72.0000)  acc5: 92.0000 (92.0000)  time: 5.3046  data: 5.1907  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.3314 (1.4388)  acc1: 68.8000 (68.4364)  acc5: 92.0000 (90.2909)  time: 0.7651  data: 0.6773  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.7908 (1.7216)  acc1: 58.8000 (62.8000)  acc5: 83.2000 (85.5619)  time: 0.2285  data: 0.1498  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.9190 (1.7257)  acc1: 57.6000 (62.4000)  acc5: 82.8000 (85.2960)  time: 0.2210  data: 0.1484  max mem: 9147
Test: Total time: 0:00:10 (0.4093 s / it)
* Acc@1 62.724 Acc@5 84.956 loss 1.719
Accuracy of the model on the 50000 test images: 62.7%
Max accuracy: 62.72%
Epoch: [23]  [   0/1251]  eta: 0:59:05  lr: 0.003999  min_lr: 0.003999  loss: 3.1482 (3.1482)  weight_decay: 0.0500 (0.0500)  time: 2.8343  data: 1.8981  max mem: 9147
Epoch: [23]  [ 200/1251]  eta: 0:03:58  lr: 0.003999  min_lr: 0.003999  loss: 3.1674 (3.7252)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8077 (1.0331)  time: 0.2090  data: 0.0008  max mem: 9147
Epoch: [23]  [ 400/1251]  eta: 0:03:08  lr: 0.003999  min_lr: 0.003999  loss: 3.4820 (3.7219)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9980 (1.0115)  time: 0.2356  data: 0.0007  max mem: 9147
Epoch: [23]  [ 600/1251]  eta: 0:02:21  lr: 0.003998  min_lr: 0.003998  loss: 3.2231 (3.7046)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8247 (0.9740)  time: 0.2237  data: 0.0042  max mem: 9147
Epoch: [23]  [ 800/1251]  eta: 0:01:37  lr: 0.003998  min_lr: 0.003998  loss: 3.8834 (3.7268)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8945 (0.9819)  time: 0.2172  data: 0.0008  max mem: 9147
Epoch: [23]  [1000/1251]  eta: 0:00:53  lr: 0.003998  min_lr: 0.003998  loss: 3.7426 (3.7327)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9061 (0.9786)  time: 0.1885  data: 0.0006  max mem: 9147
Epoch: [23]  [1200/1251]  eta: 0:00:10  lr: 0.003998  min_lr: 0.003998  loss: 4.7461 (3.7585)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8038 (0.9815)  time: 0.1884  data: 0.0004  max mem: 9147
Epoch: [23]  [1250/1251]  eta: 0:00:00  lr: 0.003998  min_lr: 0.003998  loss: 3.1352 (3.7570)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8873 (0.9910)  time: 0.1382  data: 0.0009  max mem: 9147
Epoch: [23] Total time: 0:04:21 (0.2088 s / it)
Averaged stats: lr: 0.003998  min_lr: 0.003998  loss: 3.1352 (3.7671)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8873 (0.9910)
Test:  [ 0/25]  eta: 0:01:20  loss: 1.2638 (1.2638)  acc1: 74.4000 (74.4000)  acc5: 90.8000 (90.8000)  time: 3.2291  data: 3.1487  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 1.4221 (1.4136)  acc1: 71.6000 (68.9091)  acc5: 90.8000 (89.2727)  time: 0.6167  data: 0.5274  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.7978 (1.6928)  acc1: 60.8000 (63.7714)  acc5: 83.6000 (85.4476)  time: 0.2965  data: 0.2141  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.8258 (1.7133)  acc1: 60.8000 (63.2640)  acc5: 83.6000 (85.1680)  time: 0.2322  data: 0.1520  max mem: 9147
Test: Total time: 0:00:09 (0.3973 s / it)
* Acc@1 63.012 Acc@5 85.044 loss 1.709
Accuracy of the model on the 50000 test images: 63.0%
Max accuracy: 63.01%
Epoch: [24]  [   0/1251]  eta: 1:03:06  lr: 0.003998  min_lr: 0.003998  loss: 2.9795 (2.9795)  weight_decay: 0.0500 (0.0500)  time: 3.0265  data: 2.8247  max mem: 9147
Epoch: [24]  [ 200/1251]  eta: 0:03:39  lr: 0.003998  min_lr: 0.003998  loss: 4.4211 (3.7149)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9300 (0.9326)  time: 0.2049  data: 0.0007  max mem: 9147
Epoch: [24]  [ 400/1251]  eta: 0:03:01  lr: 0.003998  min_lr: 0.003998  loss: 3.9145 (3.7692)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7955 (0.9092)  time: 0.2100  data: 0.0006  max mem: 9147
Epoch: [24]  [ 600/1251]  eta: 0:02:18  lr: 0.003997  min_lr: 0.003997  loss: 3.5535 (3.7547)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7565 (0.8982)  time: 0.2148  data: 0.0169  max mem: 9147
Epoch: [24]  [ 800/1251]  eta: 0:01:35  lr: 0.003997  min_lr: 0.003997  loss: 3.6464 (3.7441)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0347 (0.9597)  time: 0.2129  data: 0.0007  max mem: 9147
Epoch: [24]  [1000/1251]  eta: 0:00:53  lr: 0.003997  min_lr: 0.003997  loss: 4.0169 (3.7623)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8018 (0.9467)  time: 0.1899  data: 0.0005  max mem: 9147
Epoch: [24]  [1200/1251]  eta: 0:00:10  lr: 0.003997  min_lr: 0.003997  loss: 3.7166 (3.7682)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9392 (0.9362)  time: 0.1941  data: 0.0005  max mem: 9147
Epoch: [24]  [1250/1251]  eta: 0:00:00  lr: 0.003997  min_lr: 0.003997  loss: 3.4863 (3.7656)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8371 (0.9323)  time: 0.1392  data: 0.0009  max mem: 9147
Epoch: [24] Total time: 0:04:20 (0.2080 s / it)
Averaged stats: lr: 0.003997  min_lr: 0.003997  loss: 3.4863 (3.7597)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8371 (0.9323)
Test:  [ 0/25]  eta: 0:02:21  loss: 1.2527 (1.2527)  acc1: 73.2000 (73.2000)  acc5: 91.6000 (91.6000)  time: 5.6733  data: 5.5929  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.3746 (1.4567)  acc1: 70.8000 (68.9455)  acc5: 91.6000 (90.1091)  time: 0.7625  data: 0.6737  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.7674 (1.7271)  acc1: 59.6000 (63.7905)  acc5: 83.6000 (85.7143)  time: 0.2207  data: 0.1394  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.9762 (1.7418)  acc1: 59.2000 (63.3440)  acc5: 82.0000 (85.6160)  time: 0.2147  data: 0.1360  max mem: 9147
Test: Total time: 0:00:10 (0.4181 s / it)
* Acc@1 63.712 Acc@5 85.650 loss 1.724
Accuracy of the model on the 50000 test images: 63.7%
Max accuracy: 63.71%
Epoch: [25]  [   0/1251]  eta: 1:08:31  lr: 0.003997  min_lr: 0.003997  loss: 2.8807 (2.8807)  weight_decay: 0.0500 (0.0500)  time: 3.2863  data: 3.1131  max mem: 9147
Epoch: [25]  [ 200/1251]  eta: 0:03:37  lr: 0.003997  min_lr: 0.003997  loss: 3.6979 (3.7551)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8871 (0.9119)  time: 0.1950  data: 0.0006  max mem: 9147
Epoch: [25]  [ 400/1251]  eta: 0:02:55  lr: 0.003996  min_lr: 0.003996  loss: 3.5640 (3.7510)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8498 (inf)  time: 0.2289  data: 0.0008  max mem: 9147
Epoch: [25]  [ 600/1251]  eta: 0:02:16  lr: 0.003996  min_lr: 0.003996  loss: 3.2274 (3.7117)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9322 (inf)  time: 0.2297  data: 0.0006  max mem: 9147
Epoch: [25]  [ 800/1251]  eta: 0:01:35  lr: 0.003996  min_lr: 0.003996  loss: 3.9313 (3.7232)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8423 (inf)  time: 0.2241  data: 0.0026  max mem: 9147
Epoch: [25]  [1000/1251]  eta: 0:00:52  lr: 0.003996  min_lr: 0.003996  loss: 3.4498 (3.7237)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8068 (inf)  time: 0.2007  data: 0.0007  max mem: 9147
Epoch: [25]  [1200/1251]  eta: 0:00:10  lr: 0.003996  min_lr: 0.003996  loss: 3.6315 (3.7337)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9557 (inf)  time: 0.1906  data: 0.0012  max mem: 9147
Epoch: [25]  [1250/1251]  eta: 0:00:00  lr: 0.003995  min_lr: 0.003995  loss: 3.1450 (3.7303)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0362 (inf)  time: 0.1400  data: 0.0012  max mem: 9147
Epoch: [25] Total time: 0:04:18 (0.2067 s / it)
Averaged stats: lr: 0.003995  min_lr: 0.003995  loss: 3.1450 (3.7307)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0362 (inf)
Test:  [ 0/25]  eta: 0:02:19  loss: 1.1599 (1.1599)  acc1: 77.2000 (77.2000)  acc5: 92.4000 (92.4000)  time: 5.5943  data: 5.5136  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.2611 (1.3798)  acc1: 69.6000 (68.6909)  acc5: 92.0000 (90.5091)  time: 0.7507  data: 0.6572  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.8023 (1.6561)  acc1: 58.0000 (63.1619)  acc5: 83.6000 (86.0571)  time: 0.2198  data: 0.1365  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.8373 (1.6608)  acc1: 58.0000 (63.1520)  acc5: 82.8000 (86.1120)  time: 0.2197  data: 0.1410  max mem: 9147
Test: Total time: 0:00:10 (0.4173 s / it)
* Acc@1 63.984 Acc@5 86.042 loss 1.651
Accuracy of the model on the 50000 test images: 64.0%
Max accuracy: 63.98%
Epoch: [26]  [   0/1251]  eta: 1:02:34  lr: 0.003995  min_lr: 0.003995  loss: 4.6978 (4.6978)  weight_decay: 0.0500 (0.0500)  time: 3.0014  data: 2.8103  max mem: 9147
Epoch: [26]  [ 200/1251]  eta: 0:03:31  lr: 0.003995  min_lr: 0.003995  loss: 3.0862 (3.6642)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8614 (0.8934)  time: 0.1889  data: 0.0015  max mem: 9147
Epoch: [26]  [ 400/1251]  eta: 0:02:46  lr: 0.003995  min_lr: 0.003995  loss: 3.2223 (3.6724)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9438 (0.8802)  time: 0.1918  data: 0.0012  max mem: 9147
Epoch: [26]  [ 600/1251]  eta: 0:02:06  lr: 0.003995  min_lr: 0.003995  loss: 4.1334 (3.6887)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8248 (0.8789)  time: 0.2000  data: 0.0005  max mem: 9147
Epoch: [26]  [ 800/1251]  eta: 0:01:29  lr: 0.003994  min_lr: 0.003994  loss: 3.1729 (3.7011)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1280 (0.9371)  time: 0.2020  data: 0.0006  max mem: 9147
Epoch: [26]  [1000/1251]  eta: 0:00:50  lr: 0.003994  min_lr: 0.003994  loss: 4.5440 (3.7261)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8658 (0.9431)  time: 0.2152  data: 0.0008  max mem: 9147
Epoch: [26]  [1200/1251]  eta: 0:00:10  lr: 0.003994  min_lr: 0.003994  loss: 3.0961 (3.7334)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6812 (0.9265)  time: 0.2052  data: 0.0007  max mem: 9147
Epoch: [26]  [1250/1251]  eta: 0:00:00  lr: 0.003994  min_lr: 0.003994  loss: 2.9394 (3.7274)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7925 (0.9221)  time: 0.1502  data: 0.0013  max mem: 9147
Epoch: [26] Total time: 0:04:16 (0.2049 s / it)
Averaged stats: lr: 0.003994  min_lr: 0.003994  loss: 2.9394 (3.7195)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7925 (0.9221)
Test:  [ 0/25]  eta: 0:02:18  loss: 1.0598 (1.0598)  acc1: 77.6000 (77.6000)  acc5: 90.8000 (90.8000)  time: 5.5463  data: 5.4642  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.2012 (1.3071)  acc1: 72.4000 (70.5091)  acc5: 90.8000 (90.6182)  time: 0.7221  data: 0.6406  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.6644 (1.6084)  acc1: 60.0000 (64.5333)  acc5: 84.4000 (85.8476)  time: 0.2051  data: 0.1268  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.8155 (1.6092)  acc1: 60.0000 (64.3840)  acc5: 83.6000 (86.0640)  time: 0.2006  data: 0.1268  max mem: 9147
Test: Total time: 0:00:10 (0.4002 s / it)
* Acc@1 64.670 Acc@5 86.326 loss 1.602
Accuracy of the model on the 50000 test images: 64.7%
Max accuracy: 64.67%
Epoch: [27]  [   0/1251]  eta: 0:55:30  lr: 0.003994  min_lr: 0.003994  loss: 3.7336 (3.7336)  weight_decay: 0.0500 (0.0500)  time: 2.6624  data: 2.4485  max mem: 9147
Epoch: [27]  [ 200/1251]  eta: 0:03:48  lr: 0.003994  min_lr: 0.003994  loss: 3.9223 (3.6019)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7426 (0.8086)  time: 0.2052  data: 0.0007  max mem: 9147
Epoch: [27]  [ 400/1251]  eta: 0:03:01  lr: 0.003993  min_lr: 0.003993  loss: 3.0916 (3.6422)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7491 (0.7862)  time: 0.2046  data: 0.0007  max mem: 9147
Epoch: [27]  [ 600/1251]  eta: 0:02:15  lr: 0.003993  min_lr: 0.003993  loss: 3.0288 (3.6331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7985 (0.8080)  time: 0.1949  data: 0.0007  max mem: 9147
Epoch: [27]  [ 800/1251]  eta: 0:01:32  lr: 0.003993  min_lr: 0.003993  loss: 3.0955 (3.6357)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8160 (0.8598)  time: 0.2005  data: 0.0007  max mem: 9147
Epoch: [27]  [1000/1251]  eta: 0:00:51  lr: 0.003992  min_lr: 0.003992  loss: 3.4852 (3.6440)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8030 (0.8690)  time: 0.2002  data: 0.0006  max mem: 9147
Epoch: [27]  [1200/1251]  eta: 0:00:10  lr: 0.003992  min_lr: 0.003992  loss: 3.9259 (3.6727)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9673 (0.8721)  time: 0.2010  data: 0.0007  max mem: 9147
Epoch: [27]  [1250/1251]  eta: 0:00:00  lr: 0.003992  min_lr: 0.003992  loss: 3.0619 (3.6720)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9996 (0.8758)  time: 0.1470  data: 0.0009  max mem: 9147
Epoch: [27] Total time: 0:04:18 (0.2070 s / it)
Averaged stats: lr: 0.003992  min_lr: 0.003992  loss: 3.0619 (3.6810)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9996 (0.8758)
Test:  [ 0/25]  eta: 0:02:18  loss: 1.1379 (1.1379)  acc1: 75.2000 (75.2000)  acc5: 92.4000 (92.4000)  time: 5.5522  data: 5.4718  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1893 (1.3277)  acc1: 69.6000 (70.4364)  acc5: 92.0000 (90.9455)  time: 0.7545  data: 0.6760  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.7664 (1.6039)  acc1: 60.4000 (64.6476)  acc5: 83.6000 (86.2857)  time: 0.2083  data: 0.1344  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.8125 (1.6169)  acc1: 59.2000 (64.3200)  acc5: 82.0000 (85.9360)  time: 0.2059  data: 0.1344  max mem: 9147
Test: Total time: 0:00:10 (0.4041 s / it)
* Acc@1 64.376 Acc@5 86.120 loss 1.620
Accuracy of the model on the 50000 test images: 64.4%
Max accuracy: 64.67%
Epoch: [28]  [   0/1251]  eta: 1:04:41  lr: 0.003992  min_lr: 0.003992  loss: 3.1696 (3.1696)  weight_decay: 0.0500 (0.0500)  time: 3.1027  data: 1.5759  max mem: 9147
Epoch: [28]  [ 200/1251]  eta: 0:03:55  lr: 0.003992  min_lr: 0.003992  loss: 3.0986 (3.5996)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8087 (0.8364)  time: 0.2372  data: 0.0007  max mem: 9147
Epoch: [28]  [ 400/1251]  eta: 0:03:08  lr: 0.003991  min_lr: 0.003991  loss: 3.2491 (3.6405)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8921 (0.8820)  time: 0.2169  data: 0.0007  max mem: 9147
Epoch: [28]  [ 600/1251]  eta: 0:02:21  lr: 0.003991  min_lr: 0.003991  loss: 3.1290 (3.6514)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1513 (0.9597)  time: 0.2056  data: 0.0007  max mem: 9147
Epoch: [28]  [ 800/1251]  eta: 0:01:37  lr: 0.003991  min_lr: 0.003991  loss: 3.1182 (3.6555)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7145 (0.9046)  time: 0.2254  data: 0.0006  max mem: 9147
Epoch: [28]  [1000/1251]  eta: 0:00:53  lr: 0.003990  min_lr: 0.003990  loss: 2.9445 (3.6547)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7141 (0.8952)  time: 0.2057  data: 0.0007  max mem: 9147
Epoch: [28]  [1200/1251]  eta: 0:00:10  lr: 0.003990  min_lr: 0.003990  loss: 3.2170 (3.6773)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6817 (0.8794)  time: 0.2005  data: 0.0006  max mem: 9147
Epoch: [28]  [1250/1251]  eta: 0:00:00  lr: 0.003990  min_lr: 0.003990  loss: 4.4780 (3.6838)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8514 (0.8832)  time: 0.1473  data: 0.0012  max mem: 9147
Epoch: [28] Total time: 0:04:28 (0.2149 s / it)
Averaged stats: lr: 0.003990  min_lr: 0.003990  loss: 4.4780 (3.6684)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8514 (0.8832)
Test:  [ 0/25]  eta: 0:02:14  loss: 1.2735 (1.2735)  acc1: 77.2000 (77.2000)  acc5: 92.4000 (92.4000)  time: 5.3910  data: 5.2933  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.4212 (1.4302)  acc1: 70.0000 (70.6182)  acc5: 92.4000 (90.8364)  time: 0.7663  data: 0.6756  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.8375 (1.7144)  acc1: 60.8000 (64.9905)  acc5: 84.8000 (86.2857)  time: 0.2321  data: 0.1515  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.8891 (1.7285)  acc1: 58.8000 (64.4160)  acc5: 81.6000 (85.9680)  time: 0.2260  data: 0.1515  max mem: 9147
Test: Total time: 0:00:10 (0.4154 s / it)
* Acc@1 63.932 Acc@5 85.962 loss 1.740
Accuracy of the model on the 50000 test images: 63.9%
Max accuracy: 64.67%
Epoch: [29]  [   0/1251]  eta: 1:08:50  lr: 0.003990  min_lr: 0.003990  loss: 4.8711 (4.8711)  weight_decay: 0.0500 (0.0500)  time: 3.3018  data: 2.7491  max mem: 9147
Epoch: [29]  [ 200/1251]  eta: 0:03:56  lr: 0.003989  min_lr: 0.003989  loss: 3.0586 (3.6320)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8600 (0.8844)  time: 0.2167  data: 0.0008  max mem: 9147
Epoch: [29]  [ 400/1251]  eta: 0:03:03  lr: 0.003989  min_lr: 0.003989  loss: 3.6637 (3.6550)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8641 (0.8858)  time: 0.2096  data: 0.0007  max mem: 9147
Epoch: [29]  [ 600/1251]  eta: 0:02:21  lr: 0.003989  min_lr: 0.003989  loss: 3.4058 (3.6734)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9441 (0.8948)  time: 0.1947  data: 0.0006  max mem: 9147
Epoch: [29]  [ 800/1251]  eta: 0:01:35  lr: 0.003988  min_lr: 0.003988  loss: 3.1229 (3.6849)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0745 (0.9073)  time: 0.1997  data: 0.0006  max mem: 9147
Epoch: [29]  [1000/1251]  eta: 0:00:53  lr: 0.003988  min_lr: 0.003988  loss: 3.4044 (3.6710)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9567 (0.9189)  time: 0.2294  data: 0.0007  max mem: 9147
Epoch: [29]  [1200/1251]  eta: 0:00:10  lr: 0.003988  min_lr: 0.003988  loss: 3.1587 (3.6788)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8396 (0.8983)  time: 0.2146  data: 0.0006  max mem: 9147
Epoch: [29]  [1250/1251]  eta: 0:00:00  lr: 0.003987  min_lr: 0.003987  loss: 3.0082 (3.6809)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8396 (0.8960)  time: 0.1476  data: 0.0012  max mem: 9147
Epoch: [29] Total time: 0:04:24 (0.2118 s / it)
Averaged stats: lr: 0.003987  min_lr: 0.003987  loss: 3.0082 (3.6543)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8396 (0.8960)
Test:  [ 0/25]  eta: 0:02:20  loss: 1.1956 (1.1956)  acc1: 77.2000 (77.2000)  acc5: 90.8000 (90.8000)  time: 5.6370  data: 5.5565  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.2533 (1.3179)  acc1: 74.4000 (70.8000)  acc5: 92.0000 (90.9455)  time: 0.7504  data: 0.6575  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.6104 (1.5782)  acc1: 62.4000 (65.8095)  acc5: 84.4000 (86.6476)  time: 0.2021  data: 0.1204  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.7637 (1.5968)  acc1: 60.8000 (65.0880)  acc5: 83.6000 (86.4960)  time: 0.2091  data: 0.1317  max mem: 9147
Test: Total time: 0:00:10 (0.4116 s / it)
* Acc@1 65.392 Acc@5 86.602 loss 1.589
Accuracy of the model on the 50000 test images: 65.4%
Max accuracy: 65.39%
Epoch: [30]  [   0/1251]  eta: 0:55:32  lr: 0.003987  min_lr: 0.003987  loss: 3.8683 (3.8683)  weight_decay: 0.0500 (0.0500)  time: 2.6638  data: 2.4552  max mem: 9147
Epoch: [30]  [ 200/1251]  eta: 0:03:47  lr: 0.003987  min_lr: 0.003987  loss: 3.0372 (3.6399)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9385 (0.9488)  time: 0.1833  data: 0.0006  max mem: 9147
Epoch: [30]  [ 400/1251]  eta: 0:03:01  lr: 0.003987  min_lr: 0.003987  loss: 3.0768 (3.6053)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8538 (0.9300)  time: 0.1998  data: 0.0006  max mem: 9147
Epoch: [30]  [ 600/1251]  eta: 0:02:18  lr: 0.003986  min_lr: 0.003986  loss: 2.9879 (3.6223)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6930 (0.8905)  time: 0.2118  data: 0.0006  max mem: 9147
Epoch: [30]  [ 800/1251]  eta: 0:01:36  lr: 0.003986  min_lr: 0.003986  loss: 3.4750 (3.6170)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7110 (0.8843)  time: 0.2097  data: 0.0006  max mem: 9147
Epoch: [30]  [1000/1251]  eta: 0:00:53  lr: 0.003985  min_lr: 0.003985  loss: 3.1699 (3.6208)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8645 (0.8698)  time: 0.2044  data: 0.0011  max mem: 9147
Epoch: [30]  [1200/1251]  eta: 0:00:10  lr: 0.003985  min_lr: 0.003985  loss: 2.9919 (3.6174)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7846 (0.8535)  time: 0.2108  data: 0.0006  max mem: 9147
Epoch: [30]  [1250/1251]  eta: 0:00:00  lr: 0.003985  min_lr: 0.003985  loss: 3.2319 (3.6206)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8127 (0.8535)  time: 0.1402  data: 0.0009  max mem: 9147
Epoch: [30] Total time: 0:04:22 (0.2096 s / it)
Averaged stats: lr: 0.003985  min_lr: 0.003985  loss: 3.2319 (3.6229)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8127 (0.8535)
Test:  [ 0/25]  eta: 0:02:14  loss: 1.1322 (1.1322)  acc1: 77.2000 (77.2000)  acc5: 93.2000 (93.2000)  time: 5.3945  data: 5.3071  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.3013 (1.3337)  acc1: 72.4000 (71.2000)  acc5: 91.2000 (90.6545)  time: 0.7208  data: 0.6357  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.6364 (1.5927)  acc1: 62.0000 (66.1905)  acc5: 84.8000 (86.8762)  time: 0.2111  data: 0.1323  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.8118 (1.6068)  acc1: 62.0000 (65.6800)  acc5: 83.6000 (86.6560)  time: 0.2068  data: 0.1322  max mem: 9147
Test: Total time: 0:00:10 (0.4006 s / it)
* Acc@1 65.856 Acc@5 87.170 loss 1.601
Accuracy of the model on the 50000 test images: 65.9%
Max accuracy: 65.86%
Epoch: [31]  [   0/1251]  eta: 1:02:38  lr: 0.003985  min_lr: 0.003985  loss: 3.3629 (3.3629)  weight_decay: 0.0500 (0.0500)  time: 3.0046  data: 2.7988  max mem: 9147
Epoch: [31]  [ 200/1251]  eta: 0:03:34  lr: 0.003984  min_lr: 0.003984  loss: 3.2257 (3.5055)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7417 (0.8492)  time: 0.1950  data: 0.0006  max mem: 9147
Epoch: [31]  [ 400/1251]  eta: 0:02:50  lr: 0.003984  min_lr: 0.003984  loss: 2.9887 (3.5367)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7350 (0.8504)  time: 0.2148  data: 0.0008  max mem: 9147
Epoch: [31]  [ 600/1251]  eta: 0:02:11  lr: 0.003983  min_lr: 0.003983  loss: 3.2665 (3.5508)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9086 (0.8631)  time: 0.2043  data: 0.0007  max mem: 9147
Epoch: [31]  [ 800/1251]  eta: 0:01:32  lr: 0.003983  min_lr: 0.003983  loss: 3.0806 (3.5906)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7961 (0.8604)  time: 0.1997  data: 0.0006  max mem: 9147
Epoch: [31]  [1000/1251]  eta: 0:00:52  lr: 0.003982  min_lr: 0.003982  loss: 3.0820 (3.5854)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8323 (0.8588)  time: 0.2050  data: 0.0008  max mem: 9147
Epoch: [31]  [1200/1251]  eta: 0:00:10  lr: 0.003982  min_lr: 0.003982  loss: 3.4223 (3.5859)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6780 (0.8570)  time: 0.2088  data: 0.0006  max mem: 9147
Epoch: [31]  [1250/1251]  eta: 0:00:00  lr: 0.003982  min_lr: 0.003982  loss: 3.3896 (3.5887)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6871 (0.8539)  time: 0.1509  data: 0.0023  max mem: 9147
Epoch: [31] Total time: 0:04:21 (0.2090 s / it)
Averaged stats: lr: 0.003982  min_lr: 0.003982  loss: 3.3896 (3.6020)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6871 (0.8539)
Test:  [ 0/25]  eta: 0:01:45  loss: 1.2583 (1.2583)  acc1: 74.4000 (74.4000)  acc5: 91.6000 (91.6000)  time: 4.2177  data: 4.1372  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 1.2610 (1.3790)  acc1: 72.8000 (70.5455)  acc5: 91.6000 (91.0546)  time: 0.6221  data: 0.5363  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.7420 (1.6240)  acc1: 62.8000 (65.7333)  acc5: 86.8000 (87.3714)  time: 0.2128  data: 0.1334  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.7826 (1.6350)  acc1: 62.0000 (65.4560)  acc5: 85.2000 (87.2640)  time: 0.2123  data: 0.1367  max mem: 9147
Test: Total time: 0:00:09 (0.3849 s / it)
* Acc@1 65.550 Acc@5 87.090 loss 1.629
Accuracy of the model on the 50000 test images: 65.6%
Max accuracy: 65.86%
Epoch: [32]  [   0/1251]  eta: 1:04:13  lr: 0.003982  min_lr: 0.003982  loss: 4.6117 (4.6117)  weight_decay: 0.0500 (0.0500)  time: 3.0803  data: 2.5335  max mem: 9147
Epoch: [32]  [ 200/1251]  eta: 0:03:44  lr: 0.003981  min_lr: 0.003981  loss: 2.9791 (3.4941)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7919 (0.8513)  time: 0.2005  data: 0.0006  max mem: 9147
Epoch: [32]  [ 400/1251]  eta: 0:03:04  lr: 0.003981  min_lr: 0.003981  loss: 3.2303 (3.5345)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8453 (0.8574)  time: 0.2188  data: 0.0006  max mem: 9147
Epoch: [32]  [ 600/1251]  eta: 0:02:19  lr: 0.003980  min_lr: 0.003980  loss: 3.3318 (3.5750)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9362 (0.9197)  time: 0.2002  data: 0.0006  max mem: 9147
Epoch: [32]  [ 800/1251]  eta: 0:01:34  lr: 0.003980  min_lr: 0.003980  loss: 2.9831 (3.5950)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6861 (0.8936)  time: 0.1946  data: 0.0006  max mem: 9147
Epoch: [32]  [1000/1251]  eta: 0:00:51  lr: 0.003979  min_lr: 0.003979  loss: 3.7696 (3.6239)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8563 (0.9128)  time: 0.2054  data: 0.0008  max mem: 9147
Epoch: [32]  [1200/1251]  eta: 0:00:10  lr: 0.003979  min_lr: 0.003979  loss: 3.9124 (3.6398)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7714 (0.8921)  time: 0.2155  data: 0.0007  max mem: 9147
Epoch: [32]  [1250/1251]  eta: 0:00:00  lr: 0.003979  min_lr: 0.003979  loss: 3.0333 (3.6349)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7350 (0.8861)  time: 0.1481  data: 0.0015  max mem: 9147
Epoch: [32] Total time: 0:04:20 (0.2085 s / it)
Averaged stats: lr: 0.003979  min_lr: 0.003979  loss: 3.0333 (3.6181)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7350 (0.8861)
Test:  [ 0/25]  eta: 0:01:20  loss: 1.1972 (1.1972)  acc1: 75.2000 (75.2000)  acc5: 93.2000 (93.2000)  time: 3.2330  data: 3.1525  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 1.2905 (1.3534)  acc1: 73.2000 (71.6000)  acc5: 92.8000 (91.3091)  time: 0.5575  data: 0.4822  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.6014 (1.6028)  acc1: 63.2000 (66.5714)  acc5: 86.0000 (87.6762)  time: 0.2775  data: 0.2042  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.8095 (1.6243)  acc1: 62.0000 (65.9520)  acc5: 85.6000 (87.4560)  time: 0.2136  data: 0.1431  max mem: 9147
Test: Total time: 0:00:09 (0.3982 s / it)
* Acc@1 66.064 Acc@5 87.548 loss 1.608
Accuracy of the model on the 50000 test images: 66.1%
Max accuracy: 66.06%
Epoch: [33]  [   0/1251]  eta: 0:54:19  lr: 0.003979  min_lr: 0.003979  loss: 4.6768 (4.6768)  weight_decay: 0.0500 (0.0500)  time: 2.6051  data: 2.3860  max mem: 9147
Epoch: [33]  [ 200/1251]  eta: 0:03:53  lr: 0.003978  min_lr: 0.003978  loss: 2.9466 (3.4908)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7672 (0.8084)  time: 0.2051  data: 0.0007  max mem: 9147
Epoch: [33]  [ 400/1251]  eta: 0:03:06  lr: 0.003978  min_lr: 0.003978  loss: 3.2337 (3.5766)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8674 (0.8756)  time: 0.2146  data: 0.0006  max mem: 9147
Epoch: [33]  [ 600/1251]  eta: 0:02:20  lr: 0.003977  min_lr: 0.003977  loss: 2.9991 (3.5860)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8387 (0.8783)  time: 0.2005  data: 0.0007  max mem: 9147
Epoch: [33]  [ 800/1251]  eta: 0:01:37  lr: 0.003977  min_lr: 0.003977  loss: 3.1881 (3.5738)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8673 (0.8556)  time: 0.2268  data: 0.0007  max mem: 9147
Epoch: [33]  [1000/1251]  eta: 0:00:53  lr: 0.003976  min_lr: 0.003976  loss: 2.8911 (3.5616)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9534 (0.8793)  time: 0.2253  data: 0.0008  max mem: 9147
Epoch: [33]  [1200/1251]  eta: 0:00:10  lr: 0.003976  min_lr: 0.003976  loss: 3.1665 (3.5646)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7504 (0.8762)  time: 0.2005  data: 0.0005  max mem: 9147
Epoch: [33]  [1250/1251]  eta: 0:00:00  lr: 0.003975  min_lr: 0.003975  loss: 3.8860 (3.5808)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7504 (0.8709)  time: 0.1437  data: 0.0008  max mem: 9147
Epoch: [33] Total time: 0:04:28 (0.2144 s / it)
Averaged stats: lr: 0.003975  min_lr: 0.003975  loss: 3.8860 (3.5911)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7504 (0.8709)
Test:  [ 0/25]  eta: 0:02:19  loss: 1.3048 (1.3048)  acc1: 78.4000 (78.4000)  acc5: 92.8000 (92.8000)  time: 5.5708  data: 5.4877  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.3873 (1.4416)  acc1: 71.6000 (71.1636)  acc5: 92.8000 (91.2727)  time: 0.7826  data: 0.6920  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.7886 (1.6776)  acc1: 62.0000 (66.3619)  acc5: 85.2000 (87.1048)  time: 0.2163  data: 0.1357  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.8264 (1.6914)  acc1: 62.0000 (65.8560)  acc5: 84.4000 (87.0400)  time: 0.2102  data: 0.1356  max mem: 9147
Test: Total time: 0:00:10 (0.4103 s / it)
* Acc@1 66.056 Acc@5 87.220 loss 1.681
Accuracy of the model on the 50000 test images: 66.1%
Max accuracy: 66.06%
Epoch: [34]  [   0/1251]  eta: 1:05:48  lr: 0.003975  min_lr: 0.003975  loss: 4.6854 (4.6854)  weight_decay: 0.0500 (0.0500)  time: 3.1561  data: 1.6078  max mem: 9147
Epoch: [34]  [ 200/1251]  eta: 0:03:45  lr: 0.003975  min_lr: 0.003975  loss: 2.9048 (3.5045)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9233 (0.9454)  time: 0.1918  data: 0.0005  max mem: 9147
Epoch: [34]  [ 400/1251]  eta: 0:02:58  lr: 0.003974  min_lr: 0.003974  loss: 3.7666 (3.5192)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8204 (0.8973)  time: 0.2246  data: 0.0006  max mem: 9147
Epoch: [34]  [ 600/1251]  eta: 0:02:16  lr: 0.003974  min_lr: 0.003974  loss: 3.5529 (3.5483)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7179 (0.8406)  time: 0.1949  data: 0.0006  max mem: 9147
Epoch: [34]  [ 800/1251]  eta: 0:01:34  lr: 0.003973  min_lr: 0.003973  loss: 3.1737 (3.5391)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6918 (0.8337)  time: 0.2147  data: 0.0111  max mem: 9147
Epoch: [34]  [1000/1251]  eta: 0:00:52  lr: 0.003972  min_lr: 0.003972  loss: 3.1157 (3.5569)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9071 (0.8855)  time: 0.2300  data: 0.0240  max mem: 9147
Epoch: [34]  [1200/1251]  eta: 0:00:10  lr: 0.003972  min_lr: 0.003972  loss: 2.9073 (3.5528)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7228 (0.8652)  time: 0.2158  data: 0.0008  max mem: 9147
Epoch: [34]  [1250/1251]  eta: 0:00:00  lr: 0.003972  min_lr: 0.003972  loss: 3.1476 (3.5585)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7702 (0.8676)  time: 0.1433  data: 0.0012  max mem: 9147
Epoch: [34] Total time: 0:04:24 (0.2112 s / it)
Averaged stats: lr: 0.003972  min_lr: 0.003972  loss: 3.1476 (3.5993)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7702 (0.8676)
Test:  [ 0/25]  eta: 0:02:23  loss: 1.0617 (1.0617)  acc1: 78.8000 (78.8000)  acc5: 94.0000 (94.0000)  time: 5.7396  data: 5.6544  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.3339 (1.3427)  acc1: 74.4000 (71.8545)  acc5: 92.8000 (91.7455)  time: 0.7556  data: 0.6821  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.6109 (1.6006)  acc1: 61.6000 (66.6095)  acc5: 86.0000 (87.5429)  time: 0.2119  data: 0.1406  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.7995 (1.6156)  acc1: 61.6000 (66.3520)  acc5: 84.8000 (87.3120)  time: 0.2098  data: 0.1391  max mem: 9147
Test: Total time: 0:00:10 (0.4131 s / it)
* Acc@1 66.362 Acc@5 87.402 loss 1.613
Accuracy of the model on the 50000 test images: 66.4%
Max accuracy: 66.36%
Epoch: [35]  [   0/1251]  eta: 1:03:33  lr: 0.003972  min_lr: 0.003972  loss: 4.4091 (4.4091)  weight_decay: 0.0500 (0.0500)  time: 3.0482  data: 2.8386  max mem: 9147
Epoch: [35]  [ 200/1251]  eta: 0:03:34  lr: 0.003971  min_lr: 0.003971  loss: 3.4305 (3.6330)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7576 (0.8888)  time: 0.1908  data: 0.0005  max mem: 9147
Epoch: [35]  [ 400/1251]  eta: 0:02:53  lr: 0.003971  min_lr: 0.003971  loss: 3.3806 (3.5678)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7612 (0.8664)  time: 0.2201  data: 0.0007  max mem: 9147
Epoch: [35]  [ 600/1251]  eta: 0:02:13  lr: 0.003970  min_lr: 0.003970  loss: 3.4578 (3.5927)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.2094  data: 0.0006  max mem: 9147
Epoch: [35]  [ 800/1251]  eta: 0:01:33  lr: 0.003969  min_lr: 0.003969  loss: 3.1670 (3.5815)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7976 (nan)  time: 0.1949  data: 0.0006  max mem: 9147
Epoch: [35]  [1000/1251]  eta: 0:00:51  lr: 0.003969  min_lr: 0.003969  loss: 2.9991 (3.5799)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7364 (nan)  time: 0.2260  data: 0.0006  max mem: 9147
Epoch: [35]  [1200/1251]  eta: 0:00:10  lr: 0.003968  min_lr: 0.003968  loss: 2.9779 (3.5611)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6650 (nan)  time: 0.1955  data: 0.0010  max mem: 9147
Epoch: [35]  [1250/1251]  eta: 0:00:00  lr: 0.003968  min_lr: 0.003968  loss: 4.1225 (3.5658)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7184 (nan)  time: 0.1390  data: 0.0006  max mem: 9147
Epoch: [35] Total time: 0:04:15 (0.2044 s / it)
Averaged stats: lr: 0.003968  min_lr: 0.003968  loss: 4.1225 (3.5715)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7184 (nan)
Test:  [ 0/25]  eta: 0:02:23  loss: 1.0713 (1.0713)  acc1: 78.8000 (78.8000)  acc5: 93.6000 (93.6000)  time: 5.7202  data: 5.6398  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.2562 (1.2720)  acc1: 73.6000 (73.0182)  acc5: 92.8000 (91.8546)  time: 0.6988  data: 0.6244  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.5574 (1.5520)  acc1: 62.0000 (67.2571)  acc5: 85.6000 (87.6952)  time: 0.1814  data: 0.1099  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.7800 (1.5705)  acc1: 62.0000 (66.7040)  acc5: 84.4000 (87.4880)  time: 0.1905  data: 0.1202  max mem: 9147
Test: Total time: 0:00:10 (0.4020 s / it)
* Acc@1 66.672 Acc@5 87.794 loss 1.568
Accuracy of the model on the 50000 test images: 66.7%
Max accuracy: 66.67%
Epoch: [36]  [   0/1251]  eta: 1:00:48  lr: 0.003968  min_lr: 0.003968  loss: 4.2256 (4.2256)  weight_decay: 0.0500 (0.0500)  time: 2.9161  data: 2.7155  max mem: 9147
Epoch: [36]  [ 200/1251]  eta: 0:03:53  lr: 0.003967  min_lr: 0.003967  loss: 3.1471 (3.6271)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6592 (0.7834)  time: 0.2152  data: 0.0007  max mem: 9147
Epoch: [36]  [ 400/1251]  eta: 0:03:06  lr: 0.003967  min_lr: 0.003967  loss: 3.0910 (3.6046)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7917 (0.8105)  time: 0.2146  data: 0.0006  max mem: 9147
Epoch: [36]  [ 600/1251]  eta: 0:02:21  lr: 0.003966  min_lr: 0.003966  loss: 3.0359 (3.5832)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8316 (0.8600)  time: 0.2095  data: 0.0007  max mem: 9147
Epoch: [36]  [ 800/1251]  eta: 0:01:37  lr: 0.003965  min_lr: 0.003965  loss: 3.1186 (3.5659)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8132 (0.8904)  time: 0.2204  data: 0.0008  max mem: 9147
Epoch: [36]  [1000/1251]  eta: 0:00:53  lr: 0.003965  min_lr: 0.003965  loss: 2.9480 (3.5754)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7109 (0.8833)  time: 0.1969  data: 0.0010  max mem: 9147
Epoch: [36]  [1200/1251]  eta: 0:00:10  lr: 0.003964  min_lr: 0.003964  loss: 2.8667 (3.5773)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7757 (0.8705)  time: 0.1867  data: 0.0005  max mem: 9147
Epoch: [36]  [1250/1251]  eta: 0:00:00  lr: 0.003964  min_lr: 0.003964  loss: 2.8946 (3.5781)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7235 (0.8660)  time: 0.1376  data: 0.0007  max mem: 9147
Epoch: [36] Total time: 0:04:21 (0.2087 s / it)
Averaged stats: lr: 0.003964  min_lr: 0.003964  loss: 2.8946 (3.5493)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7235 (0.8660)
Test:  [ 0/25]  eta: 0:02:26  loss: 1.0200 (1.0200)  acc1: 80.4000 (80.4000)  acc5: 94.8000 (94.8000)  time: 5.8462  data: 5.7495  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.2596 (1.2695)  acc1: 74.4000 (72.1091)  acc5: 92.4000 (91.8545)  time: 0.7773  data: 0.7023  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.5597 (1.5301)  acc1: 63.6000 (66.4571)  acc5: 86.8000 (87.7333)  time: 0.2184  data: 0.1458  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.6849 (1.5445)  acc1: 63.6000 (66.3520)  acc5: 84.4000 (87.6480)  time: 0.2173  data: 0.1457  max mem: 9147
Test: Total time: 0:00:10 (0.4225 s / it)
* Acc@1 66.624 Acc@5 87.902 loss 1.537
Accuracy of the model on the 50000 test images: 66.6%
Max accuracy: 66.67%
Epoch: [37]  [   0/1251]  eta: 1:06:32  lr: 0.003964  min_lr: 0.003964  loss: 2.6310 (2.6310)  weight_decay: 0.0500 (0.0500)  time: 3.1917  data: 1.7862  max mem: 9147
Epoch: [37]  [ 200/1251]  eta: 0:03:59  lr: 0.003963  min_lr: 0.003963  loss: 3.2002 (3.5094)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6303 (0.7278)  time: 0.2201  data: 0.0007  max mem: 9147
Epoch: [37]  [ 400/1251]  eta: 0:03:09  lr: 0.003962  min_lr: 0.003962  loss: 3.5278 (3.5654)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7635 (0.8607)  time: 0.2091  data: 0.0006  max mem: 9147
Epoch: [37]  [ 600/1251]  eta: 0:02:22  lr: 0.003962  min_lr: 0.003962  loss: 2.9688 (3.5387)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6476 (0.8161)  time: 0.2269  data: 0.0007  max mem: 9147
Epoch: [37]  [ 800/1251]  eta: 0:01:36  lr: 0.003961  min_lr: 0.003961  loss: 3.3967 (3.5420)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6845 (0.7846)  time: 0.2060  data: 0.0008  max mem: 9147
Epoch: [37]  [1000/1251]  eta: 0:00:53  lr: 0.003960  min_lr: 0.003960  loss: 4.5521 (3.5605)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6850 (0.8059)  time: 0.2201  data: 0.0007  max mem: 9147
Epoch: [37]  [1200/1251]  eta: 0:00:10  lr: 0.003960  min_lr: 0.003960  loss: 3.0523 (3.5513)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7497 (0.8489)  time: 0.2181  data: 0.0006  max mem: 9147
Epoch: [37]  [1250/1251]  eta: 0:00:00  lr: 0.003959  min_lr: 0.003959  loss: 3.9187 (3.5541)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6839 (0.8427)  time: 0.1427  data: 0.0011  max mem: 9147
Epoch: [37] Total time: 0:04:26 (0.2129 s / it)
Averaged stats: lr: 0.003959  min_lr: 0.003959  loss: 3.9187 (3.5429)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6839 (0.8427)
Test:  [ 0/25]  eta: 0:02:23  loss: 1.2127 (1.2127)  acc1: 76.0000 (76.0000)  acc5: 92.0000 (92.0000)  time: 5.7327  data: 5.6330  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.2127 (1.3526)  acc1: 75.2000 (72.2545)  acc5: 92.0000 (91.5273)  time: 0.7842  data: 0.7058  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.7239 (1.6318)  acc1: 61.2000 (66.6095)  acc5: 85.6000 (87.2571)  time: 0.2241  data: 0.1502  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.7973 (1.6399)  acc1: 61.2000 (66.4000)  acc5: 84.8000 (87.3280)  time: 0.2229  data: 0.1501  max mem: 9147
Test: Total time: 0:00:10 (0.4229 s / it)
* Acc@1 66.578 Acc@5 87.672 loss 1.626
Accuracy of the model on the 50000 test images: 66.6%
Max accuracy: 66.67%
Epoch: [38]  [   0/1251]  eta: 1:05:06  lr: 0.003959  min_lr: 0.003959  loss: 3.0647 (3.0647)  weight_decay: 0.0500 (0.0500)  time: 3.1231  data: 2.3296  max mem: 9147
Epoch: [38]  [ 200/1251]  eta: 0:03:46  lr: 0.003959  min_lr: 0.003959  loss: 3.0669 (3.5966)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6480 (1.0134)  time: 0.1875  data: 0.0006  max mem: 9147
Epoch: [38]  [ 400/1251]  eta: 0:02:53  lr: 0.003958  min_lr: 0.003958  loss: 2.8986 (3.5046)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6520 (0.8685)  time: 0.1901  data: 0.0010  max mem: 9147
Epoch: [38]  [ 600/1251]  eta: 0:02:10  lr: 0.003957  min_lr: 0.003957  loss: 3.1364 (3.5158)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9006 (0.8828)  time: 0.1904  data: 0.0013  max mem: 9147
Epoch: [38]  [ 800/1251]  eta: 0:01:31  lr: 0.003956  min_lr: 0.003956  loss: 2.9607 (3.5288)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7849 (0.8784)  time: 0.2003  data: 0.0160  max mem: 9147
Epoch: [38]  [1000/1251]  eta: 0:00:51  lr: 0.003956  min_lr: 0.003956  loss: 3.0255 (3.5312)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6929 (0.8959)  time: 0.2109  data: 0.0007  max mem: 9147
Epoch: [38]  [1200/1251]  eta: 0:00:10  lr: 0.003955  min_lr: 0.003955  loss: 2.9447 (3.5241)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8654 (0.8731)  time: 0.1898  data: 0.0005  max mem: 9147
Epoch: [38]  [1250/1251]  eta: 0:00:00  lr: 0.003955  min_lr: 0.003955  loss: 3.1038 (3.5256)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7695 (0.8702)  time: 0.1383  data: 0.0015  max mem: 9147
Epoch: [38] Total time: 0:04:14 (0.2038 s / it)
Averaged stats: lr: 0.003955  min_lr: 0.003955  loss: 3.1038 (3.5553)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7695 (0.8702)
Test:  [ 0/25]  eta: 0:02:16  loss: 1.0992 (1.0992)  acc1: 77.6000 (77.6000)  acc5: 93.2000 (93.2000)  time: 5.4738  data: 5.3932  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.2298 (1.3072)  acc1: 72.0000 (72.0364)  acc5: 92.4000 (91.8546)  time: 0.7342  data: 0.6453  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.7222 (1.5882)  acc1: 64.8000 (67.4286)  acc5: 85.6000 (87.8286)  time: 0.2069  data: 0.1229  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.7391 (1.6093)  acc1: 63.6000 (67.0240)  acc5: 85.6000 (87.6320)  time: 0.2014  data: 0.1230  max mem: 9147
Test: Total time: 0:00:10 (0.4026 s / it)
* Acc@1 66.862 Acc@5 87.826 loss 1.604
Accuracy of the model on the 50000 test images: 66.9%
Max accuracy: 66.86%
Epoch: [39]  [   0/1251]  eta: 0:55:38  lr: 0.003955  min_lr: 0.003955  loss: 3.4691 (3.4691)  weight_decay: 0.0500 (0.0500)  time: 2.6683  data: 2.4573  max mem: 9147
Epoch: [39]  [ 200/1251]  eta: 0:03:55  lr: 0.003954  min_lr: 0.003954  loss: 3.0793 (3.5202)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7852 (0.7784)  time: 0.2145  data: 0.0009  max mem: 9147
Epoch: [39]  [ 400/1251]  eta: 0:03:07  lr: 0.003953  min_lr: 0.003953  loss: 3.9943 (3.5131)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7895 (0.8012)  time: 0.2075  data: 0.0011  max mem: 9147
Epoch: [39]  [ 600/1251]  eta: 0:02:19  lr: 0.003952  min_lr: 0.003952  loss: 3.0971 (3.5283)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7762 (0.8187)  time: 0.1898  data: 0.0006  max mem: 9147
Epoch: [39]  [ 800/1251]  eta: 0:01:33  lr: 0.003952  min_lr: 0.003952  loss: 3.8922 (3.5362)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7061 (0.8122)  time: 0.2010  data: 0.0005  max mem: 9147
Epoch: [39]  [1000/1251]  eta: 0:00:52  lr: 0.003951  min_lr: 0.003951  loss: 2.9864 (3.5533)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7838 (0.8097)  time: 0.2319  data: 0.0007  max mem: 9147
Epoch: [39]  [1200/1251]  eta: 0:00:10  lr: 0.003950  min_lr: 0.003950  loss: 3.6081 (3.5536)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6840 (0.7966)  time: 0.2186  data: 0.0007  max mem: 9147
Epoch: [39]  [1250/1251]  eta: 0:00:00  lr: 0.003950  min_lr: 0.003950  loss: 2.9048 (3.5457)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7632 (0.8006)  time: 0.1409  data: 0.0013  max mem: 9147
Epoch: [39] Total time: 0:04:23 (0.2107 s / it)
Averaged stats: lr: 0.003950  min_lr: 0.003950  loss: 2.9048 (3.5214)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7632 (0.8006)
Test:  [ 0/25]  eta: 0:02:20  loss: 1.0661 (1.0661)  acc1: 78.4000 (78.4000)  acc5: 93.6000 (93.6000)  time: 5.6352  data: 5.5547  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1641 (1.2340)  acc1: 73.6000 (73.8182)  acc5: 93.2000 (92.6909)  time: 0.7559  data: 0.6670  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.6840 (1.5110)  acc1: 64.8000 (68.3429)  acc5: 86.8000 (88.1333)  time: 0.2098  data: 0.1290  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.6840 (1.5400)  acc1: 65.6000 (68.0640)  acc5: 84.8000 (87.9520)  time: 0.2053  data: 0.1300  max mem: 9147
Test: Total time: 0:00:10 (0.4093 s / it)
* Acc@1 67.722 Acc@5 88.262 loss 1.523
Accuracy of the model on the 50000 test images: 67.7%
Max accuracy: 67.72%
Epoch: [40]  [   0/1251]  eta: 1:00:33  lr: 0.003950  min_lr: 0.003950  loss: 2.9453 (2.9453)  weight_decay: 0.0500 (0.0500)  time: 2.9041  data: 2.6755  max mem: 9147
Epoch: [40]  [ 200/1251]  eta: 0:03:45  lr: 0.003949  min_lr: 0.003949  loss: 3.0079 (3.4537)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8662 (0.8979)  time: 0.1998  data: 0.0007  max mem: 9147
Epoch: [40]  [ 400/1251]  eta: 0:02:53  lr: 0.003948  min_lr: 0.003948  loss: 3.1066 (3.5178)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7610 (0.8607)  time: 0.1887  data: 0.0005  max mem: 9147
Epoch: [40]  [ 600/1251]  eta: 0:02:09  lr: 0.003947  min_lr: 0.003947  loss: 2.9186 (3.4678)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7734 (0.8484)  time: 0.1853  data: 0.0006  max mem: 9147
Epoch: [40]  [ 800/1251]  eta: 0:01:31  lr: 0.003947  min_lr: 0.003947  loss: 2.9663 (3.4618)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7794 (0.8432)  time: 0.2293  data: 0.0202  max mem: 9147
Epoch: [40]  [1000/1251]  eta: 0:00:51  lr: 0.003946  min_lr: 0.003946  loss: 4.1184 (3.4725)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5996 (0.8197)  time: 0.2050  data: 0.0006  max mem: 9147
Epoch: [40]  [1200/1251]  eta: 0:00:10  lr: 0.003945  min_lr: 0.003945  loss: 2.9695 (3.4851)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6984 (0.8095)  time: 0.1813  data: 0.0005  max mem: 9147
Epoch: [40]  [1250/1251]  eta: 0:00:00  lr: 0.003945  min_lr: 0.003945  loss: 2.8560 (3.4841)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7200 (0.8102)  time: 0.1373  data: 0.0009  max mem: 9147
Epoch: [40] Total time: 0:04:17 (0.2060 s / it)
Averaged stats: lr: 0.003945  min_lr: 0.003945  loss: 2.8560 (3.5010)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7200 (0.8102)
Test:  [ 0/25]  eta: 0:02:15  loss: 0.9764 (0.9764)  acc1: 79.2000 (79.2000)  acc5: 93.6000 (93.6000)  time: 5.4083  data: 5.3279  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1295 (1.1563)  acc1: 76.0000 (73.7818)  acc5: 92.8000 (92.4727)  time: 0.7685  data: 0.6924  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4424 (1.4348)  acc1: 66.0000 (68.0381)  acc5: 88.4000 (88.2667)  time: 0.2336  data: 0.1602  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5800 (1.4391)  acc1: 64.4000 (67.6960)  acc5: 85.2000 (88.1760)  time: 0.2318  data: 0.1601  max mem: 9147
Test: Total time: 0:00:10 (0.4174 s / it)
* Acc@1 68.058 Acc@5 88.520 loss 1.427
Accuracy of the model on the 50000 test images: 68.1%
Max accuracy: 68.06%
Epoch: [41]  [   0/1251]  eta: 0:56:36  lr: 0.003945  min_lr: 0.003945  loss: 3.3306 (3.3306)  weight_decay: 0.0500 (0.0500)  time: 2.7148  data: 2.3578  max mem: 9147
Epoch: [41]  [ 200/1251]  eta: 0:03:55  lr: 0.003944  min_lr: 0.003944  loss: 2.9567 (3.5280)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7328 (0.8566)  time: 0.2191  data: 0.0006  max mem: 9147
Epoch: [41]  [ 400/1251]  eta: 0:02:57  lr: 0.003943  min_lr: 0.003943  loss: 3.0166 (3.5030)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7369 (0.8532)  time: 0.1900  data: 0.0005  max mem: 9147
Epoch: [41]  [ 600/1251]  eta: 0:02:16  lr: 0.003942  min_lr: 0.003942  loss: 2.9971 (3.5246)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7223 (0.8374)  time: 0.2184  data: 0.0006  max mem: 9147
Epoch: [41]  [ 800/1251]  eta: 0:01:34  lr: 0.003941  min_lr: 0.003941  loss: 3.4569 (3.5285)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8127 (0.8393)  time: 0.2256  data: 0.0257  max mem: 9147
Epoch: [41]  [1000/1251]  eta: 0:00:52  lr: 0.003940  min_lr: 0.003940  loss: 2.9394 (3.5181)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6779 (0.8343)  time: 0.1909  data: 0.0006  max mem: 9147
Epoch: [41]  [1200/1251]  eta: 0:00:10  lr: 0.003940  min_lr: 0.003940  loss: 3.6920 (3.5030)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8302 (0.8353)  time: 0.1904  data: 0.0005  max mem: 9147
Epoch: [41]  [1250/1251]  eta: 0:00:00  lr: 0.003939  min_lr: 0.003939  loss: 2.9058 (3.4984)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8302 (0.8424)  time: 0.1391  data: 0.0009  max mem: 9147
Epoch: [41] Total time: 0:04:19 (0.2074 s / it)
Averaged stats: lr: 0.003939  min_lr: 0.003939  loss: 2.9058 (3.5099)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8302 (0.8424)
Test:  [ 0/25]  eta: 0:02:17  loss: 1.1204 (1.1204)  acc1: 78.8000 (78.8000)  acc5: 94.8000 (94.8000)  time: 5.4803  data: 5.3983  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.3055 (1.3711)  acc1: 72.4000 (73.2000)  acc5: 92.8000 (91.8545)  time: 0.6968  data: 0.6215  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.5999 (1.5874)  acc1: 64.0000 (67.9810)  acc5: 86.8000 (88.1905)  time: 0.1978  data: 0.1245  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.7433 (1.6056)  acc1: 64.8000 (67.6800)  acc5: 86.0000 (88.1760)  time: 0.1989  data: 0.1272  max mem: 9147
Test: Total time: 0:00:09 (0.3963 s / it)
* Acc@1 67.458 Acc@5 88.112 loss 1.602
Accuracy of the model on the 50000 test images: 67.5%
Max accuracy: 68.06%
Epoch: [42]  [   0/1251]  eta: 1:07:04  lr: 0.003939  min_lr: 0.003939  loss: 2.9043 (2.9043)  weight_decay: 0.0500 (0.0500)  time: 3.2170  data: 2.4339  max mem: 9147
Epoch: [42]  [ 200/1251]  eta: 0:03:59  lr: 0.003939  min_lr: 0.003939  loss: 3.2078 (3.4791)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9051 (0.8802)  time: 0.2341  data: 0.0113  max mem: 9147
Epoch: [42]  [ 400/1251]  eta: 0:03:06  lr: 0.003938  min_lr: 0.003938  loss: 2.8895 (3.5320)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6401 (0.8405)  time: 0.2064  data: 0.0006  max mem: 9147
Epoch: [42]  [ 600/1251]  eta: 0:02:22  lr: 0.003937  min_lr: 0.003937  loss: 3.0296 (3.5099)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9304 (0.8735)  time: 0.2205  data: 0.0006  max mem: 9147
Epoch: [42]  [ 800/1251]  eta: 0:01:38  lr: 0.003936  min_lr: 0.003936  loss: 3.1110 (3.5087)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8577 (0.8631)  time: 0.2295  data: 0.0251  max mem: 9147
Epoch: [42]  [1000/1251]  eta: 0:00:54  lr: 0.003935  min_lr: 0.003935  loss: 2.8246 (3.4977)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9270 (0.8657)  time: 0.2093  data: 0.0009  max mem: 9147
Epoch: [42]  [1200/1251]  eta: 0:00:11  lr: 0.003934  min_lr: 0.003934  loss: 3.1752 (3.4886)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8125 (0.8568)  time: 0.2198  data: 0.0006  max mem: 9147
Epoch: [42]  [1250/1251]  eta: 0:00:00  lr: 0.003934  min_lr: 0.003934  loss: 2.8728 (3.4814)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6963 (0.8496)  time: 0.1462  data: 0.0012  max mem: 9147
Epoch: [42] Total time: 0:04:31 (0.2171 s / it)
Averaged stats: lr: 0.003934  min_lr: 0.003934  loss: 2.8728 (3.5084)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6963 (0.8496)
Test:  [ 0/25]  eta: 0:02:25  loss: 1.1232 (1.1232)  acc1: 79.6000 (79.6000)  acc5: 94.4000 (94.4000)  time: 5.8333  data: 5.7529  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.2133 (1.2524)  acc1: 76.0000 (74.2546)  acc5: 93.2000 (92.6909)  time: 0.7150  data: 0.6397  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.5797 (1.4973)  acc1: 66.4000 (68.6476)  acc5: 87.2000 (88.8381)  time: 0.1849  data: 0.1117  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.6028 (1.5064)  acc1: 64.4000 (68.1920)  acc5: 85.6000 (88.7520)  time: 0.1830  data: 0.1108  max mem: 9147
Test: Total time: 0:00:09 (0.3955 s / it)
* Acc@1 67.672 Acc@5 88.446 loss 1.520
Accuracy of the model on the 50000 test images: 67.7%
Max accuracy: 68.06%
Epoch: [43]  [   0/1251]  eta: 1:03:37  lr: 0.003934  min_lr: 0.003934  loss: 2.5913 (2.5913)  weight_decay: 0.0500 (0.0500)  time: 3.0512  data: 2.8186  max mem: 9147
Epoch: [43]  [ 200/1251]  eta: 0:03:49  lr: 0.003933  min_lr: 0.003933  loss: 3.1084 (3.4405)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7082 (0.7732)  time: 0.2000  data: 0.0006  max mem: 9147
Epoch: [43]  [ 400/1251]  eta: 0:03:05  lr: 0.003932  min_lr: 0.003932  loss: 3.1935 (3.4640)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8096 (0.8095)  time: 0.2145  data: 0.0007  max mem: 9147
Epoch: [43]  [ 600/1251]  eta: 0:02:21  lr: 0.003931  min_lr: 0.003931  loss: 3.7350 (3.4524)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7475 (0.8048)  time: 0.1991  data: 0.0010  max mem: 9147
Epoch: [43]  [ 800/1251]  eta: 0:01:37  lr: 0.003930  min_lr: 0.003930  loss: 2.9929 (3.4771)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6794 (0.7904)  time: 0.2196  data: 0.0006  max mem: 9147
Epoch: [43]  [1000/1251]  eta: 0:00:53  lr: 0.003929  min_lr: 0.003929  loss: 2.8837 (3.4910)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7560 (0.7917)  time: 0.2090  data: 0.0006  max mem: 9147
Epoch: [43]  [1200/1251]  eta: 0:00:10  lr: 0.003928  min_lr: 0.003928  loss: 2.9359 (3.4845)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7065 (0.7822)  time: 0.2301  data: 0.0127  max mem: 9147
Epoch: [43]  [1250/1251]  eta: 0:00:00  lr: 0.003928  min_lr: 0.003928  loss: 3.0402 (3.4835)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7296 (0.7825)  time: 0.1505  data: 0.0030  max mem: 9147
Epoch: [43] Total time: 0:04:26 (0.2131 s / it)
Averaged stats: lr: 0.003928  min_lr: 0.003928  loss: 3.0402 (3.4894)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7296 (0.7825)
Test:  [ 0/25]  eta: 0:02:08  loss: 1.1547 (1.1547)  acc1: 77.6000 (77.6000)  acc5: 93.2000 (93.2000)  time: 5.1562  data: 5.0759  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.1484 (1.1793)  acc1: 76.0000 (74.4727)  acc5: 93.2000 (92.9455)  time: 0.6734  data: 0.5819  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4355 (1.4426)  acc1: 64.0000 (68.4762)  acc5: 89.6000 (88.9143)  time: 0.1960  data: 0.1137  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5403 (1.4492)  acc1: 64.0000 (68.1440)  acc5: 85.6000 (88.8640)  time: 0.2173  data: 0.1400  max mem: 9147
Test: Total time: 0:00:09 (0.3983 s / it)
* Acc@1 68.430 Acc@5 88.898 loss 1.448
Accuracy of the model on the 50000 test images: 68.4%
Max accuracy: 68.43%
Epoch: [44]  [   0/1251]  eta: 1:03:05  lr: 0.003928  min_lr: 0.003928  loss: 3.5583 (3.5583)  weight_decay: 0.0500 (0.0500)  time: 3.0257  data: 2.8249  max mem: 9147
Epoch: [44]  [ 200/1251]  eta: 0:03:57  lr: 0.003927  min_lr: 0.003927  loss: 2.8966 (3.3998)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7409 (0.7584)  time: 0.2263  data: 0.0007  max mem: 9147
Epoch: [44]  [ 400/1251]  eta: 0:03:07  lr: 0.003926  min_lr: 0.003926  loss: 3.0963 (3.4648)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7981 (0.8082)  time: 0.2352  data: 0.0007  max mem: 9147
Epoch: [44]  [ 600/1251]  eta: 0:02:19  lr: 0.003925  min_lr: 0.003925  loss: 2.9681 (3.4850)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9595 (0.8133)  time: 0.2056  data: 0.0008  max mem: 9147
Epoch: [44]  [ 800/1251]  eta: 0:01:36  lr: 0.003924  min_lr: 0.003924  loss: 2.7893 (3.4952)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6560 (0.7896)  time: 0.2244  data: 0.0007  max mem: 9147
Epoch: [44]  [1000/1251]  eta: 0:00:53  lr: 0.003923  min_lr: 0.003923  loss: 3.1414 (3.4835)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8835 (0.7940)  time: 0.2009  data: 0.0007  max mem: 9147
Epoch: [44]  [1200/1251]  eta: 0:00:10  lr: 0.003922  min_lr: 0.003922  loss: 2.9205 (3.4866)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7367 (0.8035)  time: 0.2339  data: 0.0007  max mem: 9147
Epoch: [44]  [1250/1251]  eta: 0:00:00  lr: 0.003922  min_lr: 0.003922  loss: 2.8857 (3.4884)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6112 (0.7955)  time: 0.1418  data: 0.0015  max mem: 9147
Epoch: [44] Total time: 0:04:28 (0.2144 s / it)
Averaged stats: lr: 0.003922  min_lr: 0.003922  loss: 2.8857 (3.4626)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6112 (0.7955)
Test:  [ 0/25]  eta: 0:02:20  loss: 1.3089 (1.3089)  acc1: 78.8000 (78.8000)  acc5: 93.2000 (93.2000)  time: 5.6381  data: 5.5577  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.3089 (1.3483)  acc1: 75.2000 (74.4364)  acc5: 92.0000 (92.4364)  time: 0.7697  data: 0.6916  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.5618 (1.5489)  acc1: 66.4000 (68.8381)  acc5: 88.4000 (88.9143)  time: 0.2109  data: 0.1362  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.6688 (1.5628)  acc1: 64.8000 (68.3200)  acc5: 86.8000 (88.6240)  time: 0.2081  data: 0.1361  max mem: 9147
Test: Total time: 0:00:10 (0.4087 s / it)
* Acc@1 68.100 Acc@5 88.490 loss 1.574
Accuracy of the model on the 50000 test images: 68.1%
Max accuracy: 68.43%
Epoch: [45]  [   0/1251]  eta: 1:00:37  lr: 0.003922  min_lr: 0.003922  loss: 2.6252 (2.6252)  weight_decay: 0.0500 (0.0500)  time: 2.9077  data: 2.4087  max mem: 9147
Epoch: [45]  [ 200/1251]  eta: 0:03:35  lr: 0.003921  min_lr: 0.003921  loss: 3.1899 (3.4446)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8904 (0.9296)  time: 0.1800  data: 0.0005  max mem: 9147
Epoch: [45]  [ 400/1251]  eta: 0:02:48  lr: 0.003920  min_lr: 0.003920  loss: 3.1813 (3.4740)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6844 (0.8650)  time: 0.2008  data: 0.0007  max mem: 9147
Epoch: [45]  [ 600/1251]  eta: 0:02:12  lr: 0.003919  min_lr: 0.003919  loss: 2.9189 (3.4567)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7669 (0.8243)  time: 0.2051  data: 0.0007  max mem: 9147
Epoch: [45]  [ 800/1251]  eta: 0:01:30  lr: 0.003918  min_lr: 0.003918  loss: 3.1992 (3.4756)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7618 (0.8454)  time: 0.1932  data: 0.0006  max mem: 9147
Epoch: [45]  [1000/1251]  eta: 0:00:50  lr: 0.003917  min_lr: 0.003917  loss: 3.1051 (3.4844)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7293 (0.8397)  time: 0.2046  data: 0.0007  max mem: 9147
Epoch: [45]  [1200/1251]  eta: 0:00:10  lr: 0.003916  min_lr: 0.003916  loss: 2.8382 (3.4906)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7788 (0.8468)  time: 0.2060  data: 0.0006  max mem: 9147
Epoch: [45]  [1250/1251]  eta: 0:00:00  lr: 0.003916  min_lr: 0.003916  loss: 3.2563 (3.4977)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8236 (0.8500)  time: 0.1486  data: 0.0019  max mem: 9147
Epoch: [45] Total time: 0:04:13 (0.2023 s / it)
Averaged stats: lr: 0.003916  min_lr: 0.003916  loss: 3.2563 (3.4888)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8236 (0.8500)
Test:  [ 0/25]  eta: 0:01:48  loss: 1.0404 (1.0404)  acc1: 76.8000 (76.8000)  acc5: 93.6000 (93.6000)  time: 4.3498  data: 4.2648  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 1.0673 (1.1921)  acc1: 75.2000 (73.5636)  acc5: 93.6000 (93.0546)  time: 0.6578  data: 0.5798  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.5249 (1.4684)  acc1: 65.6000 (68.4571)  acc5: 88.8000 (88.5905)  time: 0.2425  data: 0.1658  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.6299 (1.4768)  acc1: 64.8000 (68.1600)  acc5: 84.4000 (88.4960)  time: 0.2373  data: 0.1619  max mem: 9147
Test: Total time: 0:00:09 (0.3948 s / it)
* Acc@1 68.248 Acc@5 88.624 loss 1.467
Accuracy of the model on the 50000 test images: 68.2%
Max accuracy: 68.43%
Epoch: [46]  [   0/1251]  eta: 0:57:57  lr: 0.003916  min_lr: 0.003916  loss: 2.3878 (2.3878)  weight_decay: 0.0500 (0.0500)  time: 2.7794  data: 2.4214  max mem: 9147
Epoch: [46]  [ 200/1251]  eta: 0:03:55  lr: 0.003914  min_lr: 0.003914  loss: 4.2545 (3.5259)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7386 (0.8198)  time: 0.1951  data: 0.0005  max mem: 9147
Epoch: [46]  [ 400/1251]  eta: 0:03:05  lr: 0.003913  min_lr: 0.003913  loss: 2.8836 (3.5328)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9342 (0.8411)  time: 0.2149  data: 0.0007  max mem: 9147
Epoch: [46]  [ 600/1251]  eta: 0:02:21  lr: 0.003912  min_lr: 0.003912  loss: 3.2760 (3.5149)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6632 (0.8165)  time: 0.2004  data: 0.0007  max mem: 9147
Epoch: [46]  [ 800/1251]  eta: 0:01:37  lr: 0.003911  min_lr: 0.003911  loss: 2.9086 (3.5037)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7173 (0.8142)  time: 0.1880  data: 0.0012  max mem: 9147
Epoch: [46]  [1000/1251]  eta: 0:00:53  lr: 0.003910  min_lr: 0.003910  loss: 2.9294 (3.4866)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8243 (0.8115)  time: 0.2000  data: 0.0006  max mem: 9147
Epoch: [46]  [1200/1251]  eta: 0:00:10  lr: 0.003909  min_lr: 0.003909  loss: 2.8631 (3.4819)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6469 (0.8014)  time: 0.2259  data: 0.0007  max mem: 9147
Epoch: [46]  [1250/1251]  eta: 0:00:00  lr: 0.003909  min_lr: 0.003909  loss: 3.4554 (3.4826)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6801 (0.7974)  time: 0.1383  data: 0.0010  max mem: 9147
Epoch: [46] Total time: 0:04:25 (0.2123 s / it)
Averaged stats: lr: 0.003909  min_lr: 0.003909  loss: 3.4554 (3.4690)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6801 (0.7974)
Test:  [ 0/25]  eta: 0:02:18  loss: 1.0113 (1.0113)  acc1: 80.4000 (80.4000)  acc5: 94.4000 (94.4000)  time: 5.5304  data: 5.4320  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1921 (1.2500)  acc1: 76.8000 (74.3273)  acc5: 94.0000 (92.7273)  time: 0.7460  data: 0.6667  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.5755 (1.5283)  acc1: 65.2000 (68.6095)  acc5: 88.0000 (88.9524)  time: 0.2126  data: 0.1383  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.6576 (1.5392)  acc1: 65.2000 (68.3360)  acc5: 85.6000 (88.7520)  time: 0.2110  data: 0.1382  max mem: 9147
Test: Total time: 0:00:10 (0.4054 s / it)
* Acc@1 68.436 Acc@5 88.956 loss 1.531
Accuracy of the model on the 50000 test images: 68.4%
Max accuracy: 68.44%
Epoch: [47]  [   0/1251]  eta: 0:59:05  lr: 0.003909  min_lr: 0.003909  loss: 4.2058 (4.2058)  weight_decay: 0.0500 (0.0500)  time: 2.8341  data: 2.6300  max mem: 9147
Epoch: [47]  [ 200/1251]  eta: 0:03:55  lr: 0.003908  min_lr: 0.003908  loss: 2.9163 (3.4450)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8236 (0.8520)  time: 0.2161  data: 0.0006  max mem: 9147
Epoch: [47]  [ 400/1251]  eta: 0:03:06  lr: 0.003907  min_lr: 0.003907  loss: 2.8879 (3.4416)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7668 (0.8038)  time: 0.2057  data: 0.0007  max mem: 9147
Epoch: [47]  [ 600/1251]  eta: 0:02:21  lr: 0.003906  min_lr: 0.003906  loss: 3.3443 (3.4461)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7621 (0.7962)  time: 0.2147  data: 0.0007  max mem: 9147
Epoch: [47]  [ 800/1251]  eta: 0:01:37  lr: 0.003905  min_lr: 0.003905  loss: 2.8665 (3.4605)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8093 (0.7938)  time: 0.2098  data: 0.0021  max mem: 9147
Epoch: [47]  [1000/1251]  eta: 0:00:54  lr: 0.003904  min_lr: 0.003904  loss: 2.9121 (3.4556)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6421 (0.7918)  time: 0.2146  data: 0.0006  max mem: 9147
Epoch: [47]  [1200/1251]  eta: 0:00:10  lr: 0.003902  min_lr: 0.003902  loss: 4.1179 (3.4582)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9416 (0.7971)  time: 0.1734  data: 0.0005  max mem: 9147
Epoch: [47]  [1250/1251]  eta: 0:00:00  lr: 0.003902  min_lr: 0.003902  loss: 2.8916 (3.4600)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6917 (0.7950)  time: 0.1390  data: 0.0006  max mem: 9147
Epoch: [47] Total time: 0:04:23 (0.2110 s / it)
Averaged stats: lr: 0.003902  min_lr: 0.003902  loss: 2.8916 (3.4751)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6917 (0.7950)
Test:  [ 0/25]  eta: 0:02:21  loss: 1.0351 (1.0351)  acc1: 81.2000 (81.2000)  acc5: 93.2000 (93.2000)  time: 5.6472  data: 5.5552  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.3018 (1.3094)  acc1: 72.0000 (73.2364)  acc5: 93.2000 (92.1818)  time: 0.7533  data: 0.6745  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.6803 (1.5581)  acc1: 64.4000 (67.9048)  acc5: 87.6000 (88.4000)  time: 0.2079  data: 0.1335  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.7318 (1.5768)  acc1: 63.6000 (67.7280)  acc5: 86.0000 (88.2720)  time: 0.2064  data: 0.1334  max mem: 9147
Test: Total time: 0:00:10 (0.4065 s / it)
* Acc@1 68.100 Acc@5 88.758 loss 1.554
Accuracy of the model on the 50000 test images: 68.1%
Max accuracy: 68.44%
Epoch: [48]  [   0/1251]  eta: 1:04:13  lr: 0.003902  min_lr: 0.003902  loss: 4.3376 (4.3376)  weight_decay: 0.0500 (0.0500)  time: 3.0800  data: 2.2442  max mem: 9147
Epoch: [48]  [ 200/1251]  eta: 0:03:58  lr: 0.003901  min_lr: 0.003901  loss: 2.7850 (3.3673)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6477 (0.8669)  time: 0.2254  data: 0.0006  max mem: 9147
Epoch: [48]  [ 400/1251]  eta: 0:03:04  lr: 0.003900  min_lr: 0.003900  loss: 2.8021 (3.4068)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7942 (0.8395)  time: 0.2041  data: 0.0006  max mem: 9147
Epoch: [48]  [ 600/1251]  eta: 0:02:21  lr: 0.003899  min_lr: 0.003899  loss: 2.9341 (3.4274)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5989 (0.8037)  time: 0.2300  data: 0.0007  max mem: 9147
Epoch: [48]  [ 800/1251]  eta: 0:01:37  lr: 0.003898  min_lr: 0.003898  loss: 2.9188 (3.4602)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6865 (0.8107)  time: 0.1859  data: 0.0005  max mem: 9147
Epoch: [48]  [1000/1251]  eta: 0:00:52  lr: 0.003897  min_lr: 0.003897  loss: 3.3201 (3.4550)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6293 (0.8098)  time: 0.1926  data: 0.0005  max mem: 9147
Epoch: [48]  [1200/1251]  eta: 0:00:10  lr: 0.003895  min_lr: 0.003895  loss: 2.9611 (3.4510)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7932 (0.8031)  time: 0.1908  data: 0.0005  max mem: 9147
Epoch: [48]  [1250/1251]  eta: 0:00:00  lr: 0.003895  min_lr: 0.003895  loss: 4.1041 (3.4519)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8444 (0.8055)  time: 0.1376  data: 0.0007  max mem: 9147
Epoch: [48] Total time: 0:04:19 (0.2072 s / it)
Averaged stats: lr: 0.003895  min_lr: 0.003895  loss: 4.1041 (3.4380)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8444 (0.8055)
Test:  [ 0/25]  eta: 0:02:26  loss: 1.3028 (1.3028)  acc1: 79.6000 (79.6000)  acc5: 94.0000 (94.0000)  time: 5.8491  data: 5.7688  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.2465 (1.2883)  acc1: 76.8000 (74.2545)  acc5: 94.0000 (93.3455)  time: 0.7597  data: 0.6838  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.5412 (1.5300)  acc1: 65.6000 (68.9714)  acc5: 87.6000 (89.1619)  time: 0.2038  data: 0.1308  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.7069 (1.5435)  acc1: 63.6000 (68.6400)  acc5: 86.0000 (88.9760)  time: 0.2017  data: 0.1307  max mem: 9147
Test: Total time: 0:00:10 (0.4109 s / it)
* Acc@1 68.756 Acc@5 89.090 loss 1.542
Accuracy of the model on the 50000 test images: 68.8%
Max accuracy: 68.76%
Epoch: [49]  [   0/1251]  eta: 0:57:06  lr: 0.003895  min_lr: 0.003895  loss: 2.6497 (2.6497)  weight_decay: 0.0500 (0.0500)  time: 2.7390  data: 1.8291  max mem: 9147
Epoch: [49]  [ 200/1251]  eta: 0:03:48  lr: 0.003894  min_lr: 0.003894  loss: 4.0521 (3.4683)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8338 (nan)  time: 0.1902  data: 0.0005  max mem: 9147
Epoch: [49]  [ 400/1251]  eta: 0:02:55  lr: 0.003893  min_lr: 0.003893  loss: 3.3052 (3.4858)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8113 (nan)  time: 0.2002  data: 0.0006  max mem: 9147
Epoch: [49]  [ 600/1251]  eta: 0:02:16  lr: 0.003892  min_lr: 0.003892  loss: 3.3093 (3.4811)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7007 (nan)  time: 0.2150  data: 0.0007  max mem: 9147
Epoch: [49]  [ 800/1251]  eta: 0:01:34  lr: 0.003890  min_lr: 0.003890  loss: 2.8803 (3.4762)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8188 (nan)  time: 0.2008  data: 0.0005  max mem: 9147
Epoch: [49]  [1000/1251]  eta: 0:00:52  lr: 0.003889  min_lr: 0.003889  loss: 3.1198 (3.4757)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7500 (nan)  time: 0.2203  data: 0.0007  max mem: 9147
Epoch: [49]  [1200/1251]  eta: 0:00:10  lr: 0.003888  min_lr: 0.003888  loss: 2.9278 (3.4746)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7247 (nan)  time: 0.2101  data: 0.0006  max mem: 9147
Epoch: [49]  [1250/1251]  eta: 0:00:00  lr: 0.003888  min_lr: 0.003888  loss: 3.0298 (3.4730)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6759 (nan)  time: 0.1416  data: 0.0012  max mem: 9147
Epoch: [49] Total time: 0:04:25 (0.2118 s / it)
Averaged stats: lr: 0.003888  min_lr: 0.003888  loss: 3.0298 (3.4493)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6759 (nan)
Test:  [ 0/25]  eta: 0:02:22  loss: 1.1542 (1.1542)  acc1: 77.6000 (77.6000)  acc5: 93.2000 (93.2000)  time: 5.6994  data: 5.6107  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1960 (1.2340)  acc1: 75.2000 (74.2909)  acc5: 93.2000 (92.7273)  time: 0.7517  data: 0.6649  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.5462 (1.4743)  acc1: 65.2000 (69.1429)  acc5: 88.0000 (89.3714)  time: 0.2238  data: 0.1452  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5977 (1.4962)  acc1: 65.6000 (69.0880)  acc5: 86.8000 (89.1520)  time: 0.2204  data: 0.1452  max mem: 9147
Test: Total time: 0:00:10 (0.4208 s / it)
* Acc@1 69.032 Acc@5 89.156 loss 1.478
Accuracy of the model on the 50000 test images: 69.0%
Max accuracy: 69.03%
Epoch: [50]  [   0/1251]  eta: 1:03:55  lr: 0.003888  min_lr: 0.003888  loss: 2.8171 (2.8171)  weight_decay: 0.0500 (0.0500)  time: 3.0662  data: 2.8312  max mem: 9147
Epoch: [50]  [ 200/1251]  eta: 0:03:47  lr: 0.003887  min_lr: 0.003887  loss: 2.9379 (3.3616)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7073 (0.9087)  time: 0.2248  data: 0.0007  max mem: 9147
Epoch: [50]  [ 400/1251]  eta: 0:03:04  lr: 0.003885  min_lr: 0.003885  loss: 2.8917 (3.3611)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7986 (0.8805)  time: 0.2034  data: 0.0006  max mem: 9147
Epoch: [50]  [ 600/1251]  eta: 0:02:20  lr: 0.003884  min_lr: 0.003884  loss: 3.2380 (3.3965)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7511 (0.8657)  time: 0.2187  data: 0.0008  max mem: 9147
Epoch: [50]  [ 800/1251]  eta: 0:01:36  lr: 0.003883  min_lr: 0.003883  loss: 3.0999 (3.3932)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8524 (0.8495)  time: 0.2261  data: 0.0006  max mem: 9147
Epoch: [50]  [1000/1251]  eta: 0:00:53  lr: 0.003882  min_lr: 0.003882  loss: 3.1161 (3.4218)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7166 (0.8557)  time: 0.2022  data: 0.0008  max mem: 9147
Epoch: [50]  [1200/1251]  eta: 0:00:10  lr: 0.003881  min_lr: 0.003881  loss: 2.9548 (3.4351)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8068 (0.8389)  time: 0.2114  data: 0.0007  max mem: 9147
Epoch: [50]  [1250/1251]  eta: 0:00:00  lr: 0.003880  min_lr: 0.003880  loss: 2.8757 (3.4329)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8060 (0.8484)  time: 0.1635  data: 0.0011  max mem: 9147
Epoch: [50] Total time: 0:04:26 (0.2133 s / it)
Averaged stats: lr: 0.003880  min_lr: 0.003880  loss: 2.8757 (3.4430)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8060 (0.8484)
Test:  [ 0/25]  eta: 0:02:14  loss: 1.2558 (1.2558)  acc1: 80.4000 (80.4000)  acc5: 94.4000 (94.4000)  time: 5.3688  data: 5.2726  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.2558 (1.2626)  acc1: 76.4000 (73.9636)  acc5: 94.4000 (93.2000)  time: 0.6934  data: 0.5959  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.5155 (1.4726)  acc1: 65.2000 (69.4667)  acc5: 89.6000 (89.4857)  time: 0.1962  data: 0.1128  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5896 (1.4785)  acc1: 65.2000 (68.8800)  acc5: 86.8000 (89.3920)  time: 0.2032  data: 0.1258  max mem: 9147
Test: Total time: 0:00:09 (0.3986 s / it)
* Acc@1 68.998 Acc@5 89.152 loss 1.455
Accuracy of the model on the 50000 test images: 69.0%
Max accuracy: 69.03%
Epoch: [51]  [   0/1251]  eta: 1:03:34  lr: 0.003880  min_lr: 0.003880  loss: 4.5895 (4.5895)  weight_decay: 0.0500 (0.0500)  time: 3.0488  data: 2.4125  max mem: 9147
Epoch: [51]  [ 200/1251]  eta: 0:03:50  lr: 0.003879  min_lr: 0.003879  loss: 2.9938 (3.5330)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6876 (0.9007)  time: 0.1735  data: 0.0006  max mem: 9147
Epoch: [51]  [ 400/1251]  eta: 0:02:55  lr: 0.003878  min_lr: 0.003878  loss: 3.5676 (3.5326)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7315 (0.8453)  time: 0.1997  data: 0.0008  max mem: 9147
Epoch: [51]  [ 600/1251]  eta: 0:02:15  lr: 0.003877  min_lr: 0.003877  loss: 2.9683 (3.5261)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7156 (0.8369)  time: 0.2005  data: 0.0006  max mem: 9147
Epoch: [51]  [ 800/1251]  eta: 0:01:34  lr: 0.003875  min_lr: 0.003875  loss: 2.7805 (3.4855)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6117 (0.8376)  time: 0.2107  data: 0.0007  max mem: 9147
Epoch: [51]  [1000/1251]  eta: 0:00:52  lr: 0.003874  min_lr: 0.003874  loss: 3.7501 (3.4836)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7791 (0.8349)  time: 0.2005  data: 0.0007  max mem: 9147
Epoch: [51]  [1200/1251]  eta: 0:00:10  lr: 0.003873  min_lr: 0.003873  loss: 3.1042 (3.4644)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6847 (0.8240)  time: 0.1950  data: 0.0006  max mem: 9147
Epoch: [51]  [1250/1251]  eta: 0:00:00  lr: 0.003873  min_lr: 0.003873  loss: 3.0265 (3.4606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8694 (0.8300)  time: 0.1393  data: 0.0017  max mem: 9147
Epoch: [51] Total time: 0:04:21 (0.2093 s / it)
Averaged stats: lr: 0.003873  min_lr: 0.003873  loss: 3.0265 (3.4372)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8694 (0.8300)
Test:  [ 0/25]  eta: 0:02:21  loss: 1.0078 (1.0078)  acc1: 80.4000 (80.4000)  acc5: 95.2000 (95.2000)  time: 5.6545  data: 5.5736  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0838 (1.1601)  acc1: 74.0000 (74.6545)  acc5: 94.4000 (93.2364)  time: 0.7378  data: 0.6628  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4953 (1.4179)  acc1: 65.2000 (69.3143)  acc5: 88.0000 (89.6000)  time: 0.2001  data: 0.1245  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5918 (1.4392)  acc1: 64.8000 (68.9120)  acc5: 87.6000 (89.3120)  time: 0.2017  data: 0.1270  max mem: 9147
Test: Total time: 0:00:10 (0.4028 s / it)
* Acc@1 69.144 Acc@5 89.118 loss 1.449
Accuracy of the model on the 50000 test images: 69.1%
Max accuracy: 69.14%
Epoch: [52]  [   0/1251]  eta: 1:08:58  lr: 0.003873  min_lr: 0.003873  loss: 2.7513 (2.7513)  weight_decay: 0.0500 (0.0500)  time: 3.3085  data: 3.0864  max mem: 9147
Epoch: [52]  [ 200/1251]  eta: 0:03:34  lr: 0.003871  min_lr: 0.003871  loss: 2.8921 (3.3553)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7014 (0.7499)  time: 0.1826  data: 0.0012  max mem: 9147
Epoch: [52]  [ 400/1251]  eta: 0:02:48  lr: 0.003870  min_lr: 0.003870  loss: 3.0690 (3.4138)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7736 (0.7908)  time: 0.1907  data: 0.0005  max mem: 9147
Epoch: [52]  [ 600/1251]  eta: 0:02:10  lr: 0.003869  min_lr: 0.003869  loss: 2.8270 (3.4429)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7438 (0.8044)  time: 0.2007  data: 0.0006  max mem: 9147
Epoch: [52]  [ 800/1251]  eta: 0:01:31  lr: 0.003867  min_lr: 0.003867  loss: 2.8160 (3.4339)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7633 (0.8260)  time: 0.2044  data: 0.0006  max mem: 9147
Epoch: [52]  [1000/1251]  eta: 0:00:51  lr: 0.003866  min_lr: 0.003866  loss: 2.9485 (3.4104)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8646 (0.8229)  time: 0.2340  data: 0.0007  max mem: 9147
Epoch: [52]  [1200/1251]  eta: 0:00:10  lr: 0.003865  min_lr: 0.003865  loss: 3.3285 (3.4386)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7383 (0.8155)  time: 0.2152  data: 0.0006  max mem: 9147
Epoch: [52]  [1250/1251]  eta: 0:00:00  lr: 0.003865  min_lr: 0.003865  loss: 2.7434 (3.4285)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7885 (0.8152)  time: 0.1426  data: 0.0017  max mem: 9147
Epoch: [52] Total time: 0:04:17 (0.2057 s / it)
Averaged stats: lr: 0.003865  min_lr: 0.003865  loss: 2.7434 (3.4315)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7885 (0.8152)
Test:  [ 0/25]  eta: 0:02:15  loss: 1.0563 (1.0563)  acc1: 83.2000 (83.2000)  acc5: 94.8000 (94.8000)  time: 5.4291  data: 5.3294  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.2210 (1.3035)  acc1: 74.8000 (74.4364)  acc5: 93.2000 (92.5455)  time: 0.7062  data: 0.6275  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.5339 (1.5290)  acc1: 66.0000 (68.8762)  acc5: 86.8000 (88.8952)  time: 0.2105  data: 0.1372  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.7111 (1.5379)  acc1: 64.8000 (68.5600)  acc5: 85.6000 (88.6560)  time: 0.2092  data: 0.1372  max mem: 9147
Test: Total time: 0:00:10 (0.4001 s / it)
* Acc@1 68.730 Acc@5 89.074 loss 1.540
Accuracy of the model on the 50000 test images: 68.7%
Max accuracy: 69.14%
Epoch: [53]  [   0/1251]  eta: 1:06:38  lr: 0.003865  min_lr: 0.003865  loss: 2.7886 (2.7886)  weight_decay: 0.0500 (0.0500)  time: 3.1962  data: 2.3157  max mem: 9147
Epoch: [53]  [ 200/1251]  eta: 0:03:53  lr: 0.003863  min_lr: 0.003863  loss: 3.7191 (3.4738)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6936 (0.8066)  time: 0.1992  data: 0.0008  max mem: 9147
Epoch: [53]  [ 400/1251]  eta: 0:03:05  lr: 0.003862  min_lr: 0.003862  loss: 3.2431 (3.4645)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7802 (0.7953)  time: 0.2124  data: 0.0006  max mem: 9147
Epoch: [53]  [ 600/1251]  eta: 0:02:21  lr: 0.003861  min_lr: 0.003861  loss: 2.8788 (3.4186)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7465 (0.7844)  time: 0.2201  data: 0.0120  max mem: 9147
Epoch: [53]  [ 800/1251]  eta: 0:01:37  lr: 0.003859  min_lr: 0.003859  loss: 2.9135 (3.3844)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7563 (0.7914)  time: 0.2260  data: 0.0096  max mem: 9147
Epoch: [53]  [1000/1251]  eta: 0:00:53  lr: 0.003858  min_lr: 0.003858  loss: 3.9317 (3.4018)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6515 (0.7856)  time: 0.2264  data: 0.0010  max mem: 9147
Epoch: [53]  [1200/1251]  eta: 0:00:10  lr: 0.003857  min_lr: 0.003857  loss: 2.8076 (3.4139)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6637 (0.7959)  time: 0.2446  data: 0.0008  max mem: 9147
Epoch: [53]  [1250/1251]  eta: 0:00:00  lr: 0.003856  min_lr: 0.003856  loss: 2.7435 (3.4119)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6349 (0.7926)  time: 0.1400  data: 0.0012  max mem: 9147
Epoch: [53] Total time: 0:04:28 (0.2149 s / it)
Averaged stats: lr: 0.003856  min_lr: 0.003856  loss: 2.7435 (3.4137)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6349 (0.7926)
Test:  [ 0/25]  eta: 0:01:40  loss: 0.9941 (0.9941)  acc1: 82.4000 (82.4000)  acc5: 94.4000 (94.4000)  time: 4.0371  data: 3.9560  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.1224 (1.1723)  acc1: 76.4000 (75.8909)  acc5: 93.6000 (92.7636)  time: 0.6964  data: 0.6170  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4772 (1.4140)  acc1: 66.0000 (70.6095)  acc5: 87.6000 (89.2952)  time: 0.2744  data: 0.1994  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.6145 (1.4321)  acc1: 65.2000 (69.9680)  acc5: 86.4000 (88.9120)  time: 0.2229  data: 0.1509  max mem: 9147
Test: Total time: 0:00:10 (0.4063 s / it)
* Acc@1 69.576 Acc@5 89.366 loss 1.420
Accuracy of the model on the 50000 test images: 69.6%
Max accuracy: 69.58%
Epoch: [54]  [   0/1251]  eta: 0:58:53  lr: 0.003856  min_lr: 0.003856  loss: 4.2475 (4.2475)  weight_decay: 0.0500 (0.0500)  time: 2.8246  data: 2.6277  max mem: 9147
Epoch: [54]  [ 200/1251]  eta: 0:03:39  lr: 0.003855  min_lr: 0.003855  loss: 2.7343 (3.4458)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7450 (0.8485)  time: 0.1859  data: 0.0006  max mem: 9147
Epoch: [54]  [ 400/1251]  eta: 0:02:50  lr: 0.003854  min_lr: 0.003854  loss: 2.8705 (3.4128)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7564 (0.8426)  time: 0.1886  data: 0.0006  max mem: 9147
Epoch: [54]  [ 600/1251]  eta: 0:02:11  lr: 0.003852  min_lr: 0.003852  loss: 3.1717 (3.4177)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7159 (0.8408)  time: 0.2449  data: 0.0007  max mem: 9147
Epoch: [54]  [ 800/1251]  eta: 0:01:32  lr: 0.003851  min_lr: 0.003851  loss: 2.9858 (3.4232)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6837 (0.8282)  time: 0.2021  data: 0.0012  max mem: 9147
Epoch: [54]  [1000/1251]  eta: 0:00:51  lr: 0.003849  min_lr: 0.003849  loss: 3.4671 (3.3991)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6524 (0.8334)  time: 0.2497  data: 0.0007  max mem: 9147
Epoch: [54]  [1200/1251]  eta: 0:00:10  lr: 0.003848  min_lr: 0.003848  loss: 3.2287 (3.4050)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7218 (0.8216)  time: 0.2243  data: 0.0007  max mem: 9147
Epoch: [54]  [1250/1251]  eta: 0:00:00  lr: 0.003848  min_lr: 0.003848  loss: 2.9804 (3.4106)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7218 (0.8241)  time: 0.1451  data: 0.0013  max mem: 9147
Epoch: [54] Total time: 0:04:18 (0.2070 s / it)
Averaged stats: lr: 0.003848  min_lr: 0.003848  loss: 2.9804 (3.4067)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7218 (0.8241)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.9991 (0.9991)  acc1: 80.4000 (80.4000)  acc5: 96.0000 (96.0000)  time: 5.5379  data: 5.4538  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.2753 (1.2990)  acc1: 76.0000 (74.6909)  acc5: 92.8000 (93.1273)  time: 0.7667  data: 0.6851  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.5944 (1.4860)  acc1: 65.6000 (69.6191)  acc5: 89.6000 (89.3333)  time: 0.2233  data: 0.1469  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5944 (1.4913)  acc1: 65.6000 (69.2320)  acc5: 86.0000 (89.2160)  time: 0.2275  data: 0.1555  max mem: 9147
Test: Total time: 0:00:10 (0.4219 s / it)
* Acc@1 69.060 Acc@5 89.356 loss 1.483
Accuracy of the model on the 50000 test images: 69.1%
Max accuracy: 69.58%
Epoch: [55]  [   0/1251]  eta: 1:04:48  lr: 0.003848  min_lr: 0.003848  loss: 2.7681 (2.7681)  weight_decay: 0.0500 (0.0500)  time: 3.1082  data: 2.7220  max mem: 9147
Epoch: [55]  [ 200/1251]  eta: 0:03:55  lr: 0.003846  min_lr: 0.003846  loss: 3.1691 (3.2949)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7375 (0.7713)  time: 0.2006  data: 0.0021  max mem: 9147
Epoch: [55]  [ 400/1251]  eta: 0:03:03  lr: 0.003845  min_lr: 0.003845  loss: 2.9837 (3.3331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7497 (0.7482)  time: 0.2010  data: 0.0007  max mem: 9147
Epoch: [55]  [ 600/1251]  eta: 0:02:17  lr: 0.003844  min_lr: 0.003844  loss: 2.7849 (3.3521)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6675 (inf)  time: 0.1967  data: 0.0006  max mem: 9147
Epoch: [55]  [ 800/1251]  eta: 0:01:33  lr: 0.003842  min_lr: 0.003842  loss: 2.9992 (3.3860)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6938 (inf)  time: 0.2004  data: 0.0006  max mem: 9147
Epoch: [55]  [1000/1251]  eta: 0:00:52  lr: 0.003841  min_lr: 0.003841  loss: 3.0009 (3.3873)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6444 (inf)  time: 0.2248  data: 0.0007  max mem: 9147
Epoch: [55]  [1200/1251]  eta: 0:00:10  lr: 0.003839  min_lr: 0.003839  loss: 3.5326 (3.4011)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8080 (inf)  time: 0.1905  data: 0.0010  max mem: 9147
Epoch: [55]  [1250/1251]  eta: 0:00:00  lr: 0.003839  min_lr: 0.003839  loss: 3.4845 (3.4017)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7066 (inf)  time: 0.1388  data: 0.0011  max mem: 9147
Epoch: [55] Total time: 0:04:18 (0.2065 s / it)
Averaged stats: lr: 0.003839  min_lr: 0.003839  loss: 3.4845 (3.4076)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7066 (inf)
Test:  [ 0/25]  eta: 0:02:15  loss: 1.0281 (1.0281)  acc1: 81.6000 (81.6000)  acc5: 94.4000 (94.4000)  time: 5.4256  data: 5.3266  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.2540 (1.2585)  acc1: 76.4000 (75.2000)  acc5: 93.6000 (92.8000)  time: 0.7112  data: 0.6352  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.6067 (1.4858)  acc1: 66.0000 (69.4286)  acc5: 86.0000 (89.2191)  time: 0.2009  data: 0.1276  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.6404 (1.4969)  acc1: 66.0000 (69.2960)  acc5: 86.0000 (89.1360)  time: 0.2159  data: 0.1437  max mem: 9147
Test: Total time: 0:00:10 (0.4084 s / it)
* Acc@1 69.192 Acc@5 89.292 loss 1.483
Accuracy of the model on the 50000 test images: 69.2%
Max accuracy: 69.58%
Epoch: [56]  [   0/1251]  eta: 1:05:33  lr: 0.003839  min_lr: 0.003839  loss: 2.8120 (2.8120)  weight_decay: 0.0500 (0.0500)  time: 3.1442  data: 2.3077  max mem: 9147
Epoch: [56]  [ 200/1251]  eta: 0:03:58  lr: 0.003838  min_lr: 0.003838  loss: 3.0508 (3.4254)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8499 (0.8837)  time: 0.2098  data: 0.0007  max mem: 9147
Epoch: [56]  [ 400/1251]  eta: 0:03:04  lr: 0.003836  min_lr: 0.003836  loss: 2.6339 (3.3761)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6904 (0.8048)  time: 0.2150  data: 0.0067  max mem: 9147
Epoch: [56]  [ 600/1251]  eta: 0:02:20  lr: 0.003835  min_lr: 0.003835  loss: 4.1538 (3.4041)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7006 (0.7887)  time: 0.2197  data: 0.0006  max mem: 9147
Epoch: [56]  [ 800/1251]  eta: 0:01:36  lr: 0.003833  min_lr: 0.003833  loss: 3.3900 (3.4028)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8347 (0.8060)  time: 0.1963  data: 0.0005  max mem: 9147
Epoch: [56]  [1000/1251]  eta: 0:00:53  lr: 0.003832  min_lr: 0.003832  loss: 3.0200 (3.4003)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7240 (0.8077)  time: 0.2283  data: 0.0006  max mem: 9147
Epoch: [56]  [1200/1251]  eta: 0:00:10  lr: 0.003831  min_lr: 0.003831  loss: 2.7819 (3.3933)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7562 (0.8155)  time: 0.2104  data: 0.0071  max mem: 9147
Epoch: [56]  [1250/1251]  eta: 0:00:00  lr: 0.003830  min_lr: 0.003830  loss: 2.8649 (3.3971)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6488 (0.8118)  time: 0.1462  data: 0.0008  max mem: 9147
Epoch: [56] Total time: 0:04:26 (0.2131 s / it)
Averaged stats: lr: 0.003830  min_lr: 0.003830  loss: 2.8649 (3.3916)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6488 (0.8118)
Test:  [ 0/25]  eta: 0:02:23  loss: 0.9972 (0.9972)  acc1: 83.2000 (83.2000)  acc5: 95.2000 (95.2000)  time: 5.7590  data: 5.6659  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0876 (1.1522)  acc1: 76.0000 (75.6364)  acc5: 94.4000 (93.0909)  time: 0.7230  data: 0.6309  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4705 (1.4014)  acc1: 66.8000 (70.3810)  acc5: 89.2000 (89.8857)  time: 0.1831  data: 0.1021  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5825 (1.4113)  acc1: 66.0000 (70.2080)  acc5: 87.6000 (89.8240)  time: 0.2015  data: 0.1266  max mem: 9147
Test: Total time: 0:00:10 (0.4108 s / it)
* Acc@1 69.950 Acc@5 89.708 loss 1.408
Accuracy of the model on the 50000 test images: 70.0%
Max accuracy: 69.95%
Epoch: [57]  [   0/1251]  eta: 1:02:53  lr: 0.003830  min_lr: 0.003830  loss: 2.7528 (2.7528)  weight_decay: 0.0500 (0.0500)  time: 3.0167  data: 2.7949  max mem: 9147
Epoch: [57]  [ 200/1251]  eta: 0:03:54  lr: 0.003829  min_lr: 0.003829  loss: 2.8094 (3.3894)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7374 (0.8961)  time: 0.1897  data: 0.0012  max mem: 9147
Epoch: [57]  [ 400/1251]  eta: 0:02:55  lr: 0.003827  min_lr: 0.003827  loss: 3.6954 (3.4237)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7441 (0.8386)  time: 0.1884  data: 0.0005  max mem: 9147
Epoch: [57]  [ 600/1251]  eta: 0:02:12  lr: 0.003826  min_lr: 0.003826  loss: 2.9837 (3.3913)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7484 (0.8524)  time: 0.2148  data: 0.0021  max mem: 9147
Epoch: [57]  [ 800/1251]  eta: 0:01:32  lr: 0.003824  min_lr: 0.003824  loss: 3.6802 (3.3814)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7817 (0.8428)  time: 0.1894  data: 0.0020  max mem: 9147
Epoch: [57]  [1000/1251]  eta: 0:00:51  lr: 0.003823  min_lr: 0.003823  loss: 3.3899 (3.3930)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8007 (0.8334)  time: 0.2248  data: 0.0007  max mem: 9147
Epoch: [57]  [1200/1251]  eta: 0:00:10  lr: 0.003821  min_lr: 0.003821  loss: 3.8114 (3.4080)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6960 (0.8267)  time: 0.2239  data: 0.0012  max mem: 9147
Epoch: [57]  [1250/1251]  eta: 0:00:00  lr: 0.003821  min_lr: 0.003821  loss: 2.8677 (3.4089)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7904 (0.8278)  time: 0.1400  data: 0.0014  max mem: 9147
Epoch: [57] Total time: 0:04:18 (0.2063 s / it)
Averaged stats: lr: 0.003821  min_lr: 0.003821  loss: 2.8677 (3.3986)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7904 (0.8278)
Test:  [ 0/25]  eta: 0:01:47  loss: 0.9140 (0.9140)  acc1: 80.4000 (80.4000)  acc5: 95.2000 (95.2000)  time: 4.3181  data: 4.1756  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 1.0345 (1.1326)  acc1: 75.6000 (74.6545)  acc5: 94.0000 (93.4182)  time: 0.6402  data: 0.5561  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4545 (1.3581)  acc1: 65.6000 (69.6762)  acc5: 88.4000 (89.9810)  time: 0.2396  data: 0.1644  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5069 (1.3788)  acc1: 66.0000 (69.5680)  acc5: 86.4000 (89.6480)  time: 0.2146  data: 0.1406  max mem: 9147
Test: Total time: 0:00:09 (0.3989 s / it)
* Acc@1 70.036 Acc@5 89.944 loss 1.374
Accuracy of the model on the 50000 test images: 70.0%
Max accuracy: 70.04%
Epoch: [58]  [   0/1251]  eta: 0:58:37  lr: 0.003821  min_lr: 0.003821  loss: 3.0827 (3.0827)  weight_decay: 0.0500 (0.0500)  time: 2.8118  data: 1.7119  max mem: 9147
Epoch: [58]  [ 200/1251]  eta: 0:03:41  lr: 0.003820  min_lr: 0.003820  loss: 3.8930 (3.2827)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7530 (0.8211)  time: 0.1757  data: 0.0006  max mem: 9147
Epoch: [58]  [ 400/1251]  eta: 0:02:57  lr: 0.003818  min_lr: 0.003818  loss: 2.9322 (3.3421)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7328 (0.7948)  time: 0.2307  data: 0.0006  max mem: 9147
Epoch: [58]  [ 600/1251]  eta: 0:02:13  lr: 0.003817  min_lr: 0.003817  loss: 2.7953 (3.3400)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7079 (0.8063)  time: 0.2141  data: 0.0007  max mem: 9147
Epoch: [58]  [ 800/1251]  eta: 0:01:33  lr: 0.003815  min_lr: 0.003815  loss: 2.9675 (3.3753)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7089 (0.8047)  time: 0.2111  data: 0.0007  max mem: 9147
Epoch: [58]  [1000/1251]  eta: 0:00:51  lr: 0.003813  min_lr: 0.003813  loss: 2.9411 (3.3859)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6799 (0.8215)  time: 0.2000  data: 0.0005  max mem: 9147
Epoch: [58]  [1200/1251]  eta: 0:00:10  lr: 0.003812  min_lr: 0.003812  loss: 3.5284 (3.3952)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9957 (0.8292)  time: 0.2190  data: 0.0007  max mem: 9147
Epoch: [58]  [1250/1251]  eta: 0:00:00  lr: 0.003812  min_lr: 0.003812  loss: 3.3946 (3.3911)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8457 (0.8300)  time: 0.1409  data: 0.0017  max mem: 9147
Epoch: [58] Total time: 0:04:20 (0.2081 s / it)
Averaged stats: lr: 0.003812  min_lr: 0.003812  loss: 3.3946 (3.3898)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8457 (0.8300)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.9267 (0.9267)  acc1: 82.8000 (82.8000)  acc5: 95.2000 (95.2000)  time: 5.6063  data: 5.5100  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.2148 (1.2355)  acc1: 74.8000 (74.7273)  acc5: 93.6000 (93.0909)  time: 0.7593  data: 0.6843  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4812 (1.5173)  acc1: 66.8000 (69.6381)  acc5: 88.4000 (89.4857)  time: 0.2126  data: 0.1413  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.6240 (1.5234)  acc1: 65.2000 (69.2640)  acc5: 87.2000 (89.4080)  time: 0.2115  data: 0.1415  max mem: 9147
Test: Total time: 0:00:10 (0.4106 s / it)
* Acc@1 69.226 Acc@5 89.342 loss 1.500
Accuracy of the model on the 50000 test images: 69.2%
Max accuracy: 70.04%
Epoch: [59]  [   0/1251]  eta: 1:04:26  lr: 0.003812  min_lr: 0.003812  loss: 2.5834 (2.5834)  weight_decay: 0.0500 (0.0500)  time: 3.0910  data: 2.5846  max mem: 9147
Epoch: [59]  [ 200/1251]  eta: 0:03:59  lr: 0.003810  min_lr: 0.003810  loss: 3.8221 (3.4046)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9943 (0.8943)  time: 0.2118  data: 0.0007  max mem: 9147
Epoch: [59]  [ 400/1251]  eta: 0:03:04  lr: 0.003809  min_lr: 0.003809  loss: 2.9562 (3.3032)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6885 (0.8261)  time: 0.2148  data: 0.0006  max mem: 9147
Epoch: [59]  [ 600/1251]  eta: 0:02:18  lr: 0.003807  min_lr: 0.003807  loss: 2.9378 (3.3398)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7041 (0.7988)  time: 0.2001  data: 0.0005  max mem: 9147
Epoch: [59]  [ 800/1251]  eta: 0:01:34  lr: 0.003805  min_lr: 0.003805  loss: 2.9253 (3.3587)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8886 (0.8039)  time: 0.2299  data: 0.0164  max mem: 9147
Epoch: [59]  [1000/1251]  eta: 0:00:52  lr: 0.003804  min_lr: 0.003804  loss: 2.9760 (3.3816)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6762 (0.7947)  time: 0.2003  data: 0.0006  max mem: 9147
Epoch: [59]  [1200/1251]  eta: 0:00:10  lr: 0.003802  min_lr: 0.003802  loss: 3.4601 (3.3994)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7030 (0.7874)  time: 0.1900  data: 0.0007  max mem: 9147
Epoch: [59]  [1250/1251]  eta: 0:00:00  lr: 0.003802  min_lr: 0.003802  loss: 2.7818 (3.3925)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8705 (0.7937)  time: 0.1378  data: 0.0012  max mem: 9147
Epoch: [59] Total time: 0:04:19 (0.2072 s / it)
Averaged stats: lr: 0.003802  min_lr: 0.003802  loss: 2.7818 (3.3994)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8705 (0.7937)
Test:  [ 0/25]  eta: 0:02:14  loss: 0.9602 (0.9602)  acc1: 80.8000 (80.8000)  acc5: 94.4000 (94.4000)  time: 5.3827  data: 5.3022  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0912 (1.1386)  acc1: 76.0000 (75.2000)  acc5: 94.8000 (93.8909)  time: 0.7410  data: 0.6477  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4329 (1.3730)  acc1: 65.2000 (70.2095)  acc5: 88.8000 (90.0762)  time: 0.2081  data: 0.1238  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5202 (1.3913)  acc1: 65.2000 (69.9200)  acc5: 87.2000 (89.6960)  time: 0.2036  data: 0.1239  max mem: 9147
Test: Total time: 0:00:09 (0.3976 s / it)
* Acc@1 70.056 Acc@5 89.844 loss 1.385
Accuracy of the model on the 50000 test images: 70.1%
Max accuracy: 70.06%
Epoch: [60]  [   0/1251]  eta: 1:00:32  lr: 0.003802  min_lr: 0.003802  loss: 2.8477 (2.8477)  weight_decay: 0.0500 (0.0500)  time: 2.9039  data: 2.7058  max mem: 9147
Epoch: [60]  [ 200/1251]  eta: 0:03:55  lr: 0.003800  min_lr: 0.003800  loss: 2.8003 (3.3479)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7215 (0.8151)  time: 0.2193  data: 0.0008  max mem: 9147
Epoch: [60]  [ 400/1251]  eta: 0:03:07  lr: 0.003799  min_lr: 0.003799  loss: 2.8878 (3.4189)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8123 (0.8537)  time: 0.2274  data: 0.0006  max mem: 9147
Epoch: [60]  [ 600/1251]  eta: 0:02:20  lr: 0.003797  min_lr: 0.003797  loss: 2.7406 (3.4301)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7115 (0.8547)  time: 0.1922  data: 0.0006  max mem: 9147
Epoch: [60]  [ 800/1251]  eta: 0:01:37  lr: 0.003796  min_lr: 0.003796  loss: 2.8139 (3.4409)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6063 (0.8306)  time: 0.2296  data: 0.0007  max mem: 9147
Epoch: [60]  [1000/1251]  eta: 0:00:53  lr: 0.003794  min_lr: 0.003794  loss: 2.8626 (3.4132)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6118 (0.8056)  time: 0.2053  data: 0.0006  max mem: 9147
Epoch: [60]  [1200/1251]  eta: 0:00:10  lr: 0.003793  min_lr: 0.003793  loss: 2.8974 (3.3945)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6721 (0.8020)  time: 0.2219  data: 0.0007  max mem: 9147
Epoch: [60]  [1250/1251]  eta: 0:00:00  lr: 0.003792  min_lr: 0.003792  loss: 2.8633 (3.3877)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6695 (0.8004)  time: 0.1405  data: 0.0011  max mem: 9147
Epoch: [60] Total time: 0:04:26 (0.2127 s / it)
Averaged stats: lr: 0.003792  min_lr: 0.003792  loss: 2.8633 (3.3894)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6695 (0.8004)
Test:  [ 0/25]  eta: 0:02:23  loss: 0.9492 (0.9492)  acc1: 83.6000 (83.6000)  acc5: 94.4000 (94.4000)  time: 5.7597  data: 5.6793  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0541 (1.1234)  acc1: 75.6000 (75.7455)  acc5: 94.4000 (93.2727)  time: 0.6993  data: 0.6241  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4108 (1.3580)  acc1: 67.2000 (70.4381)  acc5: 88.8000 (90.0000)  time: 0.1853  data: 0.1121  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5289 (1.3794)  acc1: 66.0000 (69.8240)  acc5: 86.8000 (89.8240)  time: 0.1996  data: 0.1281  max mem: 9147
Test: Total time: 0:00:10 (0.4054 s / it)
* Acc@1 70.304 Acc@5 90.066 loss 1.365
Accuracy of the model on the 50000 test images: 70.3%
Max accuracy: 70.30%
Epoch: [61]  [   0/1251]  eta: 0:56:44  lr: 0.003792  min_lr: 0.003792  loss: 2.9107 (2.9107)  weight_decay: 0.0500 (0.0500)  time: 2.7213  data: 1.8993  max mem: 9147
Epoch: [61]  [ 200/1251]  eta: 0:03:55  lr: 0.003791  min_lr: 0.003791  loss: 2.8364 (3.3134)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7131 (0.7530)  time: 0.2201  data: 0.0007  max mem: 9147
Epoch: [61]  [ 400/1251]  eta: 0:03:05  lr: 0.003789  min_lr: 0.003789  loss: 2.7960 (3.3299)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7137 (0.7827)  time: 0.2045  data: 0.0006  max mem: 9147
Epoch: [61]  [ 600/1251]  eta: 0:02:19  lr: 0.003787  min_lr: 0.003787  loss: 3.3246 (3.3738)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6337 (0.7911)  time: 0.2008  data: 0.0007  max mem: 9147
Epoch: [61]  [ 800/1251]  eta: 0:01:36  lr: 0.003786  min_lr: 0.003786  loss: 2.9734 (3.3686)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7327 (0.7857)  time: 0.2000  data: 0.0007  max mem: 9147
Epoch: [61]  [1000/1251]  eta: 0:00:53  lr: 0.003784  min_lr: 0.003784  loss: 3.6166 (3.4061)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8181 (0.8050)  time: 0.1929  data: 0.0006  max mem: 9147
Epoch: [61]  [1200/1251]  eta: 0:00:10  lr: 0.003782  min_lr: 0.003782  loss: 3.0030 (3.3999)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0363 (0.8329)  time: 0.2151  data: 0.0007  max mem: 9147
Epoch: [61]  [1250/1251]  eta: 0:00:00  lr: 0.003782  min_lr: 0.003782  loss: 2.7966 (3.3975)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7739 (0.8289)  time: 0.1505  data: 0.0014  max mem: 9147
Epoch: [61] Total time: 0:04:26 (0.2128 s / it)
Averaged stats: lr: 0.003782  min_lr: 0.003782  loss: 2.7966 (3.3737)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7739 (0.8289)
Test:  [ 0/25]  eta: 0:02:16  loss: 1.0837 (1.0837)  acc1: 78.8000 (78.8000)  acc5: 93.2000 (93.2000)  time: 5.4715  data: 5.3911  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1542 (1.2098)  acc1: 76.4000 (75.3091)  acc5: 94.8000 (93.4182)  time: 0.7577  data: 0.6721  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4931 (1.4179)  acc1: 68.4000 (70.3810)  acc5: 87.6000 (89.7905)  time: 0.2134  data: 0.1351  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5466 (1.4305)  acc1: 66.8000 (69.9040)  acc5: 88.0000 (89.6640)  time: 0.2202  data: 0.1436  max mem: 9147
Test: Total time: 0:00:10 (0.4108 s / it)
* Acc@1 69.632 Acc@5 89.694 loss 1.428
Accuracy of the model on the 50000 test images: 69.6%
Max accuracy: 70.30%
Epoch: [62]  [   0/1251]  eta: 1:06:05  lr: 0.003782  min_lr: 0.003782  loss: 2.7399 (2.7399)  weight_decay: 0.0500 (0.0500)  time: 3.1695  data: 2.9326  max mem: 9147
Epoch: [62]  [ 200/1251]  eta: 0:03:55  lr: 0.003780  min_lr: 0.003780  loss: 2.7797 (3.3351)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7999 (0.7701)  time: 0.2141  data: 0.0006  max mem: 9147
Epoch: [62]  [ 400/1251]  eta: 0:03:05  lr: 0.003779  min_lr: 0.003779  loss: 2.7926 (3.3336)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8182 (0.7993)  time: 0.1998  data: 0.0005  max mem: 9147
Epoch: [62]  [ 600/1251]  eta: 0:02:21  lr: 0.003777  min_lr: 0.003777  loss: 4.1530 (3.3506)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7737 (0.8045)  time: 0.2339  data: 0.0008  max mem: 9147
Epoch: [62]  [ 800/1251]  eta: 0:01:36  lr: 0.003775  min_lr: 0.003775  loss: 3.9491 (3.3935)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6397 (0.7944)  time: 0.1944  data: 0.0006  max mem: 9147
Epoch: [62]  [1000/1251]  eta: 0:00:53  lr: 0.003774  min_lr: 0.003774  loss: 3.0503 (3.4022)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7036 (0.7914)  time: 0.2293  data: 0.0006  max mem: 9147
Epoch: [62]  [1200/1251]  eta: 0:00:10  lr: 0.003772  min_lr: 0.003772  loss: 3.5557 (3.4164)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7181 (0.7940)  time: 0.2233  data: 0.0006  max mem: 9147
Epoch: [62]  [1250/1251]  eta: 0:00:00  lr: 0.003772  min_lr: 0.003772  loss: 2.8192 (3.4170)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7159 (0.7957)  time: 0.1386  data: 0.0012  max mem: 9147
Epoch: [62] Total time: 0:04:25 (0.2124 s / it)
Averaged stats: lr: 0.003772  min_lr: 0.003772  loss: 2.8192 (3.3837)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7159 (0.7957)
Test:  [ 0/25]  eta: 0:02:26  loss: 0.8253 (0.8253)  acc1: 83.6000 (83.6000)  acc5: 95.2000 (95.2000)  time: 5.8789  data: 5.7986  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1207 (1.0886)  acc1: 76.0000 (76.4364)  acc5: 94.4000 (93.9273)  time: 0.7748  data: 0.6911  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3345 (1.3574)  acc1: 69.6000 (71.3143)  acc5: 90.0000 (90.4000)  time: 0.1963  data: 0.1161  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5150 (1.3812)  acc1: 68.0000 (70.9120)  acc5: 88.0000 (90.1440)  time: 0.2023  data: 0.1234  max mem: 9147
Test: Total time: 0:00:10 (0.4131 s / it)
* Acc@1 70.194 Acc@5 90.040 loss 1.384
Accuracy of the model on the 50000 test images: 70.2%
Max accuracy: 70.30%
Epoch: [63]  [   0/1251]  eta: 1:06:49  lr: 0.003772  min_lr: 0.003772  loss: 4.3495 (4.3495)  weight_decay: 0.0500 (0.0500)  time: 3.2048  data: 2.4466  max mem: 9147
Epoch: [63]  [ 200/1251]  eta: 0:03:52  lr: 0.003770  min_lr: 0.003770  loss: 2.7453 (3.3203)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8727 (0.8976)  time: 0.2004  data: 0.0007  max mem: 9147
Epoch: [63]  [ 400/1251]  eta: 0:03:01  lr: 0.003768  min_lr: 0.003768  loss: 3.4179 (3.3352)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8022 (0.8625)  time: 0.1930  data: 0.0005  max mem: 9147
Epoch: [63]  [ 600/1251]  eta: 0:02:14  lr: 0.003767  min_lr: 0.003767  loss: 2.7588 (3.3475)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7185 (0.8146)  time: 0.1942  data: 0.0006  max mem: 9147
Epoch: [63]  [ 800/1251]  eta: 0:01:33  lr: 0.003765  min_lr: 0.003765  loss: 2.8769 (3.3696)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7346 (0.8116)  time: 0.2206  data: 0.0007  max mem: 9147
Epoch: [63]  [1000/1251]  eta: 0:00:52  lr: 0.003763  min_lr: 0.003763  loss: 3.7597 (3.3735)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7088 (0.7989)  time: 0.2193  data: 0.0013  max mem: 9147
Epoch: [63]  [1200/1251]  eta: 0:00:10  lr: 0.003762  min_lr: 0.003762  loss: 3.7607 (3.3643)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7682 (0.8146)  time: 0.2015  data: 0.0005  max mem: 9147
Epoch: [63]  [1250/1251]  eta: 0:00:00  lr: 0.003761  min_lr: 0.003761  loss: 2.9942 (3.3644)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8130 (0.8133)  time: 0.1404  data: 0.0013  max mem: 9147
Epoch: [63] Total time: 0:04:22 (0.2096 s / it)
Averaged stats: lr: 0.003761  min_lr: 0.003761  loss: 2.9942 (3.3768)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8130 (0.8133)
Test:  [ 0/25]  eta: 0:02:29  loss: 0.8726 (0.8726)  acc1: 81.2000 (81.2000)  acc5: 96.0000 (96.0000)  time: 5.9982  data: 5.9177  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1145 (1.1653)  acc1: 75.2000 (75.6364)  acc5: 94.4000 (94.0364)  time: 0.7643  data: 0.6921  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4380 (1.4191)  acc1: 68.8000 (70.3810)  acc5: 88.4000 (90.1333)  time: 0.1927  data: 0.1225  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.6122 (1.4276)  acc1: 66.4000 (70.0320)  acc5: 86.4000 (89.9360)  time: 0.1920  data: 0.1224  max mem: 9147
Test: Total time: 0:00:10 (0.4095 s / it)
* Acc@1 70.154 Acc@5 89.734 loss 1.428
Accuracy of the model on the 50000 test images: 70.2%
Max accuracy: 70.30%
Epoch: [64]  [   0/1251]  eta: 1:05:58  lr: 0.003761  min_lr: 0.003761  loss: 2.3681 (2.3681)  weight_decay: 0.0500 (0.0500)  time: 3.1643  data: 2.4094  max mem: 9147
Epoch: [64]  [ 200/1251]  eta: 0:03:56  lr: 0.003760  min_lr: 0.003760  loss: 2.7148 (3.3637)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7866 (0.8604)  time: 0.2201  data: 0.0007  max mem: 9147
Epoch: [64]  [ 400/1251]  eta: 0:03:07  lr: 0.003758  min_lr: 0.003758  loss: 3.0502 (3.3454)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7523 (0.8339)  time: 0.2191  data: 0.0006  max mem: 9147
Epoch: [64]  [ 600/1251]  eta: 0:02:21  lr: 0.003756  min_lr: 0.003756  loss: 3.9345 (3.3541)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8005 (0.8702)  time: 0.2242  data: 0.0007  max mem: 9147
Epoch: [64]  [ 800/1251]  eta: 0:01:35  lr: 0.003754  min_lr: 0.003754  loss: 2.8105 (3.3553)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7251 (0.8429)  time: 0.2051  data: 0.0009  max mem: 9147
Epoch: [64]  [1000/1251]  eta: 0:00:53  lr: 0.003753  min_lr: 0.003753  loss: 3.3784 (3.3424)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6707 (0.8168)  time: 0.2058  data: 0.0007  max mem: 9147
Epoch: [64]  [1200/1251]  eta: 0:00:10  lr: 0.003751  min_lr: 0.003751  loss: 2.7645 (3.3471)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8812 (0.8233)  time: 0.2201  data: 0.0007  max mem: 9147
Epoch: [64]  [1250/1251]  eta: 0:00:00  lr: 0.003751  min_lr: 0.003751  loss: 2.8679 (3.3448)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8090 (0.8231)  time: 0.1430  data: 0.0012  max mem: 9147
Epoch: [64] Total time: 0:04:26 (0.2126 s / it)
Averaged stats: lr: 0.003751  min_lr: 0.003751  loss: 2.8679 (3.3662)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8090 (0.8231)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.9127 (0.9127)  acc1: 81.2000 (81.2000)  acc5: 94.8000 (94.8000)  time: 5.4488  data: 5.3684  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0661 (1.1352)  acc1: 76.0000 (75.3091)  acc5: 93.2000 (93.7455)  time: 0.7218  data: 0.6276  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3437 (1.3774)  acc1: 67.2000 (70.1524)  acc5: 88.8000 (90.1143)  time: 0.2088  data: 0.1264  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5812 (1.3911)  acc1: 66.8000 (70.1760)  acc5: 87.2000 (89.8720)  time: 0.2035  data: 0.1263  max mem: 9147
Test: Total time: 0:00:10 (0.4003 s / it)
* Acc@1 70.586 Acc@5 90.170 loss 1.387
Accuracy of the model on the 50000 test images: 70.6%
Max accuracy: 70.59%
Epoch: [65]  [   0/1251]  eta: 0:59:43  lr: 0.003751  min_lr: 0.003751  loss: 2.4545 (2.4545)  weight_decay: 0.0500 (0.0500)  time: 2.8643  data: 2.6600  max mem: 9147
Epoch: [65]  [ 200/1251]  eta: 0:03:35  lr: 0.003749  min_lr: 0.003749  loss: 3.4007 (3.4020)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7331 (0.7931)  time: 0.1925  data: 0.0006  max mem: 9147
Epoch: [65]  [ 400/1251]  eta: 0:02:50  lr: 0.003747  min_lr: 0.003747  loss: 3.9251 (3.4041)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8200 (0.8447)  time: 0.1911  data: 0.0005  max mem: 9147
Epoch: [65]  [ 600/1251]  eta: 0:02:07  lr: 0.003745  min_lr: 0.003745  loss: 2.8000 (3.3624)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6338 (0.8077)  time: 0.1993  data: 0.0011  max mem: 9147
Epoch: [65]  [ 800/1251]  eta: 0:01:28  lr: 0.003744  min_lr: 0.003744  loss: 2.7432 (3.3718)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7455 (0.8008)  time: 0.1867  data: 0.0005  max mem: 9147
Epoch: [65]  [1000/1251]  eta: 0:00:49  lr: 0.003742  min_lr: 0.003742  loss: 2.7334 (3.3678)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8140 (0.8125)  time: 0.2225  data: 0.0007  max mem: 9147
Epoch: [65]  [1200/1251]  eta: 0:00:10  lr: 0.003740  min_lr: 0.003740  loss: 3.6976 (3.3780)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7585 (0.8339)  time: 0.2448  data: 0.0006  max mem: 9147
Epoch: [65]  [1250/1251]  eta: 0:00:00  lr: 0.003740  min_lr: 0.003740  loss: 3.7212 (3.3834)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6629 (0.8269)  time: 0.1419  data: 0.0010  max mem: 9147
Epoch: [65] Total time: 0:04:09 (0.1996 s / it)
Averaged stats: lr: 0.003740  min_lr: 0.003740  loss: 3.7212 (3.3656)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6629 (0.8269)
Test:  [ 0/25]  eta: 0:01:39  loss: 1.0007 (1.0007)  acc1: 81.2000 (81.2000)  acc5: 94.4000 (94.4000)  time: 3.9951  data: 3.9121  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 1.1336 (1.1573)  acc1: 76.4000 (74.9091)  acc5: 94.4000 (93.8182)  time: 0.6204  data: 0.5450  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4175 (1.4321)  acc1: 68.0000 (70.0191)  acc5: 88.8000 (90.1143)  time: 0.2422  data: 0.1679  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.6153 (1.4573)  acc1: 67.2000 (69.5680)  acc5: 86.8000 (89.9680)  time: 0.2142  data: 0.1414  max mem: 9147
Test: Total time: 0:00:10 (0.4012 s / it)
* Acc@1 69.940 Acc@5 89.892 loss 1.453
Accuracy of the model on the 50000 test images: 69.9%
Max accuracy: 70.59%
Epoch: [66]  [   0/1251]  eta: 1:08:35  lr: 0.003740  min_lr: 0.003740  loss: 2.6188 (2.6188)  weight_decay: 0.0500 (0.0500)  time: 3.2898  data: 2.6761  max mem: 9147
Epoch: [66]  [ 200/1251]  eta: 0:03:54  lr: 0.003738  min_lr: 0.003738  loss: 2.8283 (3.3173)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7356 (0.7920)  time: 0.2093  data: 0.0007  max mem: 9147
Epoch: [66]  [ 400/1251]  eta: 0:03:04  lr: 0.003736  min_lr: 0.003736  loss: 2.9250 (3.3432)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7517 (0.7819)  time: 0.2096  data: 0.0006  max mem: 9147
Epoch: [66]  [ 600/1251]  eta: 0:02:19  lr: 0.003734  min_lr: 0.003734  loss: 2.9016 (3.3513)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7695 (0.8183)  time: 0.2000  data: 0.0007  max mem: 9147
Epoch: [66]  [ 800/1251]  eta: 0:01:36  lr: 0.003732  min_lr: 0.003732  loss: 3.1309 (3.3633)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7583 (0.8019)  time: 0.2246  data: 0.0007  max mem: 9147
Epoch: [66]  [1000/1251]  eta: 0:00:53  lr: 0.003731  min_lr: 0.003731  loss: 2.9295 (3.3597)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7165 (nan)  time: 0.1948  data: 0.0005  max mem: 9147
Epoch: [66]  [1200/1251]  eta: 0:00:10  lr: 0.003729  min_lr: 0.003729  loss: 2.7013 (3.3466)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7262 (nan)  time: 0.2246  data: 0.0006  max mem: 9147
Epoch: [66]  [1250/1251]  eta: 0:00:00  lr: 0.003728  min_lr: 0.003728  loss: 2.9994 (3.3499)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7438 (nan)  time: 0.1392  data: 0.0011  max mem: 9147
Epoch: [66] Total time: 0:04:25 (0.2126 s / it)
Averaged stats: lr: 0.003728  min_lr: 0.003728  loss: 2.9994 (3.3766)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7438 (nan)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.9721 (0.9721)  acc1: 80.0000 (80.0000)  acc5: 94.4000 (94.4000)  time: 5.6144  data: 5.5195  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.1857 (1.1652)  acc1: 76.0000 (74.7636)  acc5: 94.4000 (93.7455)  time: 0.6711  data: 0.5876  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4240 (1.3996)  acc1: 66.8000 (70.0571)  acc5: 90.0000 (90.4000)  time: 0.1736  data: 0.0977  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4897 (1.4253)  acc1: 66.8000 (69.4880)  acc5: 88.0000 (90.0640)  time: 0.1937  data: 0.1226  max mem: 9147
Test: Total time: 0:00:09 (0.3992 s / it)
* Acc@1 70.282 Acc@5 90.016 loss 1.418
Accuracy of the model on the 50000 test images: 70.3%
Max accuracy: 70.59%
Epoch: [67]  [   0/1251]  eta: 1:06:04  lr: 0.003728  min_lr: 0.003728  loss: 4.9386 (4.9386)  weight_decay: 0.0500 (0.0500)  time: 3.1689  data: 2.4627  max mem: 9147
Epoch: [67]  [ 200/1251]  eta: 0:03:58  lr: 0.003727  min_lr: 0.003727  loss: 2.7837 (3.3044)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7520 (0.8665)  time: 0.2022  data: 0.0006  max mem: 9147
Epoch: [67]  [ 400/1251]  eta: 0:03:08  lr: 0.003725  min_lr: 0.003725  loss: 3.9885 (3.3590)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9045 (0.8935)  time: 0.2243  data: 0.0007  max mem: 9147
Epoch: [67]  [ 600/1251]  eta: 0:02:22  lr: 0.003723  min_lr: 0.003723  loss: 3.0213 (3.3752)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6681 (0.8307)  time: 0.2261  data: 0.0007  max mem: 9147
Epoch: [67]  [ 800/1251]  eta: 0:01:38  lr: 0.003721  min_lr: 0.003721  loss: 2.8461 (3.3703)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7336 (0.8067)  time: 0.2285  data: 0.0006  max mem: 9147
Epoch: [67]  [1000/1251]  eta: 0:00:54  lr: 0.003719  min_lr: 0.003719  loss: 2.6716 (3.3443)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7373 (0.8028)  time: 0.2229  data: 0.0007  max mem: 9147
Epoch: [67]  [1200/1251]  eta: 0:00:11  lr: 0.003717  min_lr: 0.003717  loss: 2.8189 (3.3480)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6754 (0.7953)  time: 0.2190  data: 0.0231  max mem: 9147
Epoch: [67]  [1250/1251]  eta: 0:00:00  lr: 0.003717  min_lr: 0.003717  loss: 2.8086 (3.3490)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6034 (0.7882)  time: 0.1420  data: 0.0015  max mem: 9147
Epoch: [67] Total time: 0:04:29 (0.2156 s / it)
Averaged stats: lr: 0.003717  min_lr: 0.003717  loss: 2.8086 (3.3477)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6034 (0.7882)
Test:  [ 0/25]  eta: 0:01:21  loss: 0.8907 (0.8907)  acc1: 82.0000 (82.0000)  acc5: 94.4000 (94.4000)  time: 3.2401  data: 3.1411  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 1.0368 (1.0900)  acc1: 78.0000 (75.8546)  acc5: 94.4000 (93.7091)  time: 0.6271  data: 0.5506  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3976 (1.3534)  acc1: 67.6000 (71.1429)  acc5: 88.4000 (90.1714)  time: 0.2995  data: 0.2259  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5273 (1.3639)  acc1: 67.2000 (70.6720)  acc5: 88.0000 (90.1440)  time: 0.2294  data: 0.1573  max mem: 9147
Test: Total time: 0:00:10 (0.4043 s / it)
* Acc@1 70.624 Acc@5 90.326 loss 1.348
Accuracy of the model on the 50000 test images: 70.6%
Max accuracy: 70.62%
Epoch: [68]  [   0/1251]  eta: 0:58:20  lr: 0.003717  min_lr: 0.003717  loss: 2.4604 (2.4604)  weight_decay: 0.0500 (0.0500)  time: 2.7983  data: 2.5610  max mem: 9147
Epoch: [68]  [ 200/1251]  eta: 0:03:52  lr: 0.003715  min_lr: 0.003715  loss: 3.0572 (3.2048)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7671 (0.8199)  time: 0.1918  data: 0.0012  max mem: 9147
Epoch: [68]  [ 400/1251]  eta: 0:02:56  lr: 0.003713  min_lr: 0.003713  loss: 3.0410 (3.2387)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7297 (0.7822)  time: 0.2244  data: 0.0020  max mem: 9147
Epoch: [68]  [ 600/1251]  eta: 0:02:11  lr: 0.003711  min_lr: 0.003711  loss: 3.7160 (3.2884)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6694 (0.7776)  time: 0.1737  data: 0.0004  max mem: 9147
Epoch: [68]  [ 800/1251]  eta: 0:01:30  lr: 0.003710  min_lr: 0.003710  loss: 3.5630 (3.3020)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6762 (0.7713)  time: 0.2296  data: 0.0235  max mem: 9147
Epoch: [68]  [1000/1251]  eta: 0:00:50  lr: 0.003708  min_lr: 0.003708  loss: 2.7694 (3.3331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7610 (0.7908)  time: 0.1930  data: 0.0005  max mem: 9147
Epoch: [68]  [1200/1251]  eta: 0:00:10  lr: 0.003706  min_lr: 0.003706  loss: 3.1957 (3.3443)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7511 (0.7952)  time: 0.1900  data: 0.0005  max mem: 9147
Epoch: [68]  [1250/1251]  eta: 0:00:00  lr: 0.003705  min_lr: 0.003705  loss: 3.2239 (3.3447)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7408 (0.7928)  time: 0.1392  data: 0.0008  max mem: 9147
Epoch: [68] Total time: 0:04:11 (0.2009 s / it)
Averaged stats: lr: 0.003705  min_lr: 0.003705  loss: 3.2239 (3.3423)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7408 (0.7928)
Test:  [ 0/25]  eta: 0:02:20  loss: 1.0350 (1.0350)  acc1: 82.8000 (82.8000)  acc5: 94.8000 (94.8000)  time: 5.6134  data: 5.5158  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.1468 (1.2127)  acc1: 75.2000 (75.8545)  acc5: 94.8000 (93.7818)  time: 0.7052  data: 0.6314  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4894 (1.4340)  acc1: 67.2000 (71.1429)  acc5: 88.4000 (90.2286)  time: 0.1871  data: 0.1166  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.6374 (1.4488)  acc1: 67.2000 (70.7520)  acc5: 88.0000 (90.1600)  time: 0.1958  data: 0.1254  max mem: 9147
Test: Total time: 0:00:09 (0.3992 s / it)
* Acc@1 70.626 Acc@5 90.220 loss 1.439
Accuracy of the model on the 50000 test images: 70.6%
Max accuracy: 70.63%
Epoch: [69]  [   0/1251]  eta: 0:46:49  lr: 0.003705  min_lr: 0.003705  loss: 2.6374 (2.6374)  weight_decay: 0.0500 (0.0500)  time: 2.2458  data: 1.9764  max mem: 9147
Epoch: [69]  [ 200/1251]  eta: 0:03:54  lr: 0.003703  min_lr: 0.003703  loss: 2.8106 (3.2926)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7478 (0.7543)  time: 0.2105  data: 0.0023  max mem: 9147
Epoch: [69]  [ 400/1251]  eta: 0:03:06  lr: 0.003702  min_lr: 0.003702  loss: 2.6456 (3.3577)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7861 (0.7864)  time: 0.1986  data: 0.0007  max mem: 9147
Epoch: [69]  [ 600/1251]  eta: 0:02:20  lr: 0.003700  min_lr: 0.003700  loss: 2.8897 (3.3292)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7078 (0.7812)  time: 0.2240  data: 0.0008  max mem: 9147
Epoch: [69]  [ 800/1251]  eta: 0:01:37  lr: 0.003698  min_lr: 0.003698  loss: 2.7657 (3.3069)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8352 (0.7788)  time: 0.2080  data: 0.0005  max mem: 9147
Epoch: [69]  [1000/1251]  eta: 0:00:53  lr: 0.003696  min_lr: 0.003696  loss: 2.7922 (3.3169)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6997 (0.7809)  time: 0.2346  data: 0.0006  max mem: 9147
Epoch: [69]  [1200/1251]  eta: 0:00:10  lr: 0.003694  min_lr: 0.003694  loss: 2.8949 (3.3301)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6902 (0.7716)  time: 0.1938  data: 0.0006  max mem: 9147
Epoch: [69]  [1250/1251]  eta: 0:00:00  lr: 0.003694  min_lr: 0.003694  loss: 2.8733 (3.3358)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6847 (0.7734)  time: 0.1376  data: 0.0008  max mem: 9147
Epoch: [69] Total time: 0:04:25 (0.2122 s / it)
Averaged stats: lr: 0.003694  min_lr: 0.003694  loss: 2.8733 (3.3344)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6847 (0.7734)
Test:  [ 0/25]  eta: 0:02:12  loss: 1.0817 (1.0817)  acc1: 82.8000 (82.8000)  acc5: 95.2000 (95.2000)  time: 5.2877  data: 5.1519  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.1872 (1.2105)  acc1: 75.2000 (75.8909)  acc5: 95.2000 (93.8182)  time: 0.6959  data: 0.5997  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.5267 (1.4470)  acc1: 68.4000 (71.1429)  acc5: 88.0000 (90.0381)  time: 0.2024  data: 0.1198  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.6337 (1.4551)  acc1: 67.6000 (70.8160)  acc5: 86.8000 (89.8400)  time: 0.2062  data: 0.1271  max mem: 9147
Test: Total time: 0:00:09 (0.3954 s / it)
* Acc@1 70.298 Acc@5 89.906 loss 1.453
Accuracy of the model on the 50000 test images: 70.3%
Max accuracy: 70.63%
Epoch: [70]  [   0/1251]  eta: 1:03:02  lr: 0.003694  min_lr: 0.003694  loss: 3.1646 (3.1646)  weight_decay: 0.0500 (0.0500)  time: 3.0237  data: 2.7767  max mem: 9147
Epoch: [70]  [ 200/1251]  eta: 0:03:53  lr: 0.003692  min_lr: 0.003692  loss: 3.1231 (3.4033)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7647 (0.7708)  time: 0.1951  data: 0.0006  max mem: 9147
Epoch: [70]  [ 400/1251]  eta: 0:02:55  lr: 0.003690  min_lr: 0.003690  loss: 2.9102 (3.3721)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7182 (0.7304)  time: 0.1839  data: 0.0005  max mem: 9147
Epoch: [70]  [ 600/1251]  eta: 0:02:13  lr: 0.003688  min_lr: 0.003688  loss: 2.6576 (3.3817)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6673 (0.7460)  time: 0.2190  data: 0.0111  max mem: 9147
Epoch: [70]  [ 800/1251]  eta: 0:01:32  lr: 0.003686  min_lr: 0.003686  loss: 2.7968 (3.3665)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7856 (0.7688)  time: 0.1900  data: 0.0011  max mem: 9147
Epoch: [70]  [1000/1251]  eta: 0:00:50  lr: 0.003684  min_lr: 0.003684  loss: 3.0513 (3.3587)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6986 (0.7704)  time: 0.1938  data: 0.0006  max mem: 9147
Epoch: [70]  [1200/1251]  eta: 0:00:10  lr: 0.003682  min_lr: 0.003682  loss: 2.7227 (3.3424)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7193 (0.7819)  time: 0.1887  data: 0.0012  max mem: 9147
Epoch: [70]  [1250/1251]  eta: 0:00:00  lr: 0.003682  min_lr: 0.003682  loss: 2.8832 (3.3413)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7193 (0.7800)  time: 0.1392  data: 0.0009  max mem: 9147
Epoch: [70] Total time: 0:04:11 (0.2009 s / it)
Averaged stats: lr: 0.003682  min_lr: 0.003682  loss: 2.8832 (3.3496)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7193 (0.7800)
Test:  [ 0/25]  eta: 0:02:15  loss: 0.9307 (0.9307)  acc1: 83.2000 (83.2000)  acc5: 93.2000 (93.2000)  time: 5.4336  data: 5.3533  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0801 (1.1216)  acc1: 78.4000 (76.2182)  acc5: 94.4000 (94.0727)  time: 0.7108  data: 0.6173  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4219 (1.3958)  acc1: 68.4000 (71.0095)  acc5: 88.0000 (89.9429)  time: 0.1892  data: 0.1072  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5636 (1.4096)  acc1: 68.0000 (70.6240)  acc5: 86.4000 (89.8080)  time: 0.1864  data: 0.1096  max mem: 9147
Test: Total time: 0:00:09 (0.3860 s / it)
* Acc@1 70.630 Acc@5 90.022 loss 1.401
Accuracy of the model on the 50000 test images: 70.6%
Max accuracy: 70.63%
Epoch: [71]  [   0/1251]  eta: 0:57:11  lr: 0.003681  min_lr: 0.003681  loss: 2.6107 (2.6107)  weight_decay: 0.0500 (0.0500)  time: 2.7432  data: 2.5335  max mem: 9147
Epoch: [71]  [ 200/1251]  eta: 0:03:35  lr: 0.003680  min_lr: 0.003680  loss: 2.8415 (3.2912)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6306 (0.7458)  time: 0.1947  data: 0.0006  max mem: 9147
Epoch: [71]  [ 400/1251]  eta: 0:02:54  lr: 0.003678  min_lr: 0.003678  loss: 2.7836 (3.3240)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6602 (0.7383)  time: 0.2245  data: 0.0006  max mem: 9147
Epoch: [71]  [ 600/1251]  eta: 0:02:14  lr: 0.003676  min_lr: 0.003676  loss: 2.6233 (3.3327)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6871 (0.7540)  time: 0.2038  data: 0.0006  max mem: 9147
Epoch: [71]  [ 800/1251]  eta: 0:01:34  lr: 0.003674  min_lr: 0.003674  loss: 2.9408 (3.3178)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6961 (0.7701)  time: 0.2048  data: 0.0006  max mem: 9147
Epoch: [71]  [1000/1251]  eta: 0:00:51  lr: 0.003672  min_lr: 0.003672  loss: 2.8620 (3.3432)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7990 (0.7833)  time: 0.1942  data: 0.0006  max mem: 9147
Epoch: [71]  [1200/1251]  eta: 0:00:10  lr: 0.003670  min_lr: 0.003670  loss: 2.9355 (3.3329)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6883 (0.7795)  time: 0.1907  data: 0.0005  max mem: 9147
Epoch: [71]  [1250/1251]  eta: 0:00:00  lr: 0.003669  min_lr: 0.003669  loss: 3.1717 (3.3374)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7580 (0.7796)  time: 0.1374  data: 0.0008  max mem: 9147
Epoch: [71] Total time: 0:04:15 (0.2040 s / it)
Averaged stats: lr: 0.003669  min_lr: 0.003669  loss: 3.1717 (3.3286)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7580 (0.7796)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.9665 (0.9665)  acc1: 81.6000 (81.6000)  acc5: 96.4000 (96.4000)  time: 5.6708  data: 5.5802  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1045 (1.1768)  acc1: 75.6000 (75.3818)  acc5: 94.8000 (94.4000)  time: 0.7604  data: 0.6824  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4659 (1.4324)  acc1: 67.2000 (70.4000)  acc5: 88.8000 (90.3238)  time: 0.2104  data: 0.1372  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5834 (1.4380)  acc1: 66.8000 (70.0960)  acc5: 87.6000 (90.1760)  time: 0.2073  data: 0.1363  max mem: 9147
Test: Total time: 0:00:10 (0.4101 s / it)
* Acc@1 70.308 Acc@5 90.290 loss 1.430
Accuracy of the model on the 50000 test images: 70.3%
Max accuracy: 70.63%
Epoch: [72]  [   0/1251]  eta: 1:11:17  lr: 0.003669  min_lr: 0.003669  loss: 2.6592 (2.6592)  weight_decay: 0.0500 (0.0500)  time: 3.4189  data: 3.2256  max mem: 9147
Epoch: [72]  [ 200/1251]  eta: 0:03:32  lr: 0.003667  min_lr: 0.003667  loss: 3.0087 (3.2312)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8341 (0.8564)  time: 0.1939  data: 0.0006  max mem: 9147
Epoch: [72]  [ 400/1251]  eta: 0:02:56  lr: 0.003665  min_lr: 0.003665  loss: 2.8798 (3.2862)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7779 (0.8078)  time: 0.2146  data: 0.0083  max mem: 9147
Epoch: [72]  [ 600/1251]  eta: 0:02:15  lr: 0.003663  min_lr: 0.003663  loss: 3.5780 (3.3160)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7686 (0.7867)  time: 0.2172  data: 0.0006  max mem: 9147
Epoch: [72]  [ 800/1251]  eta: 0:01:34  lr: 0.003661  min_lr: 0.003661  loss: 3.8112 (3.3228)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7324 (0.7916)  time: 0.2100  data: 0.0009  max mem: 9147
Epoch: [72]  [1000/1251]  eta: 0:00:52  lr: 0.003659  min_lr: 0.003659  loss: 2.7100 (3.3215)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7167 (0.8020)  time: 0.1807  data: 0.0006  max mem: 9147
Epoch: [72]  [1200/1251]  eta: 0:00:10  lr: 0.003657  min_lr: 0.003657  loss: 3.9972 (3.3176)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6881 (0.7962)  time: 0.2336  data: 0.0118  max mem: 9147
Epoch: [72]  [1250/1251]  eta: 0:00:00  lr: 0.003657  min_lr: 0.003657  loss: 2.8175 (3.3126)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7347 (0.7935)  time: 0.1537  data: 0.0122  max mem: 9147
Epoch: [72] Total time: 0:04:21 (0.2088 s / it)
Averaged stats: lr: 0.003657  min_lr: 0.003657  loss: 2.8175 (3.3385)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7347 (0.7935)
Test:  [ 0/25]  eta: 0:02:25  loss: 0.8812 (0.8812)  acc1: 83.6000 (83.6000)  acc5: 96.4000 (96.4000)  time: 5.8098  data: 5.7109  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1963 (1.1518)  acc1: 74.4000 (76.1818)  acc5: 94.8000 (94.2545)  time: 0.7580  data: 0.6758  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4032 (1.3891)  acc1: 67.6000 (71.1619)  acc5: 88.8000 (90.5524)  time: 0.2024  data: 0.1266  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5342 (1.4037)  acc1: 67.6000 (70.6880)  acc5: 88.8000 (90.3040)  time: 0.2100  data: 0.1386  max mem: 9147
Test: Total time: 0:00:10 (0.4207 s / it)
* Acc@1 70.586 Acc@5 90.128 loss 1.397
Accuracy of the model on the 50000 test images: 70.6%
Max accuracy: 70.63%
Epoch: [73]  [   0/1251]  eta: 1:01:10  lr: 0.003657  min_lr: 0.003657  loss: 3.0475 (3.0475)  weight_decay: 0.0500 (0.0500)  time: 2.9340  data: 1.7326  max mem: 9147
Epoch: [73]  [ 200/1251]  eta: 0:03:53  lr: 0.003655  min_lr: 0.003655  loss: 2.9125 (3.3070)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6705 (0.7451)  time: 0.1978  data: 0.0007  max mem: 9147
Epoch: [73]  [ 400/1251]  eta: 0:03:07  lr: 0.003653  min_lr: 0.003653  loss: 2.8246 (3.3216)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8515 (0.7963)  time: 0.2248  data: 0.0008  max mem: 9147
Epoch: [73]  [ 600/1251]  eta: 0:02:22  lr: 0.003651  min_lr: 0.003651  loss: 2.9482 (3.3376)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7489 (0.7889)  time: 0.1960  data: 0.0007  max mem: 9147
Epoch: [73]  [ 800/1251]  eta: 0:01:37  lr: 0.003649  min_lr: 0.003649  loss: 2.6833 (3.3223)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8192 (0.7880)  time: 0.2101  data: 0.0006  max mem: 9147
Epoch: [73]  [1000/1251]  eta: 0:00:54  lr: 0.003647  min_lr: 0.003647  loss: 2.9210 (3.3077)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7655 (0.7945)  time: 0.2344  data: 0.0007  max mem: 9147
Epoch: [73]  [1200/1251]  eta: 0:00:10  lr: 0.003645  min_lr: 0.003645  loss: 2.6117 (3.3142)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9088 (0.8132)  time: 0.2049  data: 0.0007  max mem: 9147
Epoch: [73]  [1250/1251]  eta: 0:00:00  lr: 0.003644  min_lr: 0.003644  loss: 3.6112 (3.3228)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8168 (0.8119)  time: 0.1392  data: 0.0010  max mem: 9147
Epoch: [73] Total time: 0:04:28 (0.2148 s / it)
Averaged stats: lr: 0.003644  min_lr: 0.003644  loss: 3.6112 (3.3200)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8168 (0.8119)
Test:  [ 0/25]  eta: 0:02:14  loss: 1.0400 (1.0400)  acc1: 82.8000 (82.8000)  acc5: 94.8000 (94.8000)  time: 5.3729  data: 5.2770  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0828 (1.1780)  acc1: 74.8000 (74.9818)  acc5: 94.8000 (94.2545)  time: 0.6878  data: 0.5913  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4303 (1.4000)  acc1: 67.6000 (70.4191)  acc5: 90.4000 (90.3429)  time: 0.2005  data: 0.1171  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5153 (1.4278)  acc1: 67.2000 (69.8240)  acc5: 88.4000 (90.1440)  time: 0.1985  data: 0.1211  max mem: 9147
Test: Total time: 0:00:09 (0.3938 s / it)
* Acc@1 70.202 Acc@5 89.950 loss 1.427
Accuracy of the model on the 50000 test images: 70.2%
Max accuracy: 70.63%
Epoch: [74]  [   0/1251]  eta: 1:03:50  lr: 0.003644  min_lr: 0.003644  loss: 5.2726 (5.2726)  weight_decay: 0.0500 (0.0500)  time: 3.0616  data: 2.4564  max mem: 9147
Epoch: [74]  [ 200/1251]  eta: 0:03:54  lr: 0.003642  min_lr: 0.003642  loss: 2.8158 (3.3014)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6070 (0.6682)  time: 0.2063  data: 0.0007  max mem: 9147
Epoch: [74]  [ 400/1251]  eta: 0:03:03  lr: 0.003640  min_lr: 0.003640  loss: 3.1051 (3.2902)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7077 (0.7023)  time: 0.1932  data: 0.0005  max mem: 9147
Epoch: [74]  [ 600/1251]  eta: 0:02:15  lr: 0.003638  min_lr: 0.003638  loss: 2.8279 (3.2707)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6780 (0.7522)  time: 0.1927  data: 0.0006  max mem: 9147
Epoch: [74]  [ 800/1251]  eta: 0:01:33  lr: 0.003636  min_lr: 0.003636  loss: 2.9250 (3.2844)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6632 (0.7633)  time: 0.2146  data: 0.0007  max mem: 9147
Epoch: [74]  [1000/1251]  eta: 0:00:52  lr: 0.003634  min_lr: 0.003634  loss: 3.2305 (3.2820)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7134 (0.7591)  time: 0.2532  data: 0.0008  max mem: 9147
Epoch: [74]  [1200/1251]  eta: 0:00:10  lr: 0.003632  min_lr: 0.003632  loss: 2.9246 (3.2951)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7472 (0.7570)  time: 0.1989  data: 0.0005  max mem: 9147
Epoch: [74]  [1250/1251]  eta: 0:00:00  lr: 0.003631  min_lr: 0.003631  loss: 2.7006 (3.2937)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8520 (0.7655)  time: 0.1451  data: 0.0011  max mem: 9147
Epoch: [74] Total time: 0:04:18 (0.2065 s / it)
Averaged stats: lr: 0.003631  min_lr: 0.003631  loss: 2.7006 (3.3158)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8520 (0.7655)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.8480 (0.8480)  acc1: 80.4000 (80.4000)  acc5: 96.4000 (96.4000)  time: 5.4641  data: 5.3837  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1430 (1.1266)  acc1: 78.4000 (76.1091)  acc5: 94.4000 (94.5455)  time: 0.7542  data: 0.6616  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3942 (1.3468)  acc1: 69.6000 (71.4476)  acc5: 91.2000 (90.7048)  time: 0.2234  data: 0.1412  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4938 (1.3550)  acc1: 68.8000 (71.2960)  acc5: 88.4000 (90.5440)  time: 0.2191  data: 0.1412  max mem: 9147
Test: Total time: 0:00:10 (0.4118 s / it)
* Acc@1 71.202 Acc@5 90.692 loss 1.349
Accuracy of the model on the 50000 test images: 71.2%
Max accuracy: 71.20%
Epoch: [75]  [   0/1251]  eta: 0:58:04  lr: 0.003631  min_lr: 0.003631  loss: 3.0159 (3.0159)  weight_decay: 0.0500 (0.0500)  time: 2.7853  data: 2.5167  max mem: 9147
Epoch: [75]  [ 200/1251]  eta: 0:03:56  lr: 0.003629  min_lr: 0.003629  loss: 2.9167 (3.2897)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6833 (0.7970)  time: 0.2212  data: 0.0005  max mem: 9147
Epoch: [75]  [ 400/1251]  eta: 0:02:58  lr: 0.003627  min_lr: 0.003627  loss: 3.2256 (3.3030)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8253 (0.8234)  time: 0.1950  data: 0.0005  max mem: 9147
Epoch: [75]  [ 600/1251]  eta: 0:02:12  lr: 0.003625  min_lr: 0.003625  loss: 3.1717 (3.3035)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6969 (0.7756)  time: 0.1950  data: 0.0005  max mem: 9147
Epoch: [75]  [ 800/1251]  eta: 0:01:31  lr: 0.003623  min_lr: 0.003623  loss: 2.7594 (3.3023)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6948 (0.7934)  time: 0.2201  data: 0.0007  max mem: 9147
Epoch: [75]  [1000/1251]  eta: 0:00:51  lr: 0.003621  min_lr: 0.003621  loss: 3.5742 (3.3065)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7048 (0.7777)  time: 0.2070  data: 0.0007  max mem: 9147
Epoch: [75]  [1200/1251]  eta: 0:00:10  lr: 0.003619  min_lr: 0.003619  loss: 2.7660 (3.3138)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8504 (0.7849)  time: 0.2253  data: 0.0006  max mem: 9147
Epoch: [75]  [1250/1251]  eta: 0:00:00  lr: 0.003618  min_lr: 0.003618  loss: 3.1076 (3.3154)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6944 (0.7822)  time: 0.1392  data: 0.0013  max mem: 9147
Epoch: [75] Total time: 0:04:20 (0.2081 s / it)
Averaged stats: lr: 0.003618  min_lr: 0.003618  loss: 3.1076 (3.3206)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6944 (0.7822)
Test:  [ 0/25]  eta: 0:02:16  loss: 1.0550 (1.0550)  acc1: 79.2000 (79.2000)  acc5: 92.0000 (92.0000)  time: 5.4556  data: 5.3740  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0847 (1.1293)  acc1: 77.2000 (76.0727)  acc5: 94.4000 (93.4909)  time: 0.7005  data: 0.6259  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3858 (1.3585)  acc1: 67.6000 (71.0286)  acc5: 88.8000 (90.4571)  time: 0.1962  data: 0.1215  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4703 (1.3840)  acc1: 66.8000 (70.4000)  acc5: 88.0000 (90.2560)  time: 0.1948  data: 0.1214  max mem: 9147
Test: Total time: 0:00:09 (0.3929 s / it)
* Acc@1 70.616 Acc@5 90.324 loss 1.373
Accuracy of the model on the 50000 test images: 70.6%
Max accuracy: 71.20%
Epoch: [76]  [   0/1251]  eta: 1:04:16  lr: 0.003618  min_lr: 0.003618  loss: 3.9593 (3.9593)  weight_decay: 0.0500 (0.0500)  time: 3.0823  data: 1.5218  max mem: 9147
Epoch: [76]  [ 200/1251]  eta: 0:03:57  lr: 0.003616  min_lr: 0.003616  loss: 3.8373 (3.3244)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7932 (0.8631)  time: 0.2062  data: 0.0006  max mem: 9147
Epoch: [76]  [ 400/1251]  eta: 0:03:08  lr: 0.003614  min_lr: 0.003614  loss: 2.6877 (3.3116)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6369 (0.8211)  time: 0.2058  data: 0.0007  max mem: 9147
Epoch: [76]  [ 600/1251]  eta: 0:02:21  lr: 0.003612  min_lr: 0.003612  loss: 2.8365 (3.2937)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6653 (0.8143)  time: 0.1900  data: 0.0017  max mem: 9147
Epoch: [76]  [ 800/1251]  eta: 0:01:35  lr: 0.003610  min_lr: 0.003610  loss: 2.7696 (3.3332)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8800 (0.8199)  time: 0.2093  data: 0.0006  max mem: 9147
Epoch: [76]  [1000/1251]  eta: 0:00:53  lr: 0.003607  min_lr: 0.003607  loss: 2.7913 (3.3369)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7226 (0.8075)  time: 0.1944  data: 0.0010  max mem: 9147
Epoch: [76]  [1200/1251]  eta: 0:00:10  lr: 0.003605  min_lr: 0.003605  loss: 3.1432 (3.3331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7848 (0.8100)  time: 0.1903  data: 0.0010  max mem: 9147
Epoch: [76]  [1250/1251]  eta: 0:00:00  lr: 0.003605  min_lr: 0.003605  loss: 2.6604 (3.3260)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7716 (0.8073)  time: 0.1385  data: 0.0012  max mem: 9147
Epoch: [76] Total time: 0:04:20 (0.2081 s / it)
Averaged stats: lr: 0.003605  min_lr: 0.003605  loss: 2.6604 (3.3371)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7716 (0.8073)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.8981 (0.8981)  acc1: 80.4000 (80.4000)  acc5: 95.2000 (95.2000)  time: 5.4507  data: 5.3703  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0691 (1.1226)  acc1: 74.8000 (75.4182)  acc5: 94.8000 (94.0364)  time: 0.7710  data: 0.6776  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3690 (1.3502)  acc1: 69.6000 (71.0667)  acc5: 90.0000 (90.5714)  time: 0.2323  data: 0.1502  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3984 (1.3699)  acc1: 69.6000 (70.5600)  acc5: 89.6000 (90.4480)  time: 0.2275  data: 0.1501  max mem: 9147
Test: Total time: 0:00:10 (0.4181 s / it)
* Acc@1 70.846 Acc@5 90.346 loss 1.359
Accuracy of the model on the 50000 test images: 70.8%
Max accuracy: 71.20%
Epoch: [77]  [   0/1251]  eta: 1:02:57  lr: 0.003605  min_lr: 0.003605  loss: 3.0670 (3.0670)  weight_decay: 0.0500 (0.0500)  time: 3.0196  data: 2.8251  max mem: 9147
Epoch: [77]  [ 200/1251]  eta: 0:03:58  lr: 0.003603  min_lr: 0.003603  loss: 3.3429 (3.3426)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8316 (0.8127)  time: 0.1953  data: 0.0013  max mem: 9147
Epoch: [77]  [ 400/1251]  eta: 0:03:04  lr: 0.003601  min_lr: 0.003601  loss: 2.9903 (3.3035)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7623 (0.7958)  time: 0.2288  data: 0.0007  max mem: 9147
Epoch: [77]  [ 600/1251]  eta: 0:02:18  lr: 0.003598  min_lr: 0.003598  loss: 2.8140 (3.3226)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9111 (0.8145)  time: 0.2149  data: 0.0006  max mem: 9147
Epoch: [77]  [ 800/1251]  eta: 0:01:36  lr: 0.003596  min_lr: 0.003596  loss: 2.8875 (3.3210)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6898 (0.7962)  time: 0.2050  data: 0.0007  max mem: 9147
Epoch: [77]  [1000/1251]  eta: 0:00:53  lr: 0.003594  min_lr: 0.003594  loss: 2.7465 (3.3053)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7373 (0.7997)  time: 0.2142  data: 0.0095  max mem: 9147
Epoch: [77]  [1200/1251]  eta: 0:00:10  lr: 0.003592  min_lr: 0.003592  loss: 3.0578 (3.3225)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7141 (0.7982)  time: 0.2142  data: 0.0006  max mem: 9147
Epoch: [77]  [1250/1251]  eta: 0:00:00  lr: 0.003591  min_lr: 0.003591  loss: 2.9116 (3.3206)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8975 (0.8018)  time: 0.1420  data: 0.0013  max mem: 9147
Epoch: [77] Total time: 0:04:27 (0.2139 s / it)
Averaged stats: lr: 0.003591  min_lr: 0.003591  loss: 2.9116 (3.3282)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8975 (0.8018)
Test:  [ 0/25]  eta: 0:02:12  loss: 0.9190 (0.9190)  acc1: 81.2000 (81.2000)  acc5: 96.8000 (96.8000)  time: 5.2976  data: 5.2170  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0131 (1.1112)  acc1: 78.0000 (77.1636)  acc5: 95.6000 (94.4000)  time: 0.7501  data: 0.6555  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4187 (1.3574)  acc1: 68.8000 (71.5810)  acc5: 89.2000 (90.9524)  time: 0.2137  data: 0.1309  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5242 (1.3737)  acc1: 68.4000 (71.1360)  acc5: 88.8000 (90.8000)  time: 0.2083  data: 0.1309  max mem: 9147
Test: Total time: 0:00:09 (0.3994 s / it)
* Acc@1 70.904 Acc@5 90.460 loss 1.373
Accuracy of the model on the 50000 test images: 70.9%
Max accuracy: 71.20%
Epoch: [78]  [   0/1251]  eta: 0:59:45  lr: 0.003591  min_lr: 0.003591  loss: 2.4120 (2.4120)  weight_decay: 0.0500 (0.0500)  time: 2.8659  data: 2.2344  max mem: 9147
Epoch: [78]  [ 200/1251]  eta: 0:03:56  lr: 0.003589  min_lr: 0.003589  loss: 2.5820 (3.3620)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7098 (0.7661)  time: 0.2168  data: 0.0007  max mem: 9147
Epoch: [78]  [ 400/1251]  eta: 0:03:07  lr: 0.003587  min_lr: 0.003587  loss: 2.8364 (3.3708)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6610 (0.7327)  time: 0.1998  data: 0.0007  max mem: 9147
Epoch: [78]  [ 600/1251]  eta: 0:02:21  lr: 0.003585  min_lr: 0.003585  loss: 3.9396 (3.3620)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7484 (0.7673)  time: 0.2098  data: 0.0006  max mem: 9147
Epoch: [78]  [ 800/1251]  eta: 0:01:36  lr: 0.003583  min_lr: 0.003583  loss: 3.1060 (3.3348)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6879 (0.7933)  time: 0.1979  data: 0.0005  max mem: 9147
Epoch: [78]  [1000/1251]  eta: 0:00:52  lr: 0.003580  min_lr: 0.003580  loss: 3.8551 (3.3269)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7104 (0.7957)  time: 0.1904  data: 0.0005  max mem: 9147
Epoch: [78]  [1200/1251]  eta: 0:00:10  lr: 0.003578  min_lr: 0.003578  loss: 3.4400 (3.3287)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7359 (0.7923)  time: 0.2001  data: 0.0008  max mem: 9147
Epoch: [78]  [1250/1251]  eta: 0:00:00  lr: 0.003578  min_lr: 0.003578  loss: 2.7453 (3.3289)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7031 (0.7901)  time: 0.1573  data: 0.0010  max mem: 9147
Epoch: [78] Total time: 0:04:18 (0.2069 s / it)
Averaged stats: lr: 0.003578  min_lr: 0.003578  loss: 2.7453 (3.3152)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7031 (0.7901)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.9311 (0.9311)  acc1: 81.6000 (81.6000)  acc5: 95.2000 (95.2000)  time: 5.5277  data: 5.4366  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.1073 (1.1323)  acc1: 76.4000 (76.0727)  acc5: 94.8000 (94.2909)  time: 0.7320  data: 0.6362  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3951 (1.3503)  acc1: 68.4000 (71.2381)  acc5: 89.2000 (90.9333)  time: 0.2055  data: 0.1227  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5325 (1.3597)  acc1: 67.6000 (71.0560)  acc5: 88.4000 (90.9280)  time: 0.2034  data: 0.1266  max mem: 9147
Test: Total time: 0:00:10 (0.4050 s / it)
* Acc@1 71.016 Acc@5 90.662 loss 1.354
Accuracy of the model on the 50000 test images: 71.0%
Max accuracy: 71.20%
Epoch: [79]  [   0/1251]  eta: 1:05:22  lr: 0.003578  min_lr: 0.003578  loss: 4.2560 (4.2560)  weight_decay: 0.0500 (0.0500)  time: 3.1353  data: 1.5922  max mem: 9147
Epoch: [79]  [ 200/1251]  eta: 0:03:49  lr: 0.003575  min_lr: 0.003575  loss: 2.7378 (3.2486)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8049 (0.7700)  time: 0.1926  data: 0.0006  max mem: 9147
Epoch: [79]  [ 400/1251]  eta: 0:02:57  lr: 0.003573  min_lr: 0.003573  loss: 2.9284 (3.2892)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7976 (0.7574)  time: 0.2090  data: 0.0007  max mem: 9147
Epoch: [79]  [ 600/1251]  eta: 0:02:18  lr: 0.003571  min_lr: 0.003571  loss: 2.9351 (3.2664)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7424 (0.7858)  time: 0.2246  data: 0.0006  max mem: 9147
Epoch: [79]  [ 800/1251]  eta: 0:01:36  lr: 0.003569  min_lr: 0.003569  loss: 3.2087 (3.2848)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6831 (0.7844)  time: 0.2337  data: 0.0007  max mem: 9147
Epoch: [79]  [1000/1251]  eta: 0:00:53  lr: 0.003567  min_lr: 0.003567  loss: 3.1423 (3.3117)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6855 (0.7914)  time: 0.2164  data: 0.0007  max mem: 9147
Epoch: [79]  [1200/1251]  eta: 0:00:10  lr: 0.003564  min_lr: 0.003564  loss: 2.7646 (3.3247)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9082 (0.7945)  time: 0.2324  data: 0.0010  max mem: 9147
Epoch: [79]  [1250/1251]  eta: 0:00:00  lr: 0.003564  min_lr: 0.003564  loss: 2.8403 (3.3215)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8594 (0.7918)  time: 0.1468  data: 0.0013  max mem: 9147
Epoch: [79] Total time: 0:04:25 (0.2121 s / it)
Averaged stats: lr: 0.003564  min_lr: 0.003564  loss: 2.8403 (3.3076)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8594 (0.7918)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.9274 (0.9274)  acc1: 82.0000 (82.0000)  acc5: 95.2000 (95.2000)  time: 5.6617  data: 5.5815  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1032 (1.1315)  acc1: 75.2000 (75.5273)  acc5: 95.2000 (94.1091)  time: 0.7867  data: 0.6977  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4431 (1.3696)  acc1: 67.2000 (71.3714)  acc5: 90.0000 (90.5905)  time: 0.2081  data: 0.1280  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5855 (1.3832)  acc1: 67.2000 (71.1040)  acc5: 88.0000 (90.2720)  time: 0.2020  data: 0.1279  max mem: 9147
Test: Total time: 0:00:10 (0.4081 s / it)
* Acc@1 71.026 Acc@5 90.440 loss 1.382
Accuracy of the model on the 50000 test images: 71.0%
Max accuracy: 71.20%
Epoch: [80]  [   0/1251]  eta: 1:00:22  lr: 0.003564  min_lr: 0.003564  loss: 2.3114 (2.3114)  weight_decay: 0.0500 (0.0500)  time: 2.8956  data: 2.3015  max mem: 9147
Epoch: [80]  [ 200/1251]  eta: 0:03:56  lr: 0.003562  min_lr: 0.003562  loss: 2.7099 (3.3518)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7092 (0.8693)  time: 0.1990  data: 0.0005  max mem: 9147
Epoch: [80]  [ 400/1251]  eta: 0:03:06  lr: 0.003559  min_lr: 0.003559  loss: 2.9297 (3.3130)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7334 (0.8263)  time: 0.2050  data: 0.0006  max mem: 9147
Epoch: [80]  [ 600/1251]  eta: 0:02:18  lr: 0.003557  min_lr: 0.003557  loss: 2.9739 (3.2978)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7476 (0.8115)  time: 0.2000  data: 0.0006  max mem: 9147
Epoch: [80]  [ 800/1251]  eta: 0:01:34  lr: 0.003555  min_lr: 0.003555  loss: 3.6843 (3.3099)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6726 (0.8028)  time: 0.2154  data: 0.0007  max mem: 9147
Epoch: [80]  [1000/1251]  eta: 0:00:52  lr: 0.003553  min_lr: 0.003553  loss: 2.8281 (3.3141)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7180 (0.8100)  time: 0.2249  data: 0.0007  max mem: 9147
Epoch: [80]  [1200/1251]  eta: 0:00:10  lr: 0.003550  min_lr: 0.003550  loss: 2.8675 (3.3136)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6665 (0.8080)  time: 0.2003  data: 0.0006  max mem: 9147
Epoch: [80]  [1250/1251]  eta: 0:00:00  lr: 0.003550  min_lr: 0.003550  loss: 2.8045 (3.3098)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7523 (0.8054)  time: 0.1415  data: 0.0014  max mem: 9147
Epoch: [80] Total time: 0:04:24 (0.2115 s / it)
Averaged stats: lr: 0.003550  min_lr: 0.003550  loss: 2.8045 (3.3055)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7523 (0.8054)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.8475 (0.8475)  acc1: 83.6000 (83.6000)  acc5: 96.4000 (96.4000)  time: 5.7088  data: 5.6021  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9943 (1.0582)  acc1: 76.4000 (76.3636)  acc5: 95.2000 (94.2545)  time: 0.6810  data: 0.6028  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3930 (1.3148)  acc1: 67.6000 (71.1810)  acc5: 89.6000 (90.7048)  time: 0.1713  data: 0.0981  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4696 (1.3278)  acc1: 67.6000 (71.1200)  acc5: 88.0000 (90.5760)  time: 0.1997  data: 0.1280  max mem: 9147
Test: Total time: 0:00:10 (0.4066 s / it)
* Acc@1 70.796 Acc@5 90.462 loss 1.328
Accuracy of the model on the 50000 test images: 70.8%
Max accuracy: 71.20%
Epoch: [81]  [   0/1251]  eta: 1:06:44  lr: 0.003550  min_lr: 0.003550  loss: 2.6340 (2.6340)  weight_decay: 0.0500 (0.0500)  time: 3.2013  data: 2.2126  max mem: 9147
Epoch: [81]  [ 200/1251]  eta: 0:03:55  lr: 0.003547  min_lr: 0.003547  loss: 2.6994 (3.3537)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7403 (0.8107)  time: 0.2361  data: 0.0274  max mem: 9147
Epoch: [81]  [ 400/1251]  eta: 0:03:08  lr: 0.003545  min_lr: 0.003545  loss: 3.1604 (3.3079)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7471 (0.8291)  time: 0.2137  data: 0.0005  max mem: 9147
Epoch: [81]  [ 600/1251]  eta: 0:02:21  lr: 0.003543  min_lr: 0.003543  loss: 3.0211 (3.2935)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6495 (0.8197)  time: 0.2140  data: 0.0012  max mem: 9147
Epoch: [81]  [ 800/1251]  eta: 0:01:36  lr: 0.003541  min_lr: 0.003541  loss: 2.8404 (3.2862)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.2155  data: 0.0071  max mem: 9147
Epoch: [81]  [1000/1251]  eta: 0:00:54  lr: 0.003538  min_lr: 0.003538  loss: 3.0022 (3.2866)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7730 (nan)  time: 0.2151  data: 0.0006  max mem: 9147
Epoch: [81]  [1200/1251]  eta: 0:00:10  lr: 0.003536  min_lr: 0.003536  loss: 2.8245 (3.2920)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7282 (nan)  time: 0.2150  data: 0.0008  max mem: 9147
Epoch: [81]  [1250/1251]  eta: 0:00:00  lr: 0.003535  min_lr: 0.003535  loss: 2.7005 (3.2911)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8461 (nan)  time: 0.1403  data: 0.0016  max mem: 9147
Epoch: [81] Total time: 0:04:28 (0.2145 s / it)
Averaged stats: lr: 0.003535  min_lr: 0.003535  loss: 2.7005 (3.2907)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8461 (nan)
Test:  [ 0/25]  eta: 0:02:20  loss: 1.0337 (1.0337)  acc1: 79.2000 (79.2000)  acc5: 95.2000 (95.2000)  time: 5.6360  data: 5.5554  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1079 (1.1923)  acc1: 76.8000 (75.2364)  acc5: 94.4000 (93.6364)  time: 0.7534  data: 0.6643  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4777 (1.3892)  acc1: 69.6000 (70.7619)  acc5: 89.6000 (90.4191)  time: 0.2068  data: 0.1266  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5210 (1.4024)  acc1: 66.8000 (70.5120)  acc5: 88.8000 (90.2880)  time: 0.2042  data: 0.1293  max mem: 9147
Test: Total time: 0:00:10 (0.4072 s / it)
* Acc@1 70.752 Acc@5 90.400 loss 1.394
Accuracy of the model on the 50000 test images: 70.8%
Max accuracy: 71.20%
Epoch: [82]  [   0/1251]  eta: 1:05:27  lr: 0.003535  min_lr: 0.003535  loss: 2.3049 (2.3049)  weight_decay: 0.0500 (0.0500)  time: 3.1393  data: 2.1923  max mem: 9147
Epoch: [82]  [ 200/1251]  eta: 0:03:36  lr: 0.003533  min_lr: 0.003533  loss: 3.3788 (3.2024)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8455 (0.7948)  time: 0.1838  data: 0.0011  max mem: 9147
Epoch: [82]  [ 400/1251]  eta: 0:02:49  lr: 0.003531  min_lr: 0.003531  loss: 3.0435 (3.3031)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8914 (0.8174)  time: 0.2006  data: 0.0007  max mem: 9147
Epoch: [82]  [ 600/1251]  eta: 0:02:12  lr: 0.003528  min_lr: 0.003528  loss: 3.4373 (3.3131)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6917 (0.8135)  time: 0.1907  data: 0.0005  max mem: 9147
Epoch: [82]  [ 800/1251]  eta: 0:01:30  lr: 0.003526  min_lr: 0.003526  loss: 2.7515 (3.3248)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7680 (0.8002)  time: 0.2145  data: 0.0007  max mem: 9147
Epoch: [82]  [1000/1251]  eta: 0:00:51  lr: 0.003524  min_lr: 0.003524  loss: 3.1787 (3.3082)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7857 (0.8040)  time: 0.2250  data: 0.0006  max mem: 9147
Epoch: [82]  [1200/1251]  eta: 0:00:10  lr: 0.003521  min_lr: 0.003521  loss: 2.8661 (3.2970)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8701 (0.8106)  time: 0.2155  data: 0.0006  max mem: 9147
Epoch: [82]  [1250/1251]  eta: 0:00:00  lr: 0.003521  min_lr: 0.003521  loss: 3.5599 (3.3061)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8871 (0.8140)  time: 0.1404  data: 0.0010  max mem: 9147
Epoch: [82] Total time: 0:04:18 (0.2069 s / it)
Averaged stats: lr: 0.003521  min_lr: 0.003521  loss: 3.5599 (3.3149)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8871 (0.8140)
Test:  [ 0/25]  eta: 0:01:20  loss: 0.8556 (0.8556)  acc1: 80.8000 (80.8000)  acc5: 96.4000 (96.4000)  time: 3.2000  data: 3.1198  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 1.1231 (1.1412)  acc1: 78.0000 (76.8364)  acc5: 94.8000 (93.6727)  time: 0.5630  data: 0.4893  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4488 (1.3720)  acc1: 70.8000 (71.9238)  acc5: 89.6000 (90.6095)  time: 0.2710  data: 0.1985  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5210 (1.3876)  acc1: 67.6000 (71.6160)  acc5: 88.8000 (90.5280)  time: 0.2140  data: 0.1424  max mem: 9147
Test: Total time: 0:00:09 (0.3987 s / it)
* Acc@1 71.230 Acc@5 90.532 loss 1.390
Accuracy of the model on the 50000 test images: 71.2%
Max accuracy: 71.23%
Epoch: [83]  [   0/1251]  eta: 0:55:14  lr: 0.003521  min_lr: 0.003521  loss: 3.8108 (3.8108)  weight_decay: 0.0500 (0.0500)  time: 2.6494  data: 1.9855  max mem: 9147
Epoch: [83]  [ 200/1251]  eta: 0:03:56  lr: 0.003519  min_lr: 0.003519  loss: 3.0326 (3.2555)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7512 (0.8286)  time: 0.2146  data: 0.0006  max mem: 9147
Epoch: [83]  [ 400/1251]  eta: 0:03:07  lr: 0.003516  min_lr: 0.003516  loss: 2.8464 (3.2598)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8296 (0.8081)  time: 0.2290  data: 0.0007  max mem: 9147
Epoch: [83]  [ 600/1251]  eta: 0:02:22  lr: 0.003514  min_lr: 0.003514  loss: 3.0775 (3.2561)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7618 (0.8166)  time: 0.2194  data: 0.0196  max mem: 9147
Epoch: [83]  [ 800/1251]  eta: 0:01:38  lr: 0.003512  min_lr: 0.003512  loss: 2.7398 (3.2505)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7685 (0.8113)  time: 0.2201  data: 0.0007  max mem: 9147
Epoch: [83]  [1000/1251]  eta: 0:00:54  lr: 0.003509  min_lr: 0.003509  loss: 3.0643 (3.2542)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6855 (0.8021)  time: 0.1952  data: 0.0005  max mem: 9147
Epoch: [83]  [1200/1251]  eta: 0:00:10  lr: 0.003507  min_lr: 0.003507  loss: 2.7700 (3.2383)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7545 (0.7946)  time: 0.1902  data: 0.0006  max mem: 9147
Epoch: [83]  [1250/1251]  eta: 0:00:00  lr: 0.003506  min_lr: 0.003506  loss: 3.1956 (3.2404)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7955 (0.7976)  time: 0.1382  data: 0.0008  max mem: 9147
Epoch: [83] Total time: 0:04:25 (0.2126 s / it)
Averaged stats: lr: 0.003506  min_lr: 0.003506  loss: 3.1956 (3.2965)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7955 (0.7976)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.9011 (0.9011)  acc1: 83.6000 (83.6000)  acc5: 96.0000 (96.0000)  time: 5.5993  data: 5.5188  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0891 (1.1107)  acc1: 79.6000 (76.5818)  acc5: 95.6000 (94.5091)  time: 0.7665  data: 0.6704  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3686 (1.3308)  acc1: 70.4000 (72.0571)  acc5: 89.6000 (90.7238)  time: 0.2051  data: 0.1202  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4623 (1.3452)  acc1: 70.0000 (71.5840)  acc5: 88.0000 (90.5760)  time: 0.1994  data: 0.1201  max mem: 9147
Test: Total time: 0:00:10 (0.4026 s / it)
* Acc@1 71.488 Acc@5 90.758 loss 1.345
Accuracy of the model on the 50000 test images: 71.5%
Max accuracy: 71.49%
Epoch: [84]  [   0/1251]  eta: 0:59:24  lr: 0.003506  min_lr: 0.003506  loss: 4.4294 (4.4294)  weight_decay: 0.0500 (0.0500)  time: 2.8493  data: 2.4838  max mem: 9147
Epoch: [84]  [ 200/1251]  eta: 0:03:58  lr: 0.003504  min_lr: 0.003504  loss: 3.0504 (3.2698)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7659 (0.8573)  time: 0.2290  data: 0.0102  max mem: 9147
Epoch: [84]  [ 400/1251]  eta: 0:03:06  lr: 0.003502  min_lr: 0.003502  loss: 2.8815 (3.2631)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8749 (0.8885)  time: 0.1952  data: 0.0005  max mem: 9147
Epoch: [84]  [ 600/1251]  eta: 0:02:20  lr: 0.003499  min_lr: 0.003499  loss: 2.9015 (3.2690)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6593 (0.8407)  time: 0.2219  data: 0.0007  max mem: 9147
Epoch: [84]  [ 800/1251]  eta: 0:01:34  lr: 0.003497  min_lr: 0.003497  loss: 3.3012 (3.2728)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7095 (0.8281)  time: 0.1900  data: 0.0006  max mem: 9147
Epoch: [84]  [1000/1251]  eta: 0:00:51  lr: 0.003494  min_lr: 0.003494  loss: 3.1083 (3.2889)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7436 (0.8219)  time: 0.2008  data: 0.0007  max mem: 9147
Epoch: [84]  [1200/1251]  eta: 0:00:10  lr: 0.003492  min_lr: 0.003492  loss: 3.6119 (3.2945)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7066 (0.8227)  time: 0.2150  data: 0.0006  max mem: 9147
Epoch: [84]  [1250/1251]  eta: 0:00:00  lr: 0.003491  min_lr: 0.003491  loss: 2.6995 (3.2897)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6607 (0.8177)  time: 0.1397  data: 0.0015  max mem: 9147
Epoch: [84] Total time: 0:04:21 (0.2090 s / it)
Averaged stats: lr: 0.003491  min_lr: 0.003491  loss: 2.6995 (3.3002)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6607 (0.8177)
Test:  [ 0/25]  eta: 0:02:27  loss: 0.9335 (0.9335)  acc1: 80.0000 (80.0000)  acc5: 94.4000 (94.4000)  time: 5.9148  data: 5.8179  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0356 (1.0504)  acc1: 76.4000 (76.5455)  acc5: 94.4000 (94.5091)  time: 0.7994  data: 0.7171  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3607 (1.2863)  acc1: 69.6000 (71.6762)  acc5: 90.8000 (91.2762)  time: 0.2096  data: 0.1336  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3663 (1.3064)  acc1: 67.2000 (71.0560)  acc5: 88.4000 (91.0400)  time: 0.2091  data: 0.1358  max mem: 9147
Test: Total time: 0:00:10 (0.4205 s / it)
* Acc@1 71.330 Acc@5 90.976 loss 1.303
Accuracy of the model on the 50000 test images: 71.3%
Max accuracy: 71.49%
Epoch: [85]  [   0/1251]  eta: 1:05:23  lr: 0.003491  min_lr: 0.003491  loss: 2.4468 (2.4468)  weight_decay: 0.0500 (0.0500)  time: 3.1361  data: 2.2343  max mem: 9147
Epoch: [85]  [ 200/1251]  eta: 0:03:53  lr: 0.003489  min_lr: 0.003489  loss: 2.8039 (3.2595)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6879 (0.7716)  time: 0.1951  data: 0.0014  max mem: 9147
Epoch: [85]  [ 400/1251]  eta: 0:02:54  lr: 0.003487  min_lr: 0.003487  loss: 2.6594 (3.2438)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7959 (0.7762)  time: 0.1805  data: 0.0006  max mem: 9147
Epoch: [85]  [ 600/1251]  eta: 0:02:09  lr: 0.003484  min_lr: 0.003484  loss: 2.9617 (3.2754)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8229 (0.7954)  time: 0.1904  data: 0.0011  max mem: 9147
Epoch: [85]  [ 800/1251]  eta: 0:01:28  lr: 0.003482  min_lr: 0.003482  loss: 2.6246 (3.2710)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8577 (0.8039)  time: 0.1952  data: 0.0005  max mem: 9147
Epoch: [85]  [1000/1251]  eta: 0:00:49  lr: 0.003479  min_lr: 0.003479  loss: 2.7927 (3.2750)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7241 (0.8102)  time: 0.1910  data: 0.0007  max mem: 9147
Epoch: [85]  [1200/1251]  eta: 0:00:10  lr: 0.003477  min_lr: 0.003477  loss: 2.8667 (3.2858)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6636 (0.7912)  time: 0.2392  data: 0.0007  max mem: 9147
Epoch: [85]  [1250/1251]  eta: 0:00:00  lr: 0.003476  min_lr: 0.003476  loss: 2.6958 (3.2847)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7053 (0.7997)  time: 0.1389  data: 0.0011  max mem: 9147
Epoch: [85] Total time: 0:04:08 (0.1985 s / it)
Averaged stats: lr: 0.003476  min_lr: 0.003476  loss: 2.6958 (3.2977)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7053 (0.7997)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.8891 (0.8891)  acc1: 83.2000 (83.2000)  acc5: 96.0000 (96.0000)  time: 5.6296  data: 5.5493  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0520 (1.1143)  acc1: 78.0000 (76.8000)  acc5: 94.8000 (94.5091)  time: 0.7600  data: 0.6847  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3689 (1.3084)  acc1: 68.8000 (72.0762)  acc5: 91.6000 (91.3524)  time: 0.2134  data: 0.1406  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4400 (1.3276)  acc1: 68.8000 (71.6800)  acc5: 90.0000 (91.1360)  time: 0.2121  data: 0.1406  max mem: 9147
Test: Total time: 0:00:10 (0.4101 s / it)
* Acc@1 71.438 Acc@5 90.772 loss 1.337
Accuracy of the model on the 50000 test images: 71.4%
Max accuracy: 71.49%
Epoch: [86]  [   0/1251]  eta: 1:03:42  lr: 0.003476  min_lr: 0.003476  loss: 2.4486 (2.4486)  weight_decay: 0.0500 (0.0500)  time: 3.0554  data: 2.4871  max mem: 9147
Epoch: [86]  [ 200/1251]  eta: 0:03:52  lr: 0.003474  min_lr: 0.003474  loss: 2.7219 (3.2836)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6615 (0.8567)  time: 0.2005  data: 0.0006  max mem: 9147
Epoch: [86]  [ 400/1251]  eta: 0:03:05  lr: 0.003472  min_lr: 0.003472  loss: 2.5823 (3.2810)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7099 (0.8228)  time: 0.2016  data: 0.0007  max mem: 9147
Epoch: [86]  [ 600/1251]  eta: 0:02:21  lr: 0.003469  min_lr: 0.003469  loss: 2.7733 (3.2833)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8138 (0.8189)  time: 0.2075  data: 0.0006  max mem: 9147
Epoch: [86]  [ 800/1251]  eta: 0:01:37  lr: 0.003467  min_lr: 0.003467  loss: 2.9839 (3.2784)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6523 (0.8073)  time: 0.1906  data: 0.0005  max mem: 9147
Epoch: [86]  [1000/1251]  eta: 0:00:53  lr: 0.003464  min_lr: 0.003464  loss: 2.8261 (3.2777)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6944 (0.8153)  time: 0.2251  data: 0.0007  max mem: 9147
Epoch: [86]  [1200/1251]  eta: 0:00:10  lr: 0.003462  min_lr: 0.003462  loss: 2.8512 (3.2972)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8407 (0.8047)  time: 0.2240  data: 0.0006  max mem: 9147
Epoch: [86]  [1250/1251]  eta: 0:00:00  lr: 0.003461  min_lr: 0.003461  loss: 3.3395 (3.2963)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7948 (0.8033)  time: 0.1422  data: 0.0010  max mem: 9147
Epoch: [86] Total time: 0:04:27 (0.2140 s / it)
Averaged stats: lr: 0.003461  min_lr: 0.003461  loss: 3.3395 (3.2956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7948 (0.8033)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.8767 (0.8767)  acc1: 83.6000 (83.6000)  acc5: 96.8000 (96.8000)  time: 5.5699  data: 5.4857  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.1145 (1.1273)  acc1: 76.8000 (75.6000)  acc5: 94.4000 (93.9636)  time: 0.6877  data: 0.5991  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4057 (1.3467)  acc1: 69.2000 (71.2571)  acc5: 90.4000 (90.7048)  time: 0.1808  data: 0.1017  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5083 (1.3594)  acc1: 69.2000 (70.9920)  acc5: 88.4000 (90.5440)  time: 0.1871  data: 0.1140  max mem: 9147
Test: Total time: 0:00:09 (0.3916 s / it)
* Acc@1 71.302 Acc@5 90.624 loss 1.350
Accuracy of the model on the 50000 test images: 71.3%
Max accuracy: 71.49%
Epoch: [87]  [   0/1251]  eta: 1:04:30  lr: 0.003461  min_lr: 0.003461  loss: 2.1668 (2.1668)  weight_decay: 0.0500 (0.0500)  time: 3.0941  data: 2.3134  max mem: 9147
Epoch: [87]  [ 200/1251]  eta: 0:03:48  lr: 0.003459  min_lr: 0.003459  loss: 3.1717 (3.1992)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7500 (0.7638)  time: 0.1917  data: 0.0006  max mem: 9147
Epoch: [87]  [ 400/1251]  eta: 0:02:56  lr: 0.003456  min_lr: 0.003456  loss: 2.6207 (3.2122)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7188 (0.7757)  time: 0.1992  data: 0.0006  max mem: 9147
Epoch: [87]  [ 600/1251]  eta: 0:02:16  lr: 0.003454  min_lr: 0.003454  loss: 2.9355 (3.2275)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6988 (0.8189)  time: 0.2154  data: 0.0130  max mem: 9147
Epoch: [87]  [ 800/1251]  eta: 0:01:35  lr: 0.003451  min_lr: 0.003451  loss: 3.3550 (3.2647)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7391 (0.8338)  time: 0.2135  data: 0.0007  max mem: 9147
Epoch: [87]  [1000/1251]  eta: 0:00:53  lr: 0.003449  min_lr: 0.003449  loss: 2.8528 (3.2364)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7027 (0.8194)  time: 0.2013  data: 0.0007  max mem: 9147
Epoch: [87]  [1200/1251]  eta: 0:00:10  lr: 0.003446  min_lr: 0.003446  loss: 2.9144 (3.2455)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6374 (0.8038)  time: 0.1999  data: 0.0007  max mem: 9147
Epoch: [87]  [1250/1251]  eta: 0:00:00  lr: 0.003446  min_lr: 0.003446  loss: 2.7046 (3.2504)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6058 (0.7991)  time: 0.1432  data: 0.0014  max mem: 9147
Epoch: [87] Total time: 0:04:24 (0.2111 s / it)
Averaged stats: lr: 0.003446  min_lr: 0.003446  loss: 2.7046 (3.2691)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6058 (0.7991)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.9782 (0.9782)  acc1: 82.4000 (82.4000)  acc5: 93.6000 (93.6000)  time: 5.5895  data: 5.5091  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1009 (1.1327)  acc1: 77.6000 (76.3636)  acc5: 94.4000 (94.0000)  time: 0.7656  data: 0.6693  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4541 (1.3415)  acc1: 68.0000 (71.9810)  acc5: 90.8000 (90.7238)  time: 0.2112  data: 0.1261  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5070 (1.3534)  acc1: 68.0000 (71.6160)  acc5: 88.8000 (90.6720)  time: 0.2106  data: 0.1314  max mem: 9147
Test: Total time: 0:00:10 (0.4109 s / it)
* Acc@1 71.940 Acc@5 90.942 loss 1.346
Accuracy of the model on the 50000 test images: 71.9%
Max accuracy: 71.94%
Epoch: [88]  [   0/1251]  eta: 0:59:38  lr: 0.003446  min_lr: 0.003446  loss: 2.7053 (2.7053)  weight_decay: 0.0500 (0.0500)  time: 2.8608  data: 2.6519  max mem: 9147
Epoch: [88]  [ 200/1251]  eta: 0:03:35  lr: 0.003443  min_lr: 0.003443  loss: 2.8590 (3.3156)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6906 (0.7734)  time: 0.1902  data: 0.0005  max mem: 9147
Epoch: [88]  [ 400/1251]  eta: 0:02:52  lr: 0.003441  min_lr: 0.003441  loss: 2.7863 (3.2929)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7454 (inf)  time: 0.2378  data: 0.0024  max mem: 9147
Epoch: [88]  [ 600/1251]  eta: 0:02:14  lr: 0.003438  min_lr: 0.003438  loss: 2.8703 (3.2768)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7941 (inf)  time: 0.2048  data: 0.0007  max mem: 9147
Epoch: [88]  [ 800/1251]  eta: 0:01:32  lr: 0.003436  min_lr: 0.003436  loss: 2.6259 (3.2587)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7048 (inf)  time: 0.1882  data: 0.0017  max mem: 9147
Epoch: [88]  [1000/1251]  eta: 0:00:51  lr: 0.003433  min_lr: 0.003433  loss: 3.5311 (3.2873)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6722 (inf)  time: 0.2153  data: 0.0006  max mem: 9147
Epoch: [88]  [1200/1251]  eta: 0:00:10  lr: 0.003431  min_lr: 0.003431  loss: 2.6468 (3.3026)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7765 (inf)  time: 0.1756  data: 0.0012  max mem: 9147
Epoch: [88]  [1250/1251]  eta: 0:00:00  lr: 0.003430  min_lr: 0.003430  loss: 3.2793 (3.2997)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7274 (inf)  time: 0.1393  data: 0.0011  max mem: 9147
Epoch: [88] Total time: 0:04:13 (0.2029 s / it)
Averaged stats: lr: 0.003430  min_lr: 0.003430  loss: 3.2793 (3.2863)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7274 (inf)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.9444 (0.9444)  acc1: 83.2000 (83.2000)  acc5: 94.8000 (94.8000)  time: 5.6442  data: 5.5444  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1219 (1.0963)  acc1: 76.8000 (76.3273)  acc5: 95.2000 (94.8727)  time: 0.7488  data: 0.6535  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4142 (1.3304)  acc1: 68.4000 (71.8095)  acc5: 91.2000 (91.4667)  time: 0.2023  data: 0.1196  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4653 (1.3450)  acc1: 67.2000 (71.6320)  acc5: 88.8000 (91.2800)  time: 0.2022  data: 0.1269  max mem: 9147
Test: Total time: 0:00:10 (0.4073 s / it)
* Acc@1 71.350 Acc@5 91.062 loss 1.345
Accuracy of the model on the 50000 test images: 71.4%
Max accuracy: 71.94%
Epoch: [89]  [   0/1251]  eta: 1:08:45  lr: 0.003430  min_lr: 0.003430  loss: 4.0723 (4.0723)  weight_decay: 0.0500 (0.0500)  time: 3.2975  data: 2.2105  max mem: 9147
Epoch: [89]  [ 200/1251]  eta: 0:03:58  lr: 0.003428  min_lr: 0.003428  loss: 2.8173 (3.1955)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6521 (0.8241)  time: 0.2447  data: 0.0007  max mem: 9147
Epoch: [89]  [ 400/1251]  eta: 0:02:59  lr: 0.003425  min_lr: 0.003425  loss: 2.7498 (3.2623)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6586 (0.7730)  time: 0.1964  data: 0.0006  max mem: 9147
Epoch: [89]  [ 600/1251]  eta: 0:02:16  lr: 0.003423  min_lr: 0.003423  loss: 3.4418 (3.2482)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7037 (0.7660)  time: 0.2197  data: 0.0007  max mem: 9147
Epoch: [89]  [ 800/1251]  eta: 0:01:34  lr: 0.003420  min_lr: 0.003420  loss: 2.7255 (3.2555)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6531 (0.7739)  time: 0.2047  data: 0.0007  max mem: 9147
Epoch: [89]  [1000/1251]  eta: 0:00:52  lr: 0.003418  min_lr: 0.003418  loss: 3.2424 (3.2591)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7508 (0.7832)  time: 0.2004  data: 0.0011  max mem: 9147
Epoch: [89]  [1200/1251]  eta: 0:00:10  lr: 0.003415  min_lr: 0.003415  loss: 3.2186 (3.2622)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7003 (0.7809)  time: 0.1911  data: 0.0006  max mem: 9147
Epoch: [89]  [1250/1251]  eta: 0:00:00  lr: 0.003414  min_lr: 0.003414  loss: 3.1956 (3.2662)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6595 (0.7758)  time: 0.1385  data: 0.0020  max mem: 9147
Epoch: [89] Total time: 0:04:20 (0.2082 s / it)
Averaged stats: lr: 0.003414  min_lr: 0.003414  loss: 3.1956 (3.2852)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6595 (0.7758)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.9063 (0.9063)  acc1: 80.8000 (80.8000)  acc5: 96.4000 (96.4000)  time: 5.7191  data: 5.6387  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1316 (1.1468)  acc1: 75.6000 (76.5455)  acc5: 94.8000 (94.8727)  time: 0.7845  data: 0.7007  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3852 (1.3655)  acc1: 70.4000 (71.6762)  acc5: 90.4000 (91.1238)  time: 0.2187  data: 0.1415  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4877 (1.3917)  acc1: 70.4000 (71.2000)  acc5: 88.4000 (90.7840)  time: 0.2172  data: 0.1415  max mem: 9147
Test: Total time: 0:00:10 (0.4179 s / it)
* Acc@1 71.154 Acc@5 90.680 loss 1.389
Accuracy of the model on the 50000 test images: 71.2%
Max accuracy: 71.94%
Epoch: [90]  [   0/1251]  eta: 0:58:19  lr: 0.003414  min_lr: 0.003414  loss: 4.4431 (4.4431)  weight_decay: 0.0500 (0.0500)  time: 2.7972  data: 2.4006  max mem: 9147
Epoch: [90]  [ 200/1251]  eta: 0:03:48  lr: 0.003412  min_lr: 0.003412  loss: 2.6463 (3.2648)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7048 (0.7239)  time: 0.1812  data: 0.0005  max mem: 9147
Epoch: [90]  [ 400/1251]  eta: 0:03:00  lr: 0.003409  min_lr: 0.003409  loss: 2.8287 (3.2670)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7983 (0.7514)  time: 0.2098  data: 0.0007  max mem: 9147
Epoch: [90]  [ 600/1251]  eta: 0:02:18  lr: 0.003407  min_lr: 0.003407  loss: 2.7751 (3.2477)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7811 (0.7804)  time: 0.2086  data: 0.0007  max mem: 9147
Epoch: [90]  [ 800/1251]  eta: 0:01:36  lr: 0.003404  min_lr: 0.003404  loss: 3.5373 (3.2481)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7301 (0.8000)  time: 0.2250  data: 0.0007  max mem: 9147
Epoch: [90]  [1000/1251]  eta: 0:00:53  lr: 0.003402  min_lr: 0.003402  loss: 2.7808 (3.2350)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8070 (0.7934)  time: 0.2335  data: 0.0021  max mem: 9147
Epoch: [90]  [1200/1251]  eta: 0:00:10  lr: 0.003399  min_lr: 0.003399  loss: 3.1024 (3.2600)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7076 (0.7953)  time: 0.2254  data: 0.0007  max mem: 9147
Epoch: [90]  [1250/1251]  eta: 0:00:00  lr: 0.003398  min_lr: 0.003398  loss: 2.7419 (3.2532)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7746 (0.7962)  time: 0.1443  data: 0.0010  max mem: 9147
Epoch: [90] Total time: 0:04:29 (0.2151 s / it)
Averaged stats: lr: 0.003398  min_lr: 0.003398  loss: 2.7419 (3.2660)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7746 (0.7962)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.8382 (0.8382)  acc1: 82.8000 (82.8000)  acc5: 95.6000 (95.6000)  time: 5.7094  data: 5.6104  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9696 (1.0400)  acc1: 77.2000 (76.9818)  acc5: 94.8000 (94.6182)  time: 0.6827  data: 0.6085  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3343 (1.2690)  acc1: 70.4000 (72.6667)  acc5: 90.8000 (91.2381)  time: 0.1686  data: 0.0972  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4372 (1.2930)  acc1: 70.0000 (71.9840)  acc5: 89.6000 (91.1360)  time: 0.1947  data: 0.1246  max mem: 9147
Test: Total time: 0:00:09 (0.3991 s / it)
* Acc@1 71.720 Acc@5 90.994 loss 1.291
Accuracy of the model on the 50000 test images: 71.7%
Max accuracy: 71.94%
Epoch: [91]  [   0/1251]  eta: 1:02:47  lr: 0.003398  min_lr: 0.003398  loss: 2.6582 (2.6582)  weight_decay: 0.0500 (0.0500)  time: 3.0113  data: 1.5750  max mem: 9147
Epoch: [91]  [ 200/1251]  eta: 0:04:00  lr: 0.003396  min_lr: 0.003396  loss: 2.6402 (3.2636)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7215 (0.7910)  time: 0.2207  data: 0.0007  max mem: 9147
Epoch: [91]  [ 400/1251]  eta: 0:03:02  lr: 0.003393  min_lr: 0.003393  loss: 2.8377 (3.2869)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7475 (0.8182)  time: 0.1900  data: 0.0004  max mem: 9147
Epoch: [91]  [ 600/1251]  eta: 0:02:16  lr: 0.003391  min_lr: 0.003391  loss: 2.7294 (3.2924)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7744 (0.7895)  time: 0.2099  data: 0.0007  max mem: 9147
Epoch: [91]  [ 800/1251]  eta: 0:01:35  lr: 0.003388  min_lr: 0.003388  loss: 2.7262 (3.2803)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6751 (0.7960)  time: 0.2299  data: 0.0006  max mem: 9147
Epoch: [91]  [1000/1251]  eta: 0:00:53  lr: 0.003385  min_lr: 0.003385  loss: 2.7126 (3.2646)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7773 (0.7899)  time: 0.2244  data: 0.0007  max mem: 9147
Epoch: [91]  [1200/1251]  eta: 0:00:10  lr: 0.003383  min_lr: 0.003383  loss: 2.6839 (3.2636)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7485 (0.7931)  time: 0.2302  data: 0.0103  max mem: 9147
Epoch: [91]  [1250/1251]  eta: 0:00:00  lr: 0.003382  min_lr: 0.003382  loss: 2.6841 (3.2612)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8708 (0.7973)  time: 0.1416  data: 0.0017  max mem: 9147
Epoch: [91] Total time: 0:04:25 (0.2124 s / it)
Averaged stats: lr: 0.003382  min_lr: 0.003382  loss: 2.6841 (3.2690)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8708 (0.7973)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.8877 (0.8877)  acc1: 82.0000 (82.0000)  acc5: 94.8000 (94.8000)  time: 5.6295  data: 5.5490  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0998 (1.1264)  acc1: 79.2000 (77.0182)  acc5: 94.8000 (94.4000)  time: 0.7442  data: 0.6510  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3778 (1.3605)  acc1: 69.2000 (72.2286)  acc5: 89.6000 (91.2191)  time: 0.2201  data: 0.1383  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5511 (1.3749)  acc1: 68.8000 (71.7920)  acc5: 88.8000 (90.9760)  time: 0.2151  data: 0.1382  max mem: 9147
Test: Total time: 0:00:10 (0.4158 s / it)
* Acc@1 71.706 Acc@5 90.810 loss 1.371
Accuracy of the model on the 50000 test images: 71.7%
Max accuracy: 71.94%
Epoch: [92]  [   0/1251]  eta: 1:03:37  lr: 0.003382  min_lr: 0.003382  loss: 2.4727 (2.4727)  weight_decay: 0.0500 (0.0500)  time: 3.0515  data: 2.6298  max mem: 9147
Epoch: [92]  [ 200/1251]  eta: 0:03:55  lr: 0.003380  min_lr: 0.003380  loss: 2.9433 (3.3330)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6916 (0.7959)  time: 0.1999  data: 0.0006  max mem: 9147
Epoch: [92]  [ 400/1251]  eta: 0:03:01  lr: 0.003377  min_lr: 0.003377  loss: 2.7564 (3.2984)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7641 (0.7688)  time: 0.2105  data: 0.0007  max mem: 9147
Epoch: [92]  [ 600/1251]  eta: 0:02:18  lr: 0.003374  min_lr: 0.003374  loss: 2.6712 (3.2836)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7268 (0.7872)  time: 0.1900  data: 0.0005  max mem: 9147
Epoch: [92]  [ 800/1251]  eta: 0:01:34  lr: 0.003372  min_lr: 0.003372  loss: 2.7165 (3.2847)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6709 (0.7794)  time: 0.2099  data: 0.0008  max mem: 9147
Epoch: [92]  [1000/1251]  eta: 0:00:52  lr: 0.003369  min_lr: 0.003369  loss: 2.6531 (3.2846)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7228 (0.7949)  time: 0.2301  data: 0.0272  max mem: 9147
Epoch: [92]  [1200/1251]  eta: 0:00:10  lr: 0.003367  min_lr: 0.003367  loss: 2.7695 (3.2843)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7028 (0.7863)  time: 0.1856  data: 0.0005  max mem: 9147
Epoch: [92]  [1250/1251]  eta: 0:00:00  lr: 0.003366  min_lr: 0.003366  loss: 2.8121 (3.2835)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6784 (0.7820)  time: 0.1382  data: 0.0016  max mem: 9147
Epoch: [92] Total time: 0:04:20 (0.2079 s / it)
Averaged stats: lr: 0.003366  min_lr: 0.003366  loss: 2.8121 (3.2576)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6784 (0.7820)
Test:  [ 0/25]  eta: 0:01:27  loss: 0.8683 (0.8683)  acc1: 84.8000 (84.8000)  acc5: 95.2000 (95.2000)  time: 3.5113  data: 3.4130  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 1.1102 (1.1275)  acc1: 75.2000 (76.6545)  acc5: 94.8000 (94.3273)  time: 0.6287  data: 0.5375  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3874 (1.3349)  acc1: 70.4000 (71.8476)  acc5: 91.2000 (91.1048)  time: 0.2873  data: 0.2060  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4183 (1.3590)  acc1: 68.8000 (71.4560)  acc5: 89.6000 (90.8640)  time: 0.2264  data: 0.1492  max mem: 9147
Test: Total time: 0:00:09 (0.3905 s / it)
* Acc@1 71.582 Acc@5 90.712 loss 1.357
Accuracy of the model on the 50000 test images: 71.6%
Max accuracy: 71.94%
Epoch: [93]  [   0/1251]  eta: 1:07:08  lr: 0.003366  min_lr: 0.003366  loss: 2.6456 (2.6456)  weight_decay: 0.0500 (0.0500)  time: 3.2204  data: 2.6004  max mem: 9147
Epoch: [93]  [ 200/1251]  eta: 0:03:58  lr: 0.003363  min_lr: 0.003363  loss: 2.7378 (3.2141)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6075 (0.7434)  time: 0.2243  data: 0.0007  max mem: 9147
Epoch: [93]  [ 400/1251]  eta: 0:03:04  lr: 0.003361  min_lr: 0.003361  loss: 2.7129 (3.2465)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7088 (0.7579)  time: 0.1902  data: 0.0005  max mem: 9147
Epoch: [93]  [ 600/1251]  eta: 0:02:15  lr: 0.003358  min_lr: 0.003358  loss: 2.5521 (3.2383)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9831 (0.8351)  time: 0.1931  data: 0.0006  max mem: 9147
Epoch: [93]  [ 800/1251]  eta: 0:01:33  lr: 0.003355  min_lr: 0.003355  loss: 2.6228 (3.2457)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7246 (0.8424)  time: 0.2156  data: 0.0007  max mem: 9147
Epoch: [93]  [1000/1251]  eta: 0:00:52  lr: 0.003353  min_lr: 0.003353  loss: 2.6643 (3.2437)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8317 (0.8322)  time: 0.1865  data: 0.0011  max mem: 9147
Epoch: [93]  [1200/1251]  eta: 0:00:10  lr: 0.003350  min_lr: 0.003350  loss: 2.6960 (3.2527)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7320 (0.8220)  time: 0.2150  data: 0.0006  max mem: 9147
Epoch: [93]  [1250/1251]  eta: 0:00:00  lr: 0.003350  min_lr: 0.003350  loss: 2.7058 (3.2509)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8063 (0.8205)  time: 0.1429  data: 0.0010  max mem: 9147
Epoch: [93] Total time: 0:04:22 (0.2095 s / it)
Averaged stats: lr: 0.003350  min_lr: 0.003350  loss: 2.7058 (3.2604)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8063 (0.8205)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.8893 (0.8893)  acc1: 81.2000 (81.2000)  acc5: 96.4000 (96.4000)  time: 5.5387  data: 5.4583  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0087 (1.0762)  acc1: 79.2000 (76.6909)  acc5: 95.2000 (94.4000)  time: 0.7533  data: 0.6780  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3387 (1.3103)  acc1: 71.2000 (72.3619)  acc5: 91.2000 (90.9333)  time: 0.2177  data: 0.1445  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4225 (1.3213)  acc1: 68.0000 (71.9840)  acc5: 88.0000 (90.7520)  time: 0.2163  data: 0.1444  max mem: 9147
Test: Total time: 0:00:10 (0.4097 s / it)
* Acc@1 71.782 Acc@5 90.914 loss 1.323
Accuracy of the model on the 50000 test images: 71.8%
Max accuracy: 71.94%
Epoch: [94]  [   0/1251]  eta: 1:02:09  lr: 0.003350  min_lr: 0.003350  loss: 3.7837 (3.7837)  weight_decay: 0.0500 (0.0500)  time: 2.9809  data: 2.2559  max mem: 9147
Epoch: [94]  [ 200/1251]  eta: 0:03:52  lr: 0.003347  min_lr: 0.003347  loss: 2.7812 (3.2877)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7205 (0.7397)  time: 0.2051  data: 0.0007  max mem: 9147
Epoch: [94]  [ 400/1251]  eta: 0:02:58  lr: 0.003344  min_lr: 0.003344  loss: 2.6227 (3.2884)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7185 (0.7818)  time: 0.2055  data: 0.0006  max mem: 9147
Epoch: [94]  [ 600/1251]  eta: 0:02:17  lr: 0.003342  min_lr: 0.003342  loss: 2.8236 (3.2815)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8872 (0.8062)  time: 0.2056  data: 0.0006  max mem: 9147
Epoch: [94]  [ 800/1251]  eta: 0:01:34  lr: 0.003339  min_lr: 0.003339  loss: 2.6655 (3.2584)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7415 (0.7996)  time: 0.1950  data: 0.0005  max mem: 9147
Epoch: [94]  [1000/1251]  eta: 0:00:52  lr: 0.003336  min_lr: 0.003336  loss: 2.8934 (3.2492)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7598 (0.7901)  time: 0.2295  data: 0.0006  max mem: 9147
Epoch: [94]  [1200/1251]  eta: 0:00:10  lr: 0.003334  min_lr: 0.003334  loss: 3.2885 (3.2609)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7445 (0.7826)  time: 0.2047  data: 0.0007  max mem: 9147
Epoch: [94]  [1250/1251]  eta: 0:00:00  lr: 0.003333  min_lr: 0.003333  loss: 2.6735 (3.2661)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8023 (0.7849)  time: 0.1420  data: 0.0016  max mem: 9147
Epoch: [94] Total time: 0:04:23 (0.2106 s / it)
Averaged stats: lr: 0.003333  min_lr: 0.003333  loss: 2.6735 (3.2472)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8023 (0.7849)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.8250 (0.8250)  acc1: 83.2000 (83.2000)  acc5: 96.0000 (96.0000)  time: 5.6803  data: 5.5870  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0100 (1.0421)  acc1: 79.6000 (77.4545)  acc5: 95.6000 (94.4727)  time: 0.6759  data: 0.5845  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3421 (1.2961)  acc1: 68.4000 (72.5905)  acc5: 91.2000 (90.9714)  time: 0.1816  data: 0.1014  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4189 (1.3206)  acc1: 68.4000 (71.9840)  acc5: 88.0000 (90.7040)  time: 0.1867  data: 0.1124  max mem: 9147
Test: Total time: 0:00:09 (0.3967 s / it)
* Acc@1 71.774 Acc@5 90.846 loss 1.319
Accuracy of the model on the 50000 test images: 71.8%
Max accuracy: 71.94%
Epoch: [95]  [   0/1251]  eta: 1:02:57  lr: 0.003333  min_lr: 0.003333  loss: 3.9282 (3.9282)  weight_decay: 0.0500 (0.0500)  time: 3.0193  data: 2.8205  max mem: 9147
Epoch: [95]  [ 200/1251]  eta: 0:03:57  lr: 0.003330  min_lr: 0.003330  loss: 2.9272 (3.3488)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6493 (0.8531)  time: 0.2143  data: 0.0110  max mem: 9147
Epoch: [95]  [ 400/1251]  eta: 0:03:05  lr: 0.003327  min_lr: 0.003327  loss: 2.5953 (3.2809)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7489 (0.8086)  time: 0.1899  data: 0.0005  max mem: 9147
Epoch: [95]  [ 600/1251]  eta: 0:02:17  lr: 0.003325  min_lr: 0.003325  loss: 2.7371 (3.2806)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7980 (0.8069)  time: 0.1959  data: 0.0006  max mem: 9147
Epoch: [95]  [ 800/1251]  eta: 0:01:35  lr: 0.003322  min_lr: 0.003322  loss: 3.4021 (3.2647)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6745 (0.8031)  time: 0.2178  data: 0.0005  max mem: 9147
Epoch: [95]  [1000/1251]  eta: 0:00:52  lr: 0.003319  min_lr: 0.003319  loss: 2.7821 (3.2697)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8750 (0.8026)  time: 0.2055  data: 0.0007  max mem: 9147
Epoch: [95]  [1200/1251]  eta: 0:00:10  lr: 0.003317  min_lr: 0.003317  loss: 2.8002 (3.2619)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6522 (0.8041)  time: 0.2202  data: 0.0147  max mem: 9147
Epoch: [95]  [1250/1251]  eta: 0:00:00  lr: 0.003316  min_lr: 0.003316  loss: 2.7653 (3.2536)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6468 (0.7985)  time: 0.1478  data: 0.0011  max mem: 9147
Epoch: [95] Total time: 0:04:24 (0.2117 s / it)
Averaged stats: lr: 0.003316  min_lr: 0.003316  loss: 2.7653 (3.2548)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6468 (0.7985)
Test:  [ 0/25]  eta: 0:02:12  loss: 0.8488 (0.8488)  acc1: 80.8000 (80.8000)  acc5: 95.6000 (95.6000)  time: 5.2803  data: 5.1930  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0275 (1.0573)  acc1: 78.8000 (77.2000)  acc5: 96.0000 (95.0545)  time: 0.7697  data: 0.6739  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3516 (1.2770)  acc1: 70.0000 (72.7619)  acc5: 90.4000 (91.5238)  time: 0.2245  data: 0.1412  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4026 (1.2978)  acc1: 70.0000 (72.2880)  acc5: 88.0000 (91.2160)  time: 0.2267  data: 0.1490  max mem: 9147
Test: Total time: 0:00:10 (0.4112 s / it)
* Acc@1 72.102 Acc@5 91.150 loss 1.296
Accuracy of the model on the 50000 test images: 72.1%
Max accuracy: 72.10%
Epoch: [96]  [   0/1251]  eta: 0:55:34  lr: 0.003316  min_lr: 0.003316  loss: 2.6549 (2.6549)  weight_decay: 0.0500 (0.0500)  time: 2.6652  data: 1.8392  max mem: 9147
Epoch: [96]  [ 200/1251]  eta: 0:03:59  lr: 0.003313  min_lr: 0.003313  loss: 3.1985 (3.2372)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6950 (0.7862)  time: 0.2291  data: 0.0007  max mem: 9147
Epoch: [96]  [ 400/1251]  eta: 0:03:07  lr: 0.003311  min_lr: 0.003311  loss: 3.4166 (3.2738)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6713 (0.7907)  time: 0.2050  data: 0.0007  max mem: 9147
Epoch: [96]  [ 600/1251]  eta: 0:02:21  lr: 0.003308  min_lr: 0.003308  loss: 2.9804 (3.2700)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8249 (0.8219)  time: 0.2005  data: 0.0006  max mem: 9147
Epoch: [96]  [ 800/1251]  eta: 0:01:37  lr: 0.003305  min_lr: 0.003305  loss: 2.6474 (3.2765)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9185 (0.8243)  time: 0.2210  data: 0.0005  max mem: 9147
Epoch: [96]  [1000/1251]  eta: 0:00:53  lr: 0.003302  min_lr: 0.003302  loss: 2.6877 (3.2669)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7078 (0.8129)  time: 0.2193  data: 0.0009  max mem: 9147
Epoch: [96]  [1200/1251]  eta: 0:00:10  lr: 0.003300  min_lr: 0.003300  loss: 2.7672 (3.2735)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7045 (0.8022)  time: 0.1997  data: 0.0006  max mem: 9147
Epoch: [96]  [1250/1251]  eta: 0:00:00  lr: 0.003299  min_lr: 0.003299  loss: 2.8945 (3.2731)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8068 (0.8064)  time: 0.1390  data: 0.0008  max mem: 9147
Epoch: [96] Total time: 0:04:27 (0.2136 s / it)
Averaged stats: lr: 0.003299  min_lr: 0.003299  loss: 2.8945 (3.2623)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8068 (0.8064)
Test:  [ 0/25]  eta: 0:01:24  loss: 0.8209 (0.8209)  acc1: 84.0000 (84.0000)  acc5: 96.0000 (96.0000)  time: 3.3904  data: 3.2967  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 1.0727 (1.1055)  acc1: 76.8000 (77.0182)  acc5: 94.8000 (94.4727)  time: 0.6196  data: 0.5290  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3969 (1.3291)  acc1: 69.2000 (72.0381)  acc5: 90.0000 (91.0476)  time: 0.2793  data: 0.1990  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4760 (1.3415)  acc1: 69.2000 (71.7920)  acc5: 89.2000 (90.9440)  time: 0.2346  data: 0.1554  max mem: 9147
Test: Total time: 0:00:10 (0.4124 s / it)
* Acc@1 71.846 Acc@5 90.988 loss 1.335
Accuracy of the model on the 50000 test images: 71.8%
Max accuracy: 72.10%
Epoch: [97]  [   0/1251]  eta: 1:06:12  lr: 0.003299  min_lr: 0.003299  loss: 2.4747 (2.4747)  weight_decay: 0.0500 (0.0500)  time: 3.1757  data: 2.7320  max mem: 9147
Epoch: [97]  [ 200/1251]  eta: 0:03:33  lr: 0.003296  min_lr: 0.003296  loss: 3.1914 (3.2012)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6741 (0.7571)  time: 0.1930  data: 0.0005  max mem: 9147
Epoch: [97]  [ 400/1251]  eta: 0:02:47  lr: 0.003294  min_lr: 0.003294  loss: 3.0404 (3.1863)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7631 (0.7967)  time: 0.1933  data: 0.0006  max mem: 9147
Epoch: [97]  [ 600/1251]  eta: 0:02:11  lr: 0.003291  min_lr: 0.003291  loss: 3.3342 (3.1974)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8259 (0.8228)  time: 0.2241  data: 0.0007  max mem: 9147
Epoch: [97]  [ 800/1251]  eta: 0:01:30  lr: 0.003288  min_lr: 0.003288  loss: 2.6646 (3.2163)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6467 (0.8130)  time: 0.2052  data: 0.0008  max mem: 9147
Epoch: [97]  [1000/1251]  eta: 0:00:51  lr: 0.003285  min_lr: 0.003285  loss: 2.7519 (3.2421)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9180 (0.8203)  time: 0.2198  data: 0.0007  max mem: 9147
Epoch: [97]  [1200/1251]  eta: 0:00:10  lr: 0.003283  min_lr: 0.003283  loss: 3.8884 (3.2536)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6739 (0.8092)  time: 0.1833  data: 0.0008  max mem: 9147
Epoch: [97]  [1250/1251]  eta: 0:00:00  lr: 0.003282  min_lr: 0.003282  loss: 2.9325 (3.2546)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6881 (0.8067)  time: 0.1408  data: 0.0013  max mem: 9147
Epoch: [97] Total time: 0:04:14 (0.2032 s / it)
Averaged stats: lr: 0.003282  min_lr: 0.003282  loss: 2.9325 (3.2570)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6881 (0.8067)
Test:  [ 0/25]  eta: 0:02:26  loss: 0.9853 (0.9853)  acc1: 82.4000 (82.4000)  acc5: 95.6000 (95.6000)  time: 5.8452  data: 5.7648  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1500 (1.1733)  acc1: 77.6000 (76.5091)  acc5: 94.8000 (94.2909)  time: 0.7576  data: 0.6830  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4933 (1.3798)  acc1: 68.0000 (71.9238)  acc5: 91.2000 (91.1810)  time: 0.2109  data: 0.1393  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5520 (1.3876)  acc1: 67.2000 (71.6160)  acc5: 88.4000 (90.9920)  time: 0.2096  data: 0.1392  max mem: 9147
Test: Total time: 0:00:10 (0.4165 s / it)
* Acc@1 71.730 Acc@5 90.956 loss 1.380
Accuracy of the model on the 50000 test images: 71.7%
Max accuracy: 72.10%
Epoch: [98]  [   0/1251]  eta: 1:05:21  lr: 0.003282  min_lr: 0.003282  loss: 2.3143 (2.3143)  weight_decay: 0.0500 (0.0500)  time: 3.1349  data: 1.6672  max mem: 9147
Epoch: [98]  [ 200/1251]  eta: 0:03:51  lr: 0.003279  min_lr: 0.003279  loss: 2.8359 (3.1949)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7485 (0.7942)  time: 0.1954  data: 0.0006  max mem: 9147
Epoch: [98]  [ 400/1251]  eta: 0:03:05  lr: 0.003276  min_lr: 0.003276  loss: 2.8137 (3.1693)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6909 (0.7934)  time: 0.2132  data: 0.0006  max mem: 9147
Epoch: [98]  [ 600/1251]  eta: 0:02:15  lr: 0.003274  min_lr: 0.003274  loss: 2.8262 (3.2226)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7128 (0.7923)  time: 0.1942  data: 0.0009  max mem: 9147
Epoch: [98]  [ 800/1251]  eta: 0:01:32  lr: 0.003271  min_lr: 0.003271  loss: 2.7736 (3.2194)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7073 (0.7918)  time: 0.1951  data: 0.0015  max mem: 9147
Epoch: [98]  [1000/1251]  eta: 0:00:50  lr: 0.003268  min_lr: 0.003268  loss: 2.6712 (3.2195)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7144 (0.7980)  time: 0.2056  data: 0.0008  max mem: 9147
Epoch: [98]  [1200/1251]  eta: 0:00:10  lr: 0.003265  min_lr: 0.003265  loss: 2.7878 (3.2401)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7648 (0.8098)  time: 0.1998  data: 0.0006  max mem: 9147
Epoch: [98]  [1250/1251]  eta: 0:00:00  lr: 0.003265  min_lr: 0.003265  loss: 2.7079 (3.2398)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8601 (0.8145)  time: 0.1379  data: 0.0010  max mem: 9147
Epoch: [98] Total time: 0:04:15 (0.2043 s / it)
Averaged stats: lr: 0.003265  min_lr: 0.003265  loss: 2.7079 (3.2493)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8601 (0.8145)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.8718 (0.8718)  acc1: 84.0000 (84.0000)  acc5: 95.2000 (95.2000)  time: 5.6927  data: 5.6092  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0080 (1.0592)  acc1: 77.6000 (77.2364)  acc5: 95.6000 (94.5818)  time: 0.7543  data: 0.6771  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3537 (1.3161)  acc1: 68.8000 (72.4000)  acc5: 90.0000 (91.0667)  time: 0.2125  data: 0.1390  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4938 (1.3246)  acc1: 68.8000 (71.9200)  acc5: 89.6000 (91.0720)  time: 0.2086  data: 0.1381  max mem: 9147
Test: Total time: 0:00:10 (0.4129 s / it)
* Acc@1 71.920 Acc@5 90.964 loss 1.313
Accuracy of the model on the 50000 test images: 71.9%
Max accuracy: 72.10%
Epoch: [99]  [   0/1251]  eta: 1:04:26  lr: 0.003265  min_lr: 0.003265  loss: 2.3185 (2.3185)  weight_decay: 0.0500 (0.0500)  time: 3.0910  data: 2.6664  max mem: 9147
Epoch: [99]  [ 200/1251]  eta: 0:03:56  lr: 0.003262  min_lr: 0.003262  loss: 2.6958 (3.2937)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7199 (0.7747)  time: 0.1963  data: 0.0007  max mem: 9147
Epoch: [99]  [ 400/1251]  eta: 0:03:04  lr: 0.003259  min_lr: 0.003259  loss: 3.2263 (3.2975)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7050 (0.7675)  time: 0.1733  data: 0.0019  max mem: 9147
Epoch: [99]  [ 600/1251]  eta: 0:02:18  lr: 0.003256  min_lr: 0.003256  loss: 2.7714 (3.2835)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7437 (0.7672)  time: 0.2007  data: 0.0006  max mem: 9147
Epoch: [99]  [ 800/1251]  eta: 0:01:36  lr: 0.003253  min_lr: 0.003253  loss: 2.4798 (3.2449)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7056 (0.7779)  time: 0.2148  data: 0.0126  max mem: 9147
Epoch: [99]  [1000/1251]  eta: 0:00:53  lr: 0.003251  min_lr: 0.003251  loss: 2.9088 (3.2435)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7294 (0.7809)  time: 0.2142  data: 0.0008  max mem: 9147
Epoch: [99]  [1200/1251]  eta: 0:00:10  lr: 0.003248  min_lr: 0.003248  loss: 2.6164 (3.2426)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7809 (0.7934)  time: 0.2241  data: 0.0010  max mem: 9147
Epoch: [99]  [1250/1251]  eta: 0:00:00  lr: 0.003247  min_lr: 0.003247  loss: 2.8679 (3.2425)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8620 (0.7950)  time: 0.1414  data: 0.0012  max mem: 9147
Epoch: [99] Total time: 0:04:27 (0.2140 s / it)
Averaged stats: lr: 0.003247  min_lr: 0.003247  loss: 2.8679 (3.2419)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8620 (0.7950)
Test:  [ 0/25]  eta: 0:01:21  loss: 0.8921 (0.8921)  acc1: 84.4000 (84.4000)  acc5: 96.8000 (96.8000)  time: 3.2568  data: 3.1765  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 1.0451 (1.0972)  acc1: 80.0000 (77.8182)  acc5: 95.6000 (94.2909)  time: 0.5688  data: 0.4737  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4189 (1.3115)  acc1: 69.6000 (72.9714)  acc5: 89.6000 (90.8571)  time: 0.2667  data: 0.1823  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4220 (1.3233)  acc1: 70.4000 (72.5920)  acc5: 88.4000 (90.6560)  time: 0.2047  data: 0.1264  max mem: 9147
Test: Total time: 0:00:09 (0.3875 s / it)
* Acc@1 72.264 Acc@5 91.108 loss 1.316
Accuracy of the model on the 50000 test images: 72.3%
Max accuracy: 72.26%
Epoch: [100]  [   0/1251]  eta: 1:00:08  lr: 0.003247  min_lr: 0.003247  loss: 4.1512 (4.1512)  weight_decay: 0.0500 (0.0500)  time: 2.8844  data: 2.6548  max mem: 9147
Epoch: [100]  [ 200/1251]  eta: 0:03:56  lr: 0.003244  min_lr: 0.003244  loss: 2.7763 (3.1829)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7037 (0.7435)  time: 0.2347  data: 0.0006  max mem: 9147
Epoch: [100]  [ 400/1251]  eta: 0:03:06  lr: 0.003242  min_lr: 0.003242  loss: 2.6204 (3.1960)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8660 (0.8157)  time: 0.2150  data: 0.0013  max mem: 9147
Epoch: [100]  [ 600/1251]  eta: 0:02:20  lr: 0.003239  min_lr: 0.003239  loss: 2.6787 (3.1939)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7934 (0.8222)  time: 0.2052  data: 0.0006  max mem: 9147
Epoch: [100]  [ 800/1251]  eta: 0:01:35  lr: 0.003236  min_lr: 0.003236  loss: 3.1546 (3.1796)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7606 (0.8155)  time: 0.1999  data: 0.0006  max mem: 9147
Epoch: [100]  [1000/1251]  eta: 0:00:53  lr: 0.003233  min_lr: 0.003233  loss: 2.6824 (3.1957)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8076 (0.8181)  time: 0.2146  data: 0.0007  max mem: 9147
Epoch: [100]  [1200/1251]  eta: 0:00:10  lr: 0.003230  min_lr: 0.003230  loss: 2.7059 (3.1889)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7619 (0.8166)  time: 0.2150  data: 0.0009  max mem: 9147
Epoch: [100]  [1250/1251]  eta: 0:00:00  lr: 0.003230  min_lr: 0.003230  loss: 2.9550 (3.1946)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7826 (0.8193)  time: 0.1438  data: 0.0011  max mem: 9147
Epoch: [100] Total time: 0:04:25 (0.2120 s / it)
Averaged stats: lr: 0.003230  min_lr: 0.003230  loss: 2.9550 (3.2276)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7826 (0.8193)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.8711 (0.8711)  acc1: 85.2000 (85.2000)  acc5: 96.0000 (96.0000)  time: 5.5349  data: 5.4477  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0052 (1.0833)  acc1: 79.6000 (77.7818)  acc5: 95.6000 (94.6546)  time: 0.7553  data: 0.6588  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3248 (1.2945)  acc1: 71.2000 (73.0857)  acc5: 91.2000 (91.4286)  time: 0.2064  data: 0.1210  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3666 (1.3107)  acc1: 70.4000 (72.6240)  acc5: 89.2000 (91.2640)  time: 0.2004  data: 0.1209  max mem: 9147
Test: Total time: 0:00:10 (0.4034 s / it)
* Acc@1 72.312 Acc@5 91.268 loss 1.312
Accuracy of the model on the 50000 test images: 72.3%
Max accuracy: 72.31%
Epoch: [101]  [   0/1251]  eta: 1:01:20  lr: 0.003230  min_lr: 0.003230  loss: 2.2702 (2.2702)  weight_decay: 0.0500 (0.0500)  time: 2.9423  data: 2.7275  max mem: 9147
Epoch: [101]  [ 200/1251]  eta: 0:03:42  lr: 0.003227  min_lr: 0.003227  loss: 2.7957 (3.1643)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8145 (0.7905)  time: 0.2054  data: 0.0008  max mem: 9147
Epoch: [101]  [ 400/1251]  eta: 0:02:58  lr: 0.003224  min_lr: 0.003224  loss: 3.1373 (3.2002)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8118 (0.8284)  time: 0.1946  data: 0.0006  max mem: 9147
Epoch: [101]  [ 600/1251]  eta: 0:02:17  lr: 0.003221  min_lr: 0.003221  loss: 2.6471 (3.1735)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7486 (0.8365)  time: 0.2152  data: 0.0006  max mem: 9147
Epoch: [101]  [ 800/1251]  eta: 0:01:34  lr: 0.003218  min_lr: 0.003218  loss: 3.0242 (3.2017)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7274 (0.8255)  time: 0.2338  data: 0.0044  max mem: 9147
Epoch: [101]  [1000/1251]  eta: 0:00:53  lr: 0.003215  min_lr: 0.003215  loss: 2.6782 (3.2138)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7431 (0.8238)  time: 0.2045  data: 0.0006  max mem: 9147
Epoch: [101]  [1200/1251]  eta: 0:00:10  lr: 0.003212  min_lr: 0.003212  loss: 2.7108 (3.2212)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6984 (0.8183)  time: 0.2246  data: 0.0007  max mem: 9147
Epoch: [101]  [1250/1251]  eta: 0:00:00  lr: 0.003212  min_lr: 0.003212  loss: 2.8491 (3.2346)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7269 (0.8192)  time: 0.1409  data: 0.0009  max mem: 9147
Epoch: [101] Total time: 0:04:25 (0.2120 s / it)
Averaged stats: lr: 0.003212  min_lr: 0.003212  loss: 2.8491 (3.2441)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7269 (0.8192)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.8672 (0.8672)  acc1: 84.0000 (84.0000)  acc5: 96.8000 (96.8000)  time: 5.6441  data: 5.5637  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0780 (1.1183)  acc1: 78.0000 (77.1636)  acc5: 95.6000 (94.4364)  time: 0.7418  data: 0.6591  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4473 (1.3627)  acc1: 70.8000 (72.2476)  acc5: 90.0000 (90.8571)  time: 0.2145  data: 0.1380  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5414 (1.3747)  acc1: 69.6000 (71.9520)  acc5: 88.0000 (90.6080)  time: 0.2116  data: 0.1379  max mem: 9147
Test: Total time: 0:00:10 (0.4120 s / it)
* Acc@1 71.862 Acc@5 90.800 loss 1.366
Accuracy of the model on the 50000 test images: 71.9%
Max accuracy: 72.31%
Epoch: [102]  [   0/1251]  eta: 1:09:02  lr: 0.003212  min_lr: 0.003212  loss: 3.0506 (3.0506)  weight_decay: 0.0500 (0.0500)  time: 3.3110  data: 2.7503  max mem: 9147
Epoch: [102]  [ 200/1251]  eta: 0:03:58  lr: 0.003209  min_lr: 0.003209  loss: 2.6811 (3.2292)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7980 (0.8669)  time: 0.2014  data: 0.0007  max mem: 9147
Epoch: [102]  [ 400/1251]  eta: 0:03:08  lr: 0.003206  min_lr: 0.003206  loss: 3.2894 (3.2582)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7497 (0.8839)  time: 0.2249  data: 0.0007  max mem: 9147
Epoch: [102]  [ 600/1251]  eta: 0:02:19  lr: 0.003203  min_lr: 0.003203  loss: 3.3500 (3.2476)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6476 (0.8425)  time: 0.1915  data: 0.0006  max mem: 9147
Epoch: [102]  [ 800/1251]  eta: 0:01:34  lr: 0.003200  min_lr: 0.003200  loss: 3.6912 (3.2574)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7291 (0.8246)  time: 0.1893  data: 0.0010  max mem: 9147
Epoch: [102]  [1000/1251]  eta: 0:00:51  lr: 0.003197  min_lr: 0.003197  loss: 2.9745 (3.2606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8051 (0.8151)  time: 0.1838  data: 0.0006  max mem: 9147
Epoch: [102]  [1200/1251]  eta: 0:00:10  lr: 0.003195  min_lr: 0.003195  loss: 2.6405 (3.2619)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7238 (0.8023)  time: 0.2236  data: 0.0007  max mem: 9147
Epoch: [102]  [1250/1251]  eta: 0:00:00  lr: 0.003194  min_lr: 0.003194  loss: 2.7301 (3.2661)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6601 (0.7989)  time: 0.1393  data: 0.0009  max mem: 9147
Epoch: [102] Total time: 0:04:18 (0.2065 s / it)
Averaged stats: lr: 0.003194  min_lr: 0.003194  loss: 2.7301 (3.2394)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6601 (0.7989)
Test:  [ 0/25]  eta: 0:02:15  loss: 0.9105 (0.9105)  acc1: 83.2000 (83.2000)  acc5: 98.0000 (98.0000)  time: 5.4286  data: 5.3420  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0866 (1.1224)  acc1: 77.6000 (76.6182)  acc5: 95.2000 (94.0364)  time: 0.7123  data: 0.6315  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3919 (1.3316)  acc1: 70.0000 (72.0571)  acc5: 89.2000 (90.9714)  time: 0.1981  data: 0.1225  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4673 (1.3539)  acc1: 68.0000 (71.6640)  acc5: 88.8000 (90.6880)  time: 0.2028  data: 0.1313  max mem: 9147
Test: Total time: 0:00:10 (0.4004 s / it)
* Acc@1 71.786 Acc@5 90.970 loss 1.345
Accuracy of the model on the 50000 test images: 71.8%
Max accuracy: 72.31%
Epoch: [103]  [   0/1251]  eta: 1:07:20  lr: 0.003194  min_lr: 0.003194  loss: 4.1619 (4.1619)  weight_decay: 0.0500 (0.0500)  time: 3.2299  data: 2.8571  max mem: 9147
Epoch: [103]  [ 200/1251]  eta: 0:03:55  lr: 0.003191  min_lr: 0.003191  loss: 2.9673 (3.2068)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8468 (0.9182)  time: 0.2205  data: 0.0111  max mem: 9147
Epoch: [103]  [ 400/1251]  eta: 0:03:05  lr: 0.003188  min_lr: 0.003188  loss: 2.5884 (3.1935)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7567 (0.8736)  time: 0.1924  data: 0.0006  max mem: 9147
Epoch: [103]  [ 600/1251]  eta: 0:02:16  lr: 0.003185  min_lr: 0.003185  loss: 2.8881 (3.2397)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7363 (0.8331)  time: 0.1907  data: 0.0018  max mem: 9147
Epoch: [103]  [ 800/1251]  eta: 0:01:32  lr: 0.003182  min_lr: 0.003182  loss: 3.0375 (3.2220)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7583 (0.8171)  time: 0.1842  data: 0.0004  max mem: 9147
Epoch: [103]  [1000/1251]  eta: 0:00:51  lr: 0.003179  min_lr: 0.003179  loss: 3.2796 (3.2101)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7233 (inf)  time: 0.2299  data: 0.0260  max mem: 9147
Epoch: [103]  [1200/1251]  eta: 0:00:10  lr: 0.003176  min_lr: 0.003176  loss: 2.8880 (3.2107)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7562 (inf)  time: 0.1946  data: 0.0012  max mem: 9147
Epoch: [103]  [1250/1251]  eta: 0:00:00  lr: 0.003176  min_lr: 0.003176  loss: 2.8995 (3.2122)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6702 (inf)  time: 0.1387  data: 0.0010  max mem: 9147
Epoch: [103] Total time: 0:04:19 (0.2077 s / it)
Averaged stats: lr: 0.003176  min_lr: 0.003176  loss: 2.8995 (3.2411)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6702 (inf)
Test:  [ 0/25]  eta: 0:02:14  loss: 0.8946 (0.8946)  acc1: 83.6000 (83.6000)  acc5: 97.2000 (97.2000)  time: 5.3743  data: 5.2863  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0825 (1.1091)  acc1: 80.8000 (77.5273)  acc5: 95.2000 (94.5818)  time: 0.7515  data: 0.6550  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3206 (1.3124)  acc1: 70.4000 (72.8381)  acc5: 90.4000 (91.5810)  time: 0.2133  data: 0.1296  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4303 (1.3227)  acc1: 70.0000 (72.3680)  acc5: 89.6000 (91.4080)  time: 0.2072  data: 0.1295  max mem: 9147
Test: Total time: 0:00:10 (0.4020 s / it)
* Acc@1 72.214 Acc@5 91.316 loss 1.326
Accuracy of the model on the 50000 test images: 72.2%
Max accuracy: 72.31%
Epoch: [104]  [   0/1251]  eta: 1:04:40  lr: 0.003176  min_lr: 0.003176  loss: 2.3940 (2.3940)  weight_decay: 0.0500 (0.0500)  time: 3.1022  data: 1.6115  max mem: 9147
Epoch: [104]  [ 200/1251]  eta: 0:03:52  lr: 0.003173  min_lr: 0.003173  loss: 3.1931 (3.2037)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7321 (0.8201)  time: 0.2004  data: 0.0007  max mem: 9147
Epoch: [104]  [ 400/1251]  eta: 0:03:03  lr: 0.003170  min_lr: 0.003170  loss: 2.8956 (3.2295)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8052 (0.8351)  time: 0.1939  data: 0.0005  max mem: 9147
Epoch: [104]  [ 600/1251]  eta: 0:02:15  lr: 0.003167  min_lr: 0.003167  loss: 2.6577 (3.2186)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6722 (0.8082)  time: 0.1904  data: 0.0005  max mem: 9147
Epoch: [104]  [ 800/1251]  eta: 0:01:32  lr: 0.003164  min_lr: 0.003164  loss: 2.5761 (3.2258)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7658 (0.7979)  time: 0.1950  data: 0.0006  max mem: 9147
Epoch: [104]  [1000/1251]  eta: 0:00:51  lr: 0.003161  min_lr: 0.003161  loss: 2.7939 (3.2382)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8087 (0.8069)  time: 0.2151  data: 0.0007  max mem: 9147
Epoch: [104]  [1200/1251]  eta: 0:00:10  lr: 0.003158  min_lr: 0.003158  loss: 2.6649 (3.2267)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7468 (0.8053)  time: 0.2061  data: 0.0006  max mem: 9147
Epoch: [104]  [1250/1251]  eta: 0:00:00  lr: 0.003158  min_lr: 0.003158  loss: 2.5581 (3.2208)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6581 (0.8025)  time: 0.1470  data: 0.0015  max mem: 9147
Epoch: [104] Total time: 0:04:16 (0.2054 s / it)
Averaged stats: lr: 0.003158  min_lr: 0.003158  loss: 2.5581 (3.2210)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6581 (0.8025)
Test:  [ 0/25]  eta: 0:02:13  loss: 0.8054 (0.8054)  acc1: 84.0000 (84.0000)  acc5: 96.0000 (96.0000)  time: 5.3491  data: 5.2138  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0535 (1.0678)  acc1: 76.8000 (76.8727)  acc5: 95.6000 (94.4727)  time: 0.7578  data: 0.6589  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3214 (1.3037)  acc1: 69.2000 (72.4191)  acc5: 90.8000 (91.5619)  time: 0.2288  data: 0.1464  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4298 (1.3197)  acc1: 68.0000 (71.9200)  acc5: 89.6000 (91.4080)  time: 0.2227  data: 0.1463  max mem: 9147
Test: Total time: 0:00:10 (0.4112 s / it)
* Acc@1 72.270 Acc@5 91.240 loss 1.313
Accuracy of the model on the 50000 test images: 72.3%
Max accuracy: 72.31%
Epoch: [105]  [   0/1251]  eta: 1:06:40  lr: 0.003158  min_lr: 0.003158  loss: 3.2220 (3.2220)  weight_decay: 0.0500 (0.0500)  time: 3.1982  data: 1.6868  max mem: 9147
Epoch: [105]  [ 200/1251]  eta: 0:03:55  lr: 0.003155  min_lr: 0.003155  loss: 2.8003 (3.2476)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7060 (0.8090)  time: 0.2144  data: 0.0006  max mem: 9147
Epoch: [105]  [ 400/1251]  eta: 0:03:05  lr: 0.003152  min_lr: 0.003152  loss: 2.7130 (3.2217)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7644 (0.8004)  time: 0.2056  data: 0.0007  max mem: 9147
Epoch: [105]  [ 600/1251]  eta: 0:02:22  lr: 0.003149  min_lr: 0.003149  loss: 2.6848 (3.2306)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8131 (0.8266)  time: 0.2159  data: 0.0006  max mem: 9147
Epoch: [105]  [ 800/1251]  eta: 0:01:36  lr: 0.003146  min_lr: 0.003146  loss: 2.6289 (3.2584)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7504 (0.8023)  time: 0.1954  data: 0.0006  max mem: 9147
Epoch: [105]  [1000/1251]  eta: 0:00:53  lr: 0.003143  min_lr: 0.003143  loss: 3.2521 (3.2532)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7789 (0.8081)  time: 0.1949  data: 0.0005  max mem: 9147
Epoch: [105]  [1200/1251]  eta: 0:00:10  lr: 0.003140  min_lr: 0.003140  loss: 3.8705 (3.2523)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7525 (0.8003)  time: 0.1970  data: 0.0006  max mem: 9147
Epoch: [105]  [1250/1251]  eta: 0:00:00  lr: 0.003139  min_lr: 0.003139  loss: 3.8832 (3.2543)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6681 (0.7971)  time: 0.1425  data: 0.0011  max mem: 9147
Epoch: [105] Total time: 0:04:21 (0.2093 s / it)
Averaged stats: lr: 0.003139  min_lr: 0.003139  loss: 3.8832 (3.2308)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6681 (0.7971)
Test:  [ 0/25]  eta: 0:02:27  loss: 0.9832 (0.9832)  acc1: 81.2000 (81.2000)  acc5: 94.4000 (94.4000)  time: 5.8942  data: 5.8137  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1389 (1.1552)  acc1: 77.6000 (77.0182)  acc5: 94.8000 (94.4000)  time: 0.7458  data: 0.6725  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4240 (1.3571)  acc1: 68.8000 (72.7429)  acc5: 91.2000 (91.4095)  time: 0.2067  data: 0.1358  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5140 (1.3701)  acc1: 68.8000 (72.4480)  acc5: 89.2000 (91.3120)  time: 0.2055  data: 0.1358  max mem: 9147
Test: Total time: 0:00:10 (0.4155 s / it)
* Acc@1 72.540 Acc@5 91.278 loss 1.356
Accuracy of the model on the 50000 test images: 72.5%
Max accuracy: 72.54%
Epoch: [106]  [   0/1251]  eta: 0:59:34  lr: 0.003139  min_lr: 0.003139  loss: 2.4883 (2.4883)  weight_decay: 0.0500 (0.0500)  time: 2.8575  data: 1.9654  max mem: 9147
Epoch: [106]  [ 200/1251]  eta: 0:03:50  lr: 0.003136  min_lr: 0.003136  loss: 2.7149 (3.1368)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9151 (0.8216)  time: 0.1885  data: 0.0006  max mem: 9147
Epoch: [106]  [ 400/1251]  eta: 0:02:56  lr: 0.003133  min_lr: 0.003133  loss: 3.1458 (3.1735)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7936 (0.8476)  time: 0.2024  data: 0.0008  max mem: 9147
Epoch: [106]  [ 600/1251]  eta: 0:02:15  lr: 0.003130  min_lr: 0.003130  loss: 2.8808 (3.1703)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7359 (0.8253)  time: 0.2007  data: 0.0007  max mem: 9147
Epoch: [106]  [ 800/1251]  eta: 0:01:34  lr: 0.003127  min_lr: 0.003127  loss: 3.0812 (3.2003)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7755 (0.8262)  time: 0.2004  data: 0.0006  max mem: 9147
Epoch: [106]  [1000/1251]  eta: 0:00:52  lr: 0.003124  min_lr: 0.003124  loss: 2.7832 (3.2139)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9289 (0.8258)  time: 0.2197  data: 0.0009  max mem: 9147
Epoch: [106]  [1200/1251]  eta: 0:00:10  lr: 0.003121  min_lr: 0.003121  loss: 3.6489 (3.2390)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7707 (0.8288)  time: 0.1905  data: 0.0010  max mem: 9147
Epoch: [106]  [1250/1251]  eta: 0:00:00  lr: 0.003121  min_lr: 0.003121  loss: 2.8055 (3.2303)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6994 (0.8230)  time: 0.1392  data: 0.0011  max mem: 9147
Epoch: [106] Total time: 0:04:22 (0.2100 s / it)
Averaged stats: lr: 0.003121  min_lr: 0.003121  loss: 2.8055 (3.2292)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6994 (0.8230)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.8189 (0.8189)  acc1: 83.2000 (83.2000)  acc5: 94.4000 (94.4000)  time: 5.6400  data: 5.5372  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0494 (1.0652)  acc1: 78.0000 (77.3455)  acc5: 94.4000 (94.2909)  time: 0.7021  data: 0.6252  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3384 (1.2930)  acc1: 71.2000 (72.7429)  acc5: 90.4000 (90.9714)  time: 0.1939  data: 0.1208  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4029 (1.3016)  acc1: 68.8000 (72.4480)  acc5: 88.8000 (90.9440)  time: 0.1970  data: 0.1249  max mem: 9147
Test: Total time: 0:00:09 (0.3997 s / it)
* Acc@1 72.206 Acc@5 91.226 loss 1.296
Accuracy of the model on the 50000 test images: 72.2%
Max accuracy: 72.54%
Epoch: [107]  [   0/1251]  eta: 1:00:17  lr: 0.003121  min_lr: 0.003121  loss: 2.4367 (2.4367)  weight_decay: 0.0500 (0.0500)  time: 2.8915  data: 2.2179  max mem: 9147
Epoch: [107]  [ 200/1251]  eta: 0:03:53  lr: 0.003118  min_lr: 0.003118  loss: 2.7351 (3.2991)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6367 (0.7482)  time: 0.2007  data: 0.0008  max mem: 9147
Epoch: [107]  [ 400/1251]  eta: 0:03:01  lr: 0.003115  min_lr: 0.003115  loss: 3.3909 (3.2650)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8292 (0.8310)  time: 0.1907  data: 0.0005  max mem: 9147
Epoch: [107]  [ 600/1251]  eta: 0:02:14  lr: 0.003112  min_lr: 0.003112  loss: 3.3104 (3.2458)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.1947  data: 0.0007  max mem: 9147
Epoch: [107]  [ 800/1251]  eta: 0:01:32  lr: 0.003109  min_lr: 0.003109  loss: 3.1217 (3.2379)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6632 (nan)  time: 0.2019  data: 0.0007  max mem: 9147
Epoch: [107]  [1000/1251]  eta: 0:00:52  lr: 0.003106  min_lr: 0.003106  loss: 2.6339 (3.2292)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7731 (nan)  time: 0.2000  data: 0.0007  max mem: 9147
Epoch: [107]  [1200/1251]  eta: 0:00:10  lr: 0.003103  min_lr: 0.003103  loss: 2.7277 (3.2149)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6937 (nan)  time: 0.1956  data: 0.0006  max mem: 9147
Epoch: [107]  [1250/1251]  eta: 0:00:00  lr: 0.003102  min_lr: 0.003102  loss: 2.7658 (3.2180)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7538 (nan)  time: 0.1381  data: 0.0010  max mem: 9147
Epoch: [107] Total time: 0:04:16 (0.2054 s / it)
Averaged stats: lr: 0.003102  min_lr: 0.003102  loss: 2.7658 (3.2325)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7538 (nan)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.8148 (0.8148)  acc1: 84.4000 (84.4000)  acc5: 96.0000 (96.0000)  time: 5.6479  data: 5.5660  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0616 (1.0766)  acc1: 78.4000 (77.6364)  acc5: 95.2000 (94.4364)  time: 0.7659  data: 0.6898  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3628 (1.2874)  acc1: 70.0000 (72.9714)  acc5: 90.4000 (91.4476)  time: 0.2182  data: 0.1451  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3992 (1.3130)  acc1: 68.8000 (72.5120)  acc5: 89.2000 (91.1520)  time: 0.2176  data: 0.1451  max mem: 9147
Test: Total time: 0:00:10 (0.4144 s / it)
* Acc@1 72.148 Acc@5 91.324 loss 1.304
Accuracy of the model on the 50000 test images: 72.1%
Max accuracy: 72.54%
Epoch: [108]  [   0/1251]  eta: 1:02:18  lr: 0.003102  min_lr: 0.003102  loss: 4.2710 (4.2710)  weight_decay: 0.0500 (0.0500)  time: 2.9886  data: 2.6213  max mem: 9147
Epoch: [108]  [ 200/1251]  eta: 0:03:40  lr: 0.003099  min_lr: 0.003099  loss: 2.6399 (3.1718)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7449 (0.7855)  time: 0.1814  data: 0.0005  max mem: 9147
Epoch: [108]  [ 400/1251]  eta: 0:02:50  lr: 0.003096  min_lr: 0.003096  loss: 2.6301 (3.1941)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7734 (0.8370)  time: 0.1911  data: 0.0012  max mem: 9147
Epoch: [108]  [ 600/1251]  eta: 0:02:09  lr: 0.003093  min_lr: 0.003093  loss: 2.6858 (3.1942)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7780 (0.7986)  time: 0.2049  data: 0.0008  max mem: 9147
Epoch: [108]  [ 800/1251]  eta: 0:01:31  lr: 0.003090  min_lr: 0.003090  loss: 2.5821 (3.1947)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6585 (0.7905)  time: 0.1906  data: 0.0005  max mem: 9147
Epoch: [108]  [1000/1251]  eta: 0:00:51  lr: 0.003087  min_lr: 0.003087  loss: 2.5861 (3.1924)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8190 (0.7988)  time: 0.1906  data: 0.0012  max mem: 9147
Epoch: [108]  [1200/1251]  eta: 0:00:10  lr: 0.003084  min_lr: 0.003084  loss: 2.9357 (3.2103)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8655 (0.8104)  time: 0.2271  data: 0.0140  max mem: 9147
Epoch: [108]  [1250/1251]  eta: 0:00:00  lr: 0.003083  min_lr: 0.003083  loss: 2.7297 (3.2117)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8719 (0.8166)  time: 0.1385  data: 0.0007  max mem: 9147
Epoch: [108] Total time: 0:04:14 (0.2038 s / it)
Averaged stats: lr: 0.003083  min_lr: 0.003083  loss: 2.7297 (3.2365)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8719 (0.8166)
Test:  [ 0/25]  eta: 0:02:13  loss: 0.9393 (0.9393)  acc1: 82.0000 (82.0000)  acc5: 96.0000 (96.0000)  time: 5.3482  data: 5.2648  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1165 (1.1461)  acc1: 78.4000 (77.1636)  acc5: 95.6000 (94.9091)  time: 0.7683  data: 0.6753  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4392 (1.3768)  acc1: 68.4000 (72.2476)  acc5: 91.2000 (91.3524)  time: 0.2336  data: 0.1514  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5588 (1.3913)  acc1: 68.4000 (71.8400)  acc5: 89.2000 (91.1520)  time: 0.2275  data: 0.1513  max mem: 9147
Test: Total time: 0:00:10 (0.4149 s / it)
* Acc@1 71.810 Acc@5 90.986 loss 1.377
Accuracy of the model on the 50000 test images: 71.8%
Max accuracy: 72.54%
Epoch: [109]  [   0/1251]  eta: 1:00:23  lr: 0.003083  min_lr: 0.003083  loss: 4.2595 (4.2595)  weight_decay: 0.0500 (0.0500)  time: 2.8963  data: 1.6173  max mem: 9147
Epoch: [109]  [ 200/1251]  eta: 0:03:55  lr: 0.003080  min_lr: 0.003080  loss: 2.6117 (3.2992)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8360 (0.8078)  time: 0.2008  data: 0.0006  max mem: 9147
Epoch: [109]  [ 400/1251]  eta: 0:03:05  lr: 0.003077  min_lr: 0.003077  loss: 2.6364 (3.2371)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7194 (0.7908)  time: 0.2340  data: 0.0006  max mem: 9147
Epoch: [109]  [ 600/1251]  eta: 0:02:16  lr: 0.003074  min_lr: 0.003074  loss: 2.5868 (3.2206)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7587 (0.8307)  time: 0.1855  data: 0.0009  max mem: 9147
Epoch: [109]  [ 800/1251]  eta: 0:01:32  lr: 0.003071  min_lr: 0.003071  loss: 3.5033 (3.2508)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6714 (0.7987)  time: 0.1919  data: 0.0012  max mem: 9147
Epoch: [109]  [1000/1251]  eta: 0:00:50  lr: 0.003068  min_lr: 0.003068  loss: 2.6460 (3.2458)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7089 (0.7885)  time: 0.1923  data: 0.0012  max mem: 9147
Epoch: [109]  [1200/1251]  eta: 0:00:10  lr: 0.003065  min_lr: 0.003065  loss: 2.6000 (3.2315)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6887 (0.7861)  time: 0.2167  data: 0.0007  max mem: 9147
Epoch: [109]  [1250/1251]  eta: 0:00:00  lr: 0.003064  min_lr: 0.003064  loss: 2.8405 (3.2299)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7501 (0.7879)  time: 0.1485  data: 0.0017  max mem: 9147
Epoch: [109] Total time: 0:04:12 (0.2021 s / it)
Averaged stats: lr: 0.003064  min_lr: 0.003064  loss: 2.8405 (3.2316)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7501 (0.7879)
Test:  [ 0/25]  eta: 0:02:17  loss: 0.8053 (0.8053)  acc1: 82.8000 (82.8000)  acc5: 96.0000 (96.0000)  time: 5.4961  data: 5.4151  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 1.0357 (1.0399)  acc1: 77.6000 (77.4909)  acc5: 95.2000 (94.9091)  time: 0.6640  data: 0.5697  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3216 (1.2830)  acc1: 71.6000 (73.0857)  acc5: 90.8000 (91.6952)  time: 0.1794  data: 0.0959  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4553 (1.2995)  acc1: 70.8000 (72.6720)  acc5: 89.2000 (91.4880)  time: 0.2127  data: 0.1348  max mem: 9147
Test: Total time: 0:00:10 (0.4087 s / it)
* Acc@1 72.762 Acc@5 91.434 loss 1.295
Accuracy of the model on the 50000 test images: 72.8%
Max accuracy: 72.76%
Epoch: [110]  [   0/1251]  eta: 1:07:01  lr: 0.003064  min_lr: 0.003064  loss: 2.8062 (2.8062)  weight_decay: 0.0500 (0.0500)  time: 3.2142  data: 3.0128  max mem: 9147
Epoch: [110]  [ 200/1251]  eta: 0:03:50  lr: 0.003061  min_lr: 0.003061  loss: 2.7138 (3.1831)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7017 (0.7687)  time: 0.2001  data: 0.0006  max mem: 9147
Epoch: [110]  [ 400/1251]  eta: 0:03:03  lr: 0.003058  min_lr: 0.003058  loss: 2.7905 (3.1830)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7224 (0.7689)  time: 0.2003  data: 0.0007  max mem: 9147
Epoch: [110]  [ 600/1251]  eta: 0:02:16  lr: 0.003055  min_lr: 0.003055  loss: 3.1408 (3.2372)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6879 (0.7852)  time: 0.1950  data: 0.0006  max mem: 9147
Epoch: [110]  [ 800/1251]  eta: 0:01:32  lr: 0.003052  min_lr: 0.003052  loss: 2.6212 (3.2237)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8194 (0.8080)  time: 0.1950  data: 0.0005  max mem: 9147
Epoch: [110]  [1000/1251]  eta: 0:00:51  lr: 0.003049  min_lr: 0.003049  loss: 3.2794 (3.2233)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7485 (0.8190)  time: 0.2382  data: 0.0008  max mem: 9147
Epoch: [110]  [1200/1251]  eta: 0:00:10  lr: 0.003046  min_lr: 0.003046  loss: 2.7477 (3.2285)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7438 (0.8113)  time: 0.2055  data: 0.0005  max mem: 9147
Epoch: [110]  [1250/1251]  eta: 0:00:00  lr: 0.003045  min_lr: 0.003045  loss: 2.7918 (3.2310)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6576 (0.8086)  time: 0.1489  data: 0.0009  max mem: 9147
Epoch: [110] Total time: 0:04:17 (0.2061 s / it)
Averaged stats: lr: 0.003045  min_lr: 0.003045  loss: 2.7918 (3.2063)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6576 (0.8086)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.9073 (0.9073)  acc1: 80.4000 (80.4000)  acc5: 98.0000 (98.0000)  time: 5.6555  data: 5.5736  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1122 (1.1250)  acc1: 80.4000 (77.4909)  acc5: 95.2000 (94.8000)  time: 0.7399  data: 0.6447  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4555 (1.3464)  acc1: 69.6000 (72.5333)  acc5: 89.6000 (91.4857)  time: 0.2291  data: 0.1451  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4628 (1.3599)  acc1: 68.4000 (72.1120)  acc5: 89.2000 (91.2800)  time: 0.2230  data: 0.1450  max mem: 9147
Test: Total time: 0:00:10 (0.4235 s / it)
* Acc@1 72.274 Acc@5 91.332 loss 1.345
Accuracy of the model on the 50000 test images: 72.3%
Max accuracy: 72.76%
Epoch: [111]  [   0/1251]  eta: 1:04:33  lr: 0.003045  min_lr: 0.003045  loss: 2.3583 (2.3583)  weight_decay: 0.0500 (0.0500)  time: 3.0967  data: 1.9102  max mem: 9147
Epoch: [111]  [ 200/1251]  eta: 0:03:52  lr: 0.003042  min_lr: 0.003042  loss: 4.0083 (3.2094)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8928 (0.8461)  time: 0.2025  data: 0.0006  max mem: 9147
Epoch: [111]  [ 400/1251]  eta: 0:03:05  lr: 0.003039  min_lr: 0.003039  loss: 3.0512 (3.2182)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8257 (0.8347)  time: 0.2079  data: 0.0020  max mem: 9147
Epoch: [111]  [ 600/1251]  eta: 0:02:21  lr: 0.003036  min_lr: 0.003036  loss: 2.5697 (3.2197)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8022 (0.8336)  time: 0.2117  data: 0.0009  max mem: 9147
Epoch: [111]  [ 800/1251]  eta: 0:01:38  lr: 0.003033  min_lr: 0.003033  loss: 2.8652 (3.2247)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7486 (0.8205)  time: 0.2103  data: 0.0021  max mem: 9147
Epoch: [111]  [1000/1251]  eta: 0:00:54  lr: 0.003030  min_lr: 0.003030  loss: 2.8739 (3.2211)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8213 (0.8190)  time: 0.2049  data: 0.0006  max mem: 9147
Epoch: [111]  [1200/1251]  eta: 0:00:11  lr: 0.003027  min_lr: 0.003027  loss: 2.7254 (3.2213)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8215 (0.8292)  time: 0.2100  data: 0.0007  max mem: 9147
Epoch: [111]  [1250/1251]  eta: 0:00:00  lr: 0.003026  min_lr: 0.003026  loss: 2.7824 (3.2312)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9225 (0.8332)  time: 0.1453  data: 0.0013  max mem: 9147
Epoch: [111] Total time: 0:04:30 (0.2161 s / it)
Averaged stats: lr: 0.003026  min_lr: 0.003026  loss: 2.7824 (3.2178)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9225 (0.8332)
Test:  [ 0/25]  eta: 0:02:24  loss: 1.0042 (1.0042)  acc1: 84.8000 (84.8000)  acc5: 96.4000 (96.4000)  time: 5.7686  data: 5.6882  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0881 (1.1622)  acc1: 76.4000 (78.0364)  acc5: 95.6000 (94.9455)  time: 0.7496  data: 0.6752  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3828 (1.3472)  acc1: 69.6000 (73.1048)  acc5: 91.6000 (91.3714)  time: 0.2052  data: 0.1330  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4758 (1.3612)  acc1: 68.4000 (72.4480)  acc5: 88.8000 (91.1040)  time: 0.2035  data: 0.1329  max mem: 9147
Test: Total time: 0:00:10 (0.4095 s / it)
* Acc@1 72.498 Acc@5 91.336 loss 1.352
Accuracy of the model on the 50000 test images: 72.5%
Max accuracy: 72.76%
Epoch: [112]  [   0/1251]  eta: 1:03:52  lr: 0.003026  min_lr: 0.003026  loss: 2.4867 (2.4867)  weight_decay: 0.0500 (0.0500)  time: 3.0633  data: 2.6943  max mem: 9147
Epoch: [112]  [ 200/1251]  eta: 0:03:54  lr: 0.003023  min_lr: 0.003023  loss: 3.3316 (3.0807)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8187 (0.8162)  time: 0.2055  data: 0.0007  max mem: 9147
Epoch: [112]  [ 400/1251]  eta: 0:03:04  lr: 0.003020  min_lr: 0.003020  loss: 2.7476 (3.1338)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8517 (0.8179)  time: 0.1936  data: 0.0006  max mem: 9147
Epoch: [112]  [ 600/1251]  eta: 0:02:20  lr: 0.003017  min_lr: 0.003017  loss: 2.5939 (3.1390)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8173 (0.8266)  time: 0.2292  data: 0.0006  max mem: 9147
Epoch: [112]  [ 800/1251]  eta: 0:01:35  lr: 0.003014  min_lr: 0.003014  loss: 2.6869 (3.1512)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8619 (0.8172)  time: 0.1918  data: 0.0012  max mem: 9147
Epoch: [112]  [1000/1251]  eta: 0:00:51  lr: 0.003011  min_lr: 0.003011  loss: 2.5838 (3.1561)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6590 (0.8167)  time: 0.1949  data: 0.0006  max mem: 9147
Epoch: [112]  [1200/1251]  eta: 0:00:10  lr: 0.003007  min_lr: 0.003007  loss: 2.7271 (3.1780)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8649 (0.8400)  time: 0.1872  data: 0.0012  max mem: 9147
Epoch: [112]  [1250/1251]  eta: 0:00:00  lr: 0.003007  min_lr: 0.003007  loss: 2.6709 (3.1794)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8003 (0.8384)  time: 0.1398  data: 0.0010  max mem: 9147
Epoch: [112] Total time: 0:04:15 (0.2046 s / it)
Averaged stats: lr: 0.003007  min_lr: 0.003007  loss: 2.6709 (3.2095)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8003 (0.8384)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.9539 (0.9539)  acc1: 82.0000 (82.0000)  acc5: 97.2000 (97.2000)  time: 5.4495  data: 5.3500  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0598 (1.1052)  acc1: 78.0000 (77.8182)  acc5: 95.2000 (94.9818)  time: 0.7509  data: 0.6543  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3683 (1.3307)  acc1: 70.0000 (72.7429)  acc5: 91.2000 (91.6000)  time: 0.2154  data: 0.1314  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4912 (1.3393)  acc1: 68.4000 (72.3840)  acc5: 89.6000 (91.4400)  time: 0.2094  data: 0.1313  max mem: 9147
Test: Total time: 0:00:10 (0.4044 s / it)
* Acc@1 72.650 Acc@5 91.386 loss 1.331
Accuracy of the model on the 50000 test images: 72.7%
Max accuracy: 72.76%
Epoch: [113]  [   0/1251]  eta: 1:04:28  lr: 0.003007  min_lr: 0.003007  loss: 4.6258 (4.6258)  weight_decay: 0.0500 (0.0500)  time: 3.0920  data: 2.2990  max mem: 9147
Epoch: [113]  [ 200/1251]  eta: 0:03:40  lr: 0.003004  min_lr: 0.003004  loss: 3.1752 (3.2061)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7436 (0.7967)  time: 0.1813  data: 0.0005  max mem: 9147
Epoch: [113]  [ 400/1251]  eta: 0:02:51  lr: 0.003000  min_lr: 0.003000  loss: 2.8179 (3.2080)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8487 (0.8052)  time: 0.2143  data: 0.0007  max mem: 9147
Epoch: [113]  [ 600/1251]  eta: 0:02:10  lr: 0.002997  min_lr: 0.002997  loss: 3.7842 (3.2320)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8230 (0.8361)  time: 0.1987  data: 0.0006  max mem: 9147
Epoch: [113]  [ 800/1251]  eta: 0:01:29  lr: 0.002994  min_lr: 0.002994  loss: 2.6198 (3.2114)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7569 (0.8154)  time: 0.1915  data: 0.0012  max mem: 9147
Epoch: [113]  [1000/1251]  eta: 0:00:49  lr: 0.002991  min_lr: 0.002991  loss: 2.9189 (3.2232)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7611 (0.8020)  time: 0.1902  data: 0.0010  max mem: 9147
Epoch: [113]  [1200/1251]  eta: 0:00:10  lr: 0.002988  min_lr: 0.002988  loss: 3.1048 (3.2187)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8496 (0.8079)  time: 0.2099  data: 0.0006  max mem: 9147
Epoch: [113]  [1250/1251]  eta: 0:00:00  lr: 0.002987  min_lr: 0.002987  loss: 2.7919 (3.2187)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7236 (0.8078)  time: 0.1425  data: 0.0012  max mem: 9147
Epoch: [113] Total time: 0:04:10 (0.2004 s / it)
Averaged stats: lr: 0.002987  min_lr: 0.002987  loss: 2.7919 (3.2154)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7236 (0.8078)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.8233 (0.8233)  acc1: 82.8000 (82.8000)  acc5: 97.2000 (97.2000)  time: 5.6346  data: 5.5506  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9974 (1.0251)  acc1: 78.4000 (77.7818)  acc5: 95.6000 (94.5455)  time: 0.7453  data: 0.6677  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2879 (1.2501)  acc1: 71.2000 (72.9714)  acc5: 90.8000 (91.4476)  time: 0.2017  data: 0.1281  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3931 (1.2592)  acc1: 68.8000 (72.5920)  acc5: 90.0000 (91.4720)  time: 0.1991  data: 0.1272  max mem: 9147
Test: Total time: 0:00:10 (0.4047 s / it)
* Acc@1 72.976 Acc@5 91.570 loss 1.246
Accuracy of the model on the 50000 test images: 73.0%
Max accuracy: 72.98%
Epoch: [114]  [   0/1251]  eta: 1:01:52  lr: 0.002987  min_lr: 0.002987  loss: 2.7408 (2.7408)  weight_decay: 0.0500 (0.0500)  time: 2.9673  data: 2.7737  max mem: 9147
Epoch: [114]  [ 200/1251]  eta: 0:03:41  lr: 0.002984  min_lr: 0.002984  loss: 2.6044 (3.1287)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8589 (0.8615)  time: 0.2008  data: 0.0007  max mem: 9147
Epoch: [114]  [ 400/1251]  eta: 0:03:01  lr: 0.002981  min_lr: 0.002981  loss: 2.7494 (3.1487)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8441 (inf)  time: 0.2286  data: 0.0006  max mem: 9147
Epoch: [114]  [ 600/1251]  eta: 0:02:17  lr: 0.002978  min_lr: 0.002978  loss: 3.2724 (3.1890)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7925 (inf)  time: 0.1948  data: 0.0005  max mem: 9147
Epoch: [114]  [ 800/1251]  eta: 0:01:34  lr: 0.002975  min_lr: 0.002975  loss: 2.7828 (3.2027)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7667 (inf)  time: 0.2341  data: 0.0006  max mem: 9147
Epoch: [114]  [1000/1251]  eta: 0:00:52  lr: 0.002972  min_lr: 0.002972  loss: 2.6624 (3.1983)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7087 (inf)  time: 0.2202  data: 0.0009  max mem: 9147
Epoch: [114]  [1200/1251]  eta: 0:00:10  lr: 0.002968  min_lr: 0.002968  loss: 3.2485 (3.1979)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8651 (inf)  time: 0.2067  data: 0.0006  max mem: 9147
Epoch: [114]  [1250/1251]  eta: 0:00:00  lr: 0.002968  min_lr: 0.002968  loss: 2.7796 (3.1992)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9029 (inf)  time: 0.1434  data: 0.0010  max mem: 9147
Epoch: [114] Total time: 0:04:25 (0.2119 s / it)
Averaged stats: lr: 0.002968  min_lr: 0.002968  loss: 2.7796 (3.2306)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9029 (inf)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.8563 (0.8563)  acc1: 85.2000 (85.2000)  acc5: 97.2000 (97.2000)  time: 5.4507  data: 5.3702  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1016 (1.0933)  acc1: 76.0000 (78.0000)  acc5: 95.2000 (94.6182)  time: 0.7430  data: 0.6612  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3395 (1.3094)  acc1: 72.0000 (73.3143)  acc5: 90.4000 (91.5619)  time: 0.2112  data: 0.1333  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4105 (1.3280)  acc1: 70.0000 (72.7840)  acc5: 88.8000 (91.4240)  time: 0.2096  data: 0.1359  max mem: 9147
Test: Total time: 0:00:10 (0.4039 s / it)
* Acc@1 72.750 Acc@5 91.574 loss 1.324
Accuracy of the model on the 50000 test images: 72.8%
Max accuracy: 72.98%
Epoch: [115]  [   0/1251]  eta: 1:04:45  lr: 0.002968  min_lr: 0.002968  loss: 4.1988 (4.1988)  weight_decay: 0.0500 (0.0500)  time: 3.1058  data: 2.2197  max mem: 9147
Epoch: [115]  [ 200/1251]  eta: 0:03:59  lr: 0.002965  min_lr: 0.002965  loss: 3.5341 (3.2614)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9672 (0.9412)  time: 0.2184  data: 0.0092  max mem: 9147
Epoch: [115]  [ 400/1251]  eta: 0:03:07  lr: 0.002961  min_lr: 0.002961  loss: 2.7660 (3.2478)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8925 (0.8824)  time: 0.2078  data: 0.0006  max mem: 9147
Epoch: [115]  [ 600/1251]  eta: 0:02:20  lr: 0.002958  min_lr: 0.002958  loss: 2.6035 (3.2547)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6825 (0.8599)  time: 0.2112  data: 0.0008  max mem: 9147
Epoch: [115]  [ 800/1251]  eta: 0:01:37  lr: 0.002955  min_lr: 0.002955  loss: 4.0570 (3.2648)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7682 (0.8328)  time: 0.2196  data: 0.0008  max mem: 9147
Epoch: [115]  [1000/1251]  eta: 0:00:53  lr: 0.002952  min_lr: 0.002952  loss: 2.7957 (3.2739)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7650 (0.8298)  time: 0.1863  data: 0.0006  max mem: 9147
Epoch: [115]  [1200/1251]  eta: 0:00:10  lr: 0.002949  min_lr: 0.002949  loss: 2.6401 (3.2593)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7398 (0.8176)  time: 0.1876  data: 0.0005  max mem: 9147
Epoch: [115]  [1250/1251]  eta: 0:00:00  lr: 0.002948  min_lr: 0.002948  loss: 2.9999 (3.2605)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7624 (0.8229)  time: 0.1384  data: 0.0012  max mem: 9147
Epoch: [115] Total time: 0:04:21 (0.2089 s / it)
Averaged stats: lr: 0.002948  min_lr: 0.002948  loss: 2.9999 (3.2064)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7624 (0.8229)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.8248 (0.8248)  acc1: 81.2000 (81.2000)  acc5: 97.2000 (97.2000)  time: 5.5808  data: 5.5005  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0298 (1.0594)  acc1: 79.2000 (77.2000)  acc5: 94.8000 (94.4727)  time: 0.7619  data: 0.6719  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3276 (1.2985)  acc1: 69.6000 (72.5333)  acc5: 90.4000 (91.4857)  time: 0.2024  data: 0.1215  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4285 (1.3124)  acc1: 68.0000 (72.0640)  acc5: 90.4000 (91.4560)  time: 0.2034  data: 0.1271  max mem: 9147
Test: Total time: 0:00:10 (0.4046 s / it)
* Acc@1 72.600 Acc@5 91.404 loss 1.309
Accuracy of the model on the 50000 test images: 72.6%
Max accuracy: 72.98%
Epoch: [116]  [   0/1251]  eta: 1:01:37  lr: 0.002948  min_lr: 0.002948  loss: 2.4208 (2.4208)  weight_decay: 0.0500 (0.0500)  time: 2.9559  data: 2.6476  max mem: 9147
Epoch: [116]  [ 200/1251]  eta: 0:03:53  lr: 0.002945  min_lr: 0.002945  loss: 2.8025 (3.3050)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7497 (0.8495)  time: 0.2049  data: 0.0007  max mem: 9147
Epoch: [116]  [ 400/1251]  eta: 0:03:06  lr: 0.002942  min_lr: 0.002942  loss: 2.6669 (3.2246)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6997 (0.8085)  time: 0.2007  data: 0.0006  max mem: 9147
Epoch: [116]  [ 600/1251]  eta: 0:02:21  lr: 0.002938  min_lr: 0.002938  loss: 3.3894 (3.2057)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9879 (0.8449)  time: 0.2158  data: 0.0006  max mem: 9147
Epoch: [116]  [ 800/1251]  eta: 0:01:34  lr: 0.002935  min_lr: 0.002935  loss: 2.8517 (3.2245)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6471 (0.8171)  time: 0.1879  data: 0.0006  max mem: 9147
Epoch: [116]  [1000/1251]  eta: 0:00:51  lr: 0.002932  min_lr: 0.002932  loss: 2.5572 (3.2026)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6664 (0.8145)  time: 0.1900  data: 0.0014  max mem: 9147
Epoch: [116]  [1200/1251]  eta: 0:00:10  lr: 0.002929  min_lr: 0.002929  loss: 2.7714 (3.2040)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8175 (0.8058)  time: 0.2197  data: 0.0007  max mem: 9147
Epoch: [116]  [1250/1251]  eta: 0:00:00  lr: 0.002928  min_lr: 0.002928  loss: 2.8202 (3.2020)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7404 (0.8049)  time: 0.1523  data: 0.0015  max mem: 9147
Epoch: [116] Total time: 0:04:15 (0.2046 s / it)
Averaged stats: lr: 0.002928  min_lr: 0.002928  loss: 2.8202 (3.2152)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7404 (0.8049)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.8795 (0.8795)  acc1: 83.6000 (83.6000)  acc5: 96.8000 (96.8000)  time: 5.7123  data: 5.6313  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0432 (1.0553)  acc1: 78.8000 (77.3455)  acc5: 95.6000 (95.0182)  time: 0.7732  data: 0.6955  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3571 (1.2552)  acc1: 70.0000 (72.5905)  acc5: 91.2000 (91.8857)  time: 0.2028  data: 0.1294  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3727 (1.2677)  acc1: 69.6000 (72.3360)  acc5: 90.0000 (91.6960)  time: 0.2129  data: 0.1407  max mem: 9147
Test: Total time: 0:00:10 (0.4164 s / it)
* Acc@1 72.738 Acc@5 91.608 loss 1.266
Accuracy of the model on the 50000 test images: 72.7%
Max accuracy: 72.98%
Epoch: [117]  [   0/1251]  eta: 1:02:59  lr: 0.002928  min_lr: 0.002928  loss: 4.2433 (4.2433)  weight_decay: 0.0500 (0.0500)  time: 3.0213  data: 1.9878  max mem: 9147
Epoch: [117]  [ 200/1251]  eta: 0:03:59  lr: 0.002925  min_lr: 0.002925  loss: 3.2948 (3.2447)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7893 (0.8337)  time: 0.2056  data: 0.0009  max mem: 9147
Epoch: [117]  [ 400/1251]  eta: 0:03:02  lr: 0.002922  min_lr: 0.002922  loss: 2.8106 (3.2419)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7096 (0.8193)  time: 0.1905  data: 0.0020  max mem: 9147
Epoch: [117]  [ 600/1251]  eta: 0:02:19  lr: 0.002919  min_lr: 0.002919  loss: 3.3295 (3.2352)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8566 (0.8437)  time: 0.2049  data: 0.0006  max mem: 9147
Epoch: [117]  [ 800/1251]  eta: 0:01:36  lr: 0.002915  min_lr: 0.002915  loss: 2.6160 (3.2379)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7002 (0.8205)  time: 0.1997  data: 0.0006  max mem: 9147
Epoch: [117]  [1000/1251]  eta: 0:00:53  lr: 0.002912  min_lr: 0.002912  loss: 2.5475 (3.2100)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7526 (0.8107)  time: 0.2159  data: 0.0007  max mem: 9147
Epoch: [117]  [1200/1251]  eta: 0:00:10  lr: 0.002909  min_lr: 0.002909  loss: 3.6994 (3.2158)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8293 (0.8127)  time: 0.1942  data: 0.0007  max mem: 9147
Epoch: [117]  [1250/1251]  eta: 0:00:00  lr: 0.002908  min_lr: 0.002908  loss: 3.3980 (3.2157)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7926 (0.8134)  time: 0.1383  data: 0.0008  max mem: 9147
Epoch: [117] Total time: 0:04:24 (0.2111 s / it)
Averaged stats: lr: 0.002908  min_lr: 0.002908  loss: 3.3980 (3.1960)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7926 (0.8134)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.9057 (0.9057)  acc1: 84.8000 (84.8000)  acc5: 96.0000 (96.0000)  time: 5.5849  data: 5.5044  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.1044 (1.1485)  acc1: 78.4000 (77.5636)  acc5: 95.6000 (94.5091)  time: 0.7180  data: 0.6422  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4700 (1.3368)  acc1: 70.4000 (73.4286)  acc5: 90.0000 (91.4857)  time: 0.1938  data: 0.1213  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4735 (1.3506)  acc1: 70.0000 (72.8000)  acc5: 89.2000 (91.1680)  time: 0.1959  data: 0.1245  max mem: 9147
Test: Total time: 0:00:09 (0.3953 s / it)
* Acc@1 72.790 Acc@5 91.592 loss 1.347
Accuracy of the model on the 50000 test images: 72.8%
Max accuracy: 72.98%
Epoch: [118]  [   0/1251]  eta: 1:05:38  lr: 0.002908  min_lr: 0.002908  loss: 4.3690 (4.3690)  weight_decay: 0.0500 (0.0500)  time: 3.1486  data: 2.1043  max mem: 9147
Epoch: [118]  [ 200/1251]  eta: 0:03:59  lr: 0.002905  min_lr: 0.002905  loss: 2.7664 (3.1720)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7414 (0.7761)  time: 0.2189  data: 0.0007  max mem: 9147
Epoch: [118]  [ 400/1251]  eta: 0:03:09  lr: 0.002902  min_lr: 0.002902  loss: 2.6341 (3.1820)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7001 (0.7614)  time: 0.2192  data: 0.0006  max mem: 9147
Epoch: [118]  [ 600/1251]  eta: 0:02:23  lr: 0.002899  min_lr: 0.002899  loss: 2.8700 (3.2090)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7195 (0.7818)  time: 0.2025  data: 0.0006  max mem: 9147
Epoch: [118]  [ 800/1251]  eta: 0:01:38  lr: 0.002895  min_lr: 0.002895  loss: 2.5657 (3.1945)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8638 (0.7964)  time: 0.2002  data: 0.0005  max mem: 9147
Epoch: [118]  [1000/1251]  eta: 0:00:54  lr: 0.002892  min_lr: 0.002892  loss: 3.5432 (3.1923)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8230 (0.8029)  time: 0.2105  data: 0.0007  max mem: 9147
Epoch: [118]  [1200/1251]  eta: 0:00:11  lr: 0.002889  min_lr: 0.002889  loss: 2.7028 (3.2010)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6704 (0.7908)  time: 0.2248  data: 0.0006  max mem: 9147
Epoch: [118]  [1250/1251]  eta: 0:00:00  lr: 0.002888  min_lr: 0.002888  loss: 3.4553 (3.2042)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8436 (0.8021)  time: 0.1432  data: 0.0013  max mem: 9147
Epoch: [118] Total time: 0:04:32 (0.2175 s / it)
Averaged stats: lr: 0.002888  min_lr: 0.002888  loss: 3.4553 (3.2039)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8436 (0.8021)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.9010 (0.9010)  acc1: 85.2000 (85.2000)  acc5: 97.2000 (97.2000)  time: 5.5705  data: 5.4901  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1174 (1.1103)  acc1: 78.4000 (78.1455)  acc5: 95.2000 (94.6909)  time: 0.7396  data: 0.6470  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3976 (1.3505)  acc1: 70.8000 (72.9524)  acc5: 91.2000 (91.4476)  time: 0.2037  data: 0.1209  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5160 (1.3650)  acc1: 68.8000 (72.4480)  acc5: 89.6000 (91.2800)  time: 0.1983  data: 0.1208  max mem: 9147
Test: Total time: 0:00:10 (0.4000 s / it)
* Acc@1 72.700 Acc@5 91.400 loss 1.354
Accuracy of the model on the 50000 test images: 72.7%
Max accuracy: 72.98%
Epoch: [119]  [   0/1251]  eta: 1:03:43  lr: 0.002888  min_lr: 0.002888  loss: 2.3886 (2.3886)  weight_decay: 0.0500 (0.0500)  time: 3.0563  data: 2.6478  max mem: 9147
Epoch: [119]  [ 200/1251]  eta: 0:03:57  lr: 0.002885  min_lr: 0.002885  loss: 3.3868 (3.1263)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7203 (0.8412)  time: 0.2103  data: 0.0008  max mem: 9147
Epoch: [119]  [ 400/1251]  eta: 0:02:58  lr: 0.002882  min_lr: 0.002882  loss: 2.8363 (3.1469)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8230 (0.8342)  time: 0.1950  data: 0.0007  max mem: 9147
Epoch: [119]  [ 600/1251]  eta: 0:02:15  lr: 0.002879  min_lr: 0.002879  loss: 2.6446 (3.1705)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6977 (0.8246)  time: 0.2246  data: 0.0006  max mem: 9147
Epoch: [119]  [ 800/1251]  eta: 0:01:34  lr: 0.002875  min_lr: 0.002875  loss: 3.0490 (3.1667)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8489 (0.8289)  time: 0.1831  data: 0.0007  max mem: 9147
Epoch: [119]  [1000/1251]  eta: 0:00:52  lr: 0.002872  min_lr: 0.002872  loss: 2.8482 (3.1675)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6992 (0.8203)  time: 0.2145  data: 0.0006  max mem: 9147
Epoch: [119]  [1200/1251]  eta: 0:00:10  lr: 0.002869  min_lr: 0.002869  loss: 2.8831 (3.1798)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8628 (0.8266)  time: 0.2057  data: 0.0008  max mem: 9147
Epoch: [119]  [1250/1251]  eta: 0:00:00  lr: 0.002868  min_lr: 0.002868  loss: 2.6158 (3.1811)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7854 (0.8245)  time: 0.1483  data: 0.0010  max mem: 9147
Epoch: [119] Total time: 0:04:22 (0.2099 s / it)
Averaged stats: lr: 0.002868  min_lr: 0.002868  loss: 2.6158 (3.1879)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7854 (0.8245)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.7739 (0.7739)  acc1: 85.2000 (85.2000)  acc5: 95.6000 (95.6000)  time: 5.4683  data: 5.3707  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9866 (1.0035)  acc1: 78.8000 (78.6909)  acc5: 95.2000 (94.9455)  time: 0.7534  data: 0.6588  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2508 (1.2324)  acc1: 72.0000 (73.3524)  acc5: 92.0000 (91.8476)  time: 0.2213  data: 0.1381  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3957 (1.2391)  acc1: 69.6000 (73.0560)  acc5: 89.6000 (91.7280)  time: 0.2153  data: 0.1380  max mem: 9147
Test: Total time: 0:00:10 (0.4101 s / it)
* Acc@1 73.130 Acc@5 91.598 loss 1.237
Accuracy of the model on the 50000 test images: 73.1%
Max accuracy: 73.13%
Epoch: [120]  [   0/1251]  eta: 1:08:34  lr: 0.002868  min_lr: 0.002868  loss: 2.6148 (2.6148)  weight_decay: 0.0500 (0.0500)  time: 3.2889  data: 3.1099  max mem: 9147
Epoch: [120]  [ 200/1251]  eta: 0:03:46  lr: 0.002865  min_lr: 0.002865  loss: 2.5659 (3.1123)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8629 (0.8602)  time: 0.2303  data: 0.0006  max mem: 9147
Epoch: [120]  [ 400/1251]  eta: 0:03:00  lr: 0.002862  min_lr: 0.002862  loss: 2.6422 (3.1659)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8925 (0.8392)  time: 0.2009  data: 0.0007  max mem: 9147
Epoch: [120]  [ 600/1251]  eta: 0:02:18  lr: 0.002858  min_lr: 0.002858  loss: 2.5675 (3.1811)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7947 (0.8290)  time: 0.2096  data: 0.0007  max mem: 9147
Epoch: [120]  [ 800/1251]  eta: 0:01:36  lr: 0.002855  min_lr: 0.002855  loss: 2.6961 (3.1752)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7788 (0.8330)  time: 0.2147  data: 0.0096  max mem: 9147
Epoch: [120]  [1000/1251]  eta: 0:00:53  lr: 0.002852  min_lr: 0.002852  loss: 3.8001 (3.1994)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7508 (0.8252)  time: 0.2052  data: 0.0022  max mem: 9147
Epoch: [120]  [1200/1251]  eta: 0:00:10  lr: 0.002849  min_lr: 0.002849  loss: 2.6996 (3.1991)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6569 (0.8129)  time: 0.2071  data: 0.0006  max mem: 9147
Epoch: [120]  [1250/1251]  eta: 0:00:00  lr: 0.002848  min_lr: 0.002848  loss: 3.0036 (3.1987)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6591 (0.8089)  time: 0.1430  data: 0.0010  max mem: 9147
Epoch: [120] Total time: 0:04:27 (0.2140 s / it)
Averaged stats: lr: 0.002848  min_lr: 0.002848  loss: 3.0036 (3.1965)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6591 (0.8089)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.9076 (0.9076)  acc1: 82.0000 (82.0000)  acc5: 96.4000 (96.4000)  time: 5.6909  data: 5.6104  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0497 (1.0619)  acc1: 80.8000 (78.5818)  acc5: 95.6000 (94.7636)  time: 0.7884  data: 0.7120  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2833 (1.2820)  acc1: 70.8000 (73.4476)  acc5: 90.0000 (91.6571)  time: 0.2198  data: 0.1456  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4545 (1.3055)  acc1: 68.8000 (72.8640)  acc5: 89.2000 (91.3600)  time: 0.2233  data: 0.1509  max mem: 9147
Test: Total time: 0:00:10 (0.4224 s / it)
* Acc@1 73.120 Acc@5 91.526 loss 1.290
Accuracy of the model on the 50000 test images: 73.1%
Max accuracy: 73.13%
Epoch: [121]  [   0/1251]  eta: 1:04:59  lr: 0.002848  min_lr: 0.002848  loss: 2.1857 (2.1857)  weight_decay: 0.0500 (0.0500)  time: 3.1169  data: 1.8547  max mem: 9147
Epoch: [121]  [ 200/1251]  eta: 0:03:49  lr: 0.002845  min_lr: 0.002845  loss: 2.6684 (3.1398)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7148 (0.7588)  time: 0.1949  data: 0.0005  max mem: 9147
Epoch: [121]  [ 400/1251]  eta: 0:02:57  lr: 0.002841  min_lr: 0.002841  loss: 2.9712 (3.1954)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8592 (0.8499)  time: 0.2089  data: 0.0006  max mem: 9147
Epoch: [121]  [ 600/1251]  eta: 0:02:16  lr: 0.002838  min_lr: 0.002838  loss: 3.2253 (3.1938)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7665 (0.8138)  time: 0.2199  data: 0.0008  max mem: 9147
Epoch: [121]  [ 800/1251]  eta: 0:01:35  lr: 0.002835  min_lr: 0.002835  loss: 2.5574 (3.1815)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7993 (0.8440)  time: 0.2250  data: 0.0117  max mem: 9147
Epoch: [121]  [1000/1251]  eta: 0:00:53  lr: 0.002831  min_lr: 0.002831  loss: 2.5742 (3.1630)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7880 (0.8386)  time: 0.2227  data: 0.0006  max mem: 9147
Epoch: [121]  [1200/1251]  eta: 0:00:10  lr: 0.002828  min_lr: 0.002828  loss: 2.7198 (3.1745)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8186 (0.8275)  time: 0.2296  data: 0.0007  max mem: 9147
Epoch: [121]  [1250/1251]  eta: 0:00:00  lr: 0.002827  min_lr: 0.002827  loss: 3.8350 (3.1770)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7133 (0.8236)  time: 0.1563  data: 0.0013  max mem: 9147
Epoch: [121] Total time: 0:04:24 (0.2116 s / it)
Averaged stats: lr: 0.002827  min_lr: 0.002827  loss: 3.8350 (3.1942)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7133 (0.8236)
Test:  [ 0/25]  eta: 0:02:23  loss: 0.9116 (0.9116)  acc1: 83.6000 (83.6000)  acc5: 97.2000 (97.2000)  time: 5.7507  data: 5.6560  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0965 (1.1021)  acc1: 78.8000 (77.7091)  acc5: 95.2000 (95.2364)  time: 0.7272  data: 0.6491  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3395 (1.3374)  acc1: 70.4000 (72.7048)  acc5: 91.6000 (91.6571)  time: 0.1886  data: 0.1158  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5305 (1.3499)  acc1: 69.6000 (72.3360)  acc5: 89.2000 (91.6000)  time: 0.2070  data: 0.1367  max mem: 9147
Test: Total time: 0:00:10 (0.4123 s / it)
* Acc@1 72.808 Acc@5 91.588 loss 1.339
Accuracy of the model on the 50000 test images: 72.8%
Max accuracy: 73.13%
Epoch: [122]  [   0/1251]  eta: 1:04:50  lr: 0.002827  min_lr: 0.002827  loss: 2.5627 (2.5627)  weight_decay: 0.0500 (0.0500)  time: 3.1103  data: 2.7663  max mem: 9147
Epoch: [122]  [ 200/1251]  eta: 0:03:54  lr: 0.002824  min_lr: 0.002824  loss: 2.6426 (3.1908)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8045 (0.7903)  time: 0.2153  data: 0.0007  max mem: 9147
Epoch: [122]  [ 400/1251]  eta: 0:03:06  lr: 0.002821  min_lr: 0.002821  loss: 2.8217 (3.1851)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9644 (0.8676)  time: 0.2236  data: 0.0232  max mem: 9147
Epoch: [122]  [ 600/1251]  eta: 0:02:18  lr: 0.002818  min_lr: 0.002818  loss: 2.6167 (3.1965)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7620 (inf)  time: 0.2096  data: 0.0007  max mem: 9147
Epoch: [122]  [ 800/1251]  eta: 0:01:35  lr: 0.002814  min_lr: 0.002814  loss: 2.8122 (3.1702)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7859 (inf)  time: 0.2095  data: 0.0008  max mem: 9147
Epoch: [122]  [1000/1251]  eta: 0:00:53  lr: 0.002811  min_lr: 0.002811  loss: 3.2730 (3.1692)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8686 (inf)  time: 0.2249  data: 0.0007  max mem: 9147
Epoch: [122]  [1200/1251]  eta: 0:00:10  lr: 0.002808  min_lr: 0.002808  loss: 2.6049 (3.1589)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8225 (inf)  time: 0.2201  data: 0.0197  max mem: 9147
Epoch: [122]  [1250/1251]  eta: 0:00:00  lr: 0.002807  min_lr: 0.002807  loss: 2.9365 (3.1674)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8225 (inf)  time: 0.1389  data: 0.0008  max mem: 9147
Epoch: [122] Total time: 0:04:25 (0.2125 s / it)
Averaged stats: lr: 0.002807  min_lr: 0.002807  loss: 2.9365 (3.1865)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8225 (inf)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.9420 (0.9420)  acc1: 84.0000 (84.0000)  acc5: 96.4000 (96.4000)  time: 5.6688  data: 5.5781  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0492 (1.0646)  acc1: 78.4000 (78.2545)  acc5: 95.2000 (94.5455)  time: 0.6936  data: 0.5977  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3374 (1.2870)  acc1: 70.8000 (73.2191)  acc5: 91.2000 (91.5810)  time: 0.1762  data: 0.0933  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4513 (1.2961)  acc1: 69.6000 (72.8160)  acc5: 89.2000 (91.3920)  time: 0.1961  data: 0.1192  max mem: 9147
Test: Total time: 0:00:10 (0.4025 s / it)
* Acc@1 73.030 Acc@5 91.654 loss 1.283
Accuracy of the model on the 50000 test images: 73.0%
Max accuracy: 73.13%
Epoch: [123]  [   0/1251]  eta: 1:03:48  lr: 0.002807  min_lr: 0.002807  loss: 3.8694 (3.8694)  weight_decay: 0.0500 (0.0500)  time: 3.0604  data: 2.2762  max mem: 9147
Epoch: [123]  [ 200/1251]  eta: 0:03:59  lr: 0.002804  min_lr: 0.002804  loss: 2.7212 (3.1478)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7836 (0.7913)  time: 0.2134  data: 0.0007  max mem: 9147
Epoch: [123]  [ 400/1251]  eta: 0:03:04  lr: 0.002800  min_lr: 0.002800  loss: 2.8245 (3.1696)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7354 (0.7964)  time: 0.1939  data: 0.0006  max mem: 9147
Epoch: [123]  [ 600/1251]  eta: 0:02:20  lr: 0.002797  min_lr: 0.002797  loss: 2.7663 (3.1475)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0349 (0.8345)  time: 0.2176  data: 0.0008  max mem: 9147
Epoch: [123]  [ 800/1251]  eta: 0:01:35  lr: 0.002794  min_lr: 0.002794  loss: 2.6637 (3.1547)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7582 (0.8282)  time: 0.2048  data: 0.0007  max mem: 9147
Epoch: [123]  [1000/1251]  eta: 0:00:53  lr: 0.002790  min_lr: 0.002790  loss: 2.9198 (3.1686)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8122 (0.8157)  time: 0.2105  data: 0.0007  max mem: 9147
Epoch: [123]  [1200/1251]  eta: 0:00:10  lr: 0.002787  min_lr: 0.002787  loss: 2.5644 (3.1717)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7954 (0.8259)  time: 0.1812  data: 0.0005  max mem: 9147
Epoch: [123]  [1250/1251]  eta: 0:00:00  lr: 0.002786  min_lr: 0.002786  loss: 3.1047 (3.1736)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8307 (0.8262)  time: 0.1382  data: 0.0007  max mem: 9147
Epoch: [123] Total time: 0:04:22 (0.2098 s / it)
Averaged stats: lr: 0.002786  min_lr: 0.002786  loss: 3.1047 (3.1925)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8307 (0.8262)
Test:  [ 0/25]  eta: 0:01:20  loss: 0.9980 (0.9980)  acc1: 80.0000 (80.0000)  acc5: 96.8000 (96.8000)  time: 3.2372  data: 3.1322  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 1.1251 (1.1588)  acc1: 78.0000 (77.3455)  acc5: 95.6000 (95.1636)  time: 0.5961  data: 0.5136  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3672 (1.3511)  acc1: 70.0000 (72.6857)  acc5: 91.2000 (92.0571)  time: 0.2900  data: 0.2142  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4430 (1.3624)  acc1: 70.0000 (72.3040)  acc5: 90.4000 (91.7120)  time: 0.2118  data: 0.1407  max mem: 9147
Test: Total time: 0:00:10 (0.4095 s / it)
* Acc@1 73.044 Acc@5 91.756 loss 1.347
Accuracy of the model on the 50000 test images: 73.0%
Max accuracy: 73.13%
Epoch: [124]  [   0/1251]  eta: 1:05:01  lr: 0.002786  min_lr: 0.002786  loss: 2.5286 (2.5286)  weight_decay: 0.0500 (0.0500)  time: 3.1184  data: 2.5191  max mem: 9147
Epoch: [124]  [ 200/1251]  eta: 0:03:58  lr: 0.002783  min_lr: 0.002783  loss: 3.7328 (3.2022)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7281 (0.7572)  time: 0.2241  data: 0.0020  max mem: 9147
Epoch: [124]  [ 400/1251]  eta: 0:03:01  lr: 0.002780  min_lr: 0.002780  loss: 2.7291 (3.1995)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7778 (0.7983)  time: 0.1894  data: 0.0005  max mem: 9147
Epoch: [124]  [ 600/1251]  eta: 0:02:13  lr: 0.002776  min_lr: 0.002776  loss: 3.3759 (3.1622)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7258 (0.8019)  time: 0.1889  data: 0.0006  max mem: 9147
Epoch: [124]  [ 800/1251]  eta: 0:01:32  lr: 0.002773  min_lr: 0.002773  loss: 2.7087 (3.1687)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1005 (0.8364)  time: 0.2130  data: 0.0159  max mem: 9147
Epoch: [124]  [1000/1251]  eta: 0:00:51  lr: 0.002770  min_lr: 0.002770  loss: 2.8016 (3.1843)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7951 (0.8301)  time: 0.2340  data: 0.0021  max mem: 9147
Epoch: [124]  [1200/1251]  eta: 0:00:10  lr: 0.002766  min_lr: 0.002766  loss: 2.8556 (3.1889)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7897 (0.8315)  time: 0.2247  data: 0.0273  max mem: 9147
Epoch: [124]  [1250/1251]  eta: 0:00:00  lr: 0.002766  min_lr: 0.002766  loss: 2.6509 (3.1838)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7094 (0.8275)  time: 0.1393  data: 0.0012  max mem: 9147
Epoch: [124] Total time: 0:04:20 (0.2085 s / it)
Averaged stats: lr: 0.002766  min_lr: 0.002766  loss: 2.6509 (3.1866)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7094 (0.8275)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.9139 (0.9139)  acc1: 85.2000 (85.2000)  acc5: 95.2000 (95.2000)  time: 5.6046  data: 5.5242  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0117 (1.0910)  acc1: 80.0000 (78.3636)  acc5: 95.2000 (94.6546)  time: 0.7721  data: 0.6819  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3362 (1.3134)  acc1: 70.0000 (73.2571)  acc5: 90.0000 (91.8286)  time: 0.2186  data: 0.1378  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4491 (1.3355)  acc1: 69.6000 (72.7680)  acc5: 89.2000 (91.4240)  time: 0.2146  data: 0.1378  max mem: 9147
Test: Total time: 0:00:10 (0.4148 s / it)
* Acc@1 73.002 Acc@5 91.556 loss 1.323
Accuracy of the model on the 50000 test images: 73.0%
Max accuracy: 73.13%
Epoch: [125]  [   0/1251]  eta: 1:04:58  lr: 0.002766  min_lr: 0.002766  loss: 3.1573 (3.1573)  weight_decay: 0.0500 (0.0500)  time: 3.1164  data: 2.5187  max mem: 9147
Epoch: [125]  [ 200/1251]  eta: 0:03:54  lr: 0.002762  min_lr: 0.002762  loss: 2.6346 (3.1791)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6935 (0.8033)  time: 0.2151  data: 0.0007  max mem: 9147
Epoch: [125]  [ 400/1251]  eta: 0:03:05  lr: 0.002759  min_lr: 0.002759  loss: 3.6966 (3.2377)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9358 (0.8572)  time: 0.2143  data: 0.0009  max mem: 9147
Epoch: [125]  [ 600/1251]  eta: 0:02:21  lr: 0.002756  min_lr: 0.002756  loss: 3.0466 (3.2330)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8251 (0.8651)  time: 0.2143  data: 0.0007  max mem: 9147
Epoch: [125]  [ 800/1251]  eta: 0:01:37  lr: 0.002752  min_lr: 0.002752  loss: 2.8570 (3.2201)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7315 (0.8483)  time: 0.2182  data: 0.0006  max mem: 9147
Epoch: [125]  [1000/1251]  eta: 0:00:54  lr: 0.002749  min_lr: 0.002749  loss: 2.9626 (3.2013)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7447 (0.8495)  time: 0.2304  data: 0.0007  max mem: 9147
Epoch: [125]  [1200/1251]  eta: 0:00:10  lr: 0.002746  min_lr: 0.002746  loss: 2.6466 (3.2016)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7138 (0.8449)  time: 0.1868  data: 0.0012  max mem: 9147
Epoch: [125]  [1250/1251]  eta: 0:00:00  lr: 0.002745  min_lr: 0.002745  loss: 2.9748 (3.2053)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8164 (0.8494)  time: 0.1381  data: 0.0007  max mem: 9147
Epoch: [125] Total time: 0:04:26 (0.2130 s / it)
Averaged stats: lr: 0.002745  min_lr: 0.002745  loss: 2.9748 (3.1927)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8164 (0.8494)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.9030 (0.9030)  acc1: 82.4000 (82.4000)  acc5: 98.0000 (98.0000)  time: 5.5663  data: 5.4732  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0646 (1.0854)  acc1: 78.4000 (77.7091)  acc5: 96.4000 (95.3455)  time: 0.7016  data: 0.6081  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3667 (1.3105)  acc1: 68.8000 (72.7429)  acc5: 90.8000 (91.9429)  time: 0.1879  data: 0.1065  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4219 (1.3151)  acc1: 68.4000 (72.5440)  acc5: 90.0000 (91.8720)  time: 0.2025  data: 0.1272  max mem: 9147
Test: Total time: 0:00:10 (0.4043 s / it)
* Acc@1 73.192 Acc@5 91.842 loss 1.302
Accuracy of the model on the 50000 test images: 73.2%
Max accuracy: 73.19%
Epoch: [126]  [   0/1251]  eta: 1:00:14  lr: 0.002745  min_lr: 0.002745  loss: 2.3715 (2.3715)  weight_decay: 0.0500 (0.0500)  time: 2.8897  data: 2.4231  max mem: 9147
Epoch: [126]  [ 200/1251]  eta: 0:03:32  lr: 0.002742  min_lr: 0.002742  loss: 2.6945 (3.1298)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7614 (0.8013)  time: 0.1919  data: 0.0012  max mem: 9147
Epoch: [126]  [ 400/1251]  eta: 0:02:45  lr: 0.002738  min_lr: 0.002738  loss: 2.5816 (3.1408)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7275 (0.8055)  time: 0.1859  data: 0.0010  max mem: 9147
Epoch: [126]  [ 600/1251]  eta: 0:02:03  lr: 0.002735  min_lr: 0.002735  loss: 3.1090 (3.1402)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9805 (0.8300)  time: 0.1717  data: 0.0005  max mem: 9147
Epoch: [126]  [ 800/1251]  eta: 0:01:25  lr: 0.002732  min_lr: 0.002732  loss: 2.6511 (3.1761)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7004 (0.8237)  time: 0.2080  data: 0.0010  max mem: 9147
Epoch: [126]  [1000/1251]  eta: 0:00:48  lr: 0.002728  min_lr: 0.002728  loss: 2.6366 (3.1767)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7371 (0.8281)  time: 0.2246  data: 0.0262  max mem: 9147
Epoch: [126]  [1200/1251]  eta: 0:00:10  lr: 0.002725  min_lr: 0.002725  loss: 2.5298 (3.1704)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7215 (0.8170)  time: 0.2004  data: 0.0008  max mem: 9147
Epoch: [126]  [1250/1251]  eta: 0:00:00  lr: 0.002724  min_lr: 0.002724  loss: 2.6166 (3.1720)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7098 (0.8158)  time: 0.1432  data: 0.0010  max mem: 9147
Epoch: [126] Total time: 0:04:06 (0.1968 s / it)
Averaged stats: lr: 0.002724  min_lr: 0.002724  loss: 2.6166 (3.1718)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7098 (0.8158)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.8393 (0.8393)  acc1: 82.4000 (82.4000)  acc5: 96.4000 (96.4000)  time: 5.6568  data: 5.5765  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0654 (1.0799)  acc1: 78.4000 (78.0000)  acc5: 95.6000 (94.9455)  time: 0.7142  data: 0.6396  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3515 (1.2874)  acc1: 70.4000 (73.1238)  acc5: 90.8000 (92.0571)  time: 0.1950  data: 0.1221  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4693 (1.3025)  acc1: 70.0000 (72.8480)  acc5: 89.6000 (91.7760)  time: 0.2077  data: 0.1362  max mem: 9147
Test: Total time: 0:00:10 (0.4082 s / it)
* Acc@1 73.378 Acc@5 91.748 loss 1.306
Accuracy of the model on the 50000 test images: 73.4%
Max accuracy: 73.38%
Epoch: [127]  [   0/1251]  eta: 0:59:49  lr: 0.002724  min_lr: 0.002724  loss: 2.4933 (2.4933)  weight_decay: 0.0500 (0.0500)  time: 2.8693  data: 2.6718  max mem: 9147
Epoch: [127]  [ 200/1251]  eta: 0:03:53  lr: 0.002721  min_lr: 0.002721  loss: 2.6489 (3.1332)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8330 (0.9199)  time: 0.2045  data: 0.0008  max mem: 9147
Epoch: [127]  [ 400/1251]  eta: 0:03:03  lr: 0.002717  min_lr: 0.002717  loss: 2.8392 (3.0989)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7874 (0.8697)  time: 0.2250  data: 0.0006  max mem: 9147
Epoch: [127]  [ 600/1251]  eta: 0:02:20  lr: 0.002714  min_lr: 0.002714  loss: 3.0139 (3.1159)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8835 (0.8817)  time: 0.2152  data: 0.0007  max mem: 9147
Epoch: [127]  [ 800/1251]  eta: 0:01:36  lr: 0.002711  min_lr: 0.002711  loss: 2.5894 (3.1212)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7840 (0.8765)  time: 0.2101  data: 0.0006  max mem: 9147
Epoch: [127]  [1000/1251]  eta: 0:00:53  lr: 0.002707  min_lr: 0.002707  loss: 2.8969 (3.1212)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7450 (0.8506)  time: 0.1896  data: 0.0005  max mem: 9147
Epoch: [127]  [1200/1251]  eta: 0:00:10  lr: 0.002704  min_lr: 0.002704  loss: 2.6003 (3.1330)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6826 (0.8376)  time: 0.2129  data: 0.0024  max mem: 9147
Epoch: [127]  [1250/1251]  eta: 0:00:00  lr: 0.002703  min_lr: 0.002703  loss: 2.7774 (3.1354)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7582 (0.8360)  time: 0.1590  data: 0.0019  max mem: 9147
Epoch: [127] Total time: 0:04:22 (0.2095 s / it)
Averaged stats: lr: 0.002703  min_lr: 0.002703  loss: 2.7774 (3.1755)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7582 (0.8360)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.9302 (0.9302)  acc1: 83.2000 (83.2000)  acc5: 96.0000 (96.0000)  time: 5.4717  data: 5.3908  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0144 (1.0615)  acc1: 78.0000 (77.8909)  acc5: 95.6000 (94.9455)  time: 0.7513  data: 0.6548  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3230 (1.2974)  acc1: 70.8000 (73.1048)  acc5: 91.6000 (91.6191)  time: 0.2070  data: 0.1229  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4604 (1.3109)  acc1: 70.4000 (72.8640)  acc5: 89.6000 (91.5040)  time: 0.2031  data: 0.1248  max mem: 9147
Test: Total time: 0:00:10 (0.4022 s / it)
* Acc@1 73.376 Acc@5 91.762 loss 1.288
Accuracy of the model on the 50000 test images: 73.4%
Max accuracy: 73.38%
Epoch: [128]  [   0/1251]  eta: 1:03:07  lr: 0.002703  min_lr: 0.002703  loss: 2.3056 (2.3056)  weight_decay: 0.0500 (0.0500)  time: 3.0277  data: 2.3921  max mem: 9147
Epoch: [128]  [ 200/1251]  eta: 0:03:56  lr: 0.002700  min_lr: 0.002700  loss: 2.9987 (3.1463)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8146 (0.8785)  time: 0.2100  data: 0.0007  max mem: 9147
Epoch: [128]  [ 400/1251]  eta: 0:03:08  lr: 0.002696  min_lr: 0.002696  loss: 3.4573 (3.1826)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9170 (0.8648)  time: 0.2052  data: 0.0008  max mem: 9147
Epoch: [128]  [ 600/1251]  eta: 0:02:23  lr: 0.002693  min_lr: 0.002693  loss: 2.6179 (3.1523)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7528 (0.8425)  time: 0.2250  data: 0.0006  max mem: 9147
Epoch: [128]  [ 800/1251]  eta: 0:01:38  lr: 0.002690  min_lr: 0.002690  loss: 4.0981 (3.1629)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6826 (0.8257)  time: 0.2000  data: 0.0007  max mem: 9147
Epoch: [128]  [1000/1251]  eta: 0:00:54  lr: 0.002686  min_lr: 0.002686  loss: 3.0823 (3.1696)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7870 (0.8252)  time: 0.2150  data: 0.0123  max mem: 9147
Epoch: [128]  [1200/1251]  eta: 0:00:11  lr: 0.002683  min_lr: 0.002683  loss: 3.4390 (3.1663)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8201 (0.8407)  time: 0.2177  data: 0.0078  max mem: 9147
Epoch: [128]  [1250/1251]  eta: 0:00:00  lr: 0.002682  min_lr: 0.002682  loss: 2.7304 (3.1681)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8201 (0.8401)  time: 0.1409  data: 0.0009  max mem: 9147
Epoch: [128] Total time: 0:04:31 (0.2172 s / it)
Averaged stats: lr: 0.002682  min_lr: 0.002682  loss: 2.7304 (3.1733)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8201 (0.8401)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.8304 (0.8304)  acc1: 83.2000 (83.2000)  acc5: 96.4000 (96.4000)  time: 5.6978  data: 5.6174  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9843 (1.0159)  acc1: 78.8000 (78.3636)  acc5: 96.0000 (95.2364)  time: 0.7791  data: 0.7041  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2970 (1.2402)  acc1: 70.4000 (73.4286)  acc5: 91.6000 (92.0191)  time: 0.2060  data: 0.1340  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3679 (1.2566)  acc1: 70.4000 (72.8960)  acc5: 90.0000 (91.8720)  time: 0.2052  data: 0.1339  max mem: 9147
Test: Total time: 0:00:10 (0.4072 s / it)
* Acc@1 73.666 Acc@5 91.892 loss 1.236
Accuracy of the model on the 50000 test images: 73.7%
Max accuracy: 73.67%
Epoch: [129]  [   0/1251]  eta: 0:58:59  lr: 0.002682  min_lr: 0.002682  loss: 3.7984 (3.7984)  weight_decay: 0.0500 (0.0500)  time: 2.8293  data: 2.6132  max mem: 9147
Epoch: [129]  [ 200/1251]  eta: 0:03:53  lr: 0.002679  min_lr: 0.002679  loss: 2.4735 (3.0945)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8106 (0.7916)  time: 0.2054  data: 0.0046  max mem: 9147
Epoch: [129]  [ 400/1251]  eta: 0:03:07  lr: 0.002675  min_lr: 0.002675  loss: 2.7580 (3.1409)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7847 (0.7991)  time: 0.2099  data: 0.0092  max mem: 9147
Epoch: [129]  [ 600/1251]  eta: 0:02:21  lr: 0.002672  min_lr: 0.002672  loss: 2.6449 (3.1551)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7272 (0.7969)  time: 0.2191  data: 0.0243  max mem: 9147
Epoch: [129]  [ 800/1251]  eta: 0:01:35  lr: 0.002668  min_lr: 0.002668  loss: 3.7209 (3.1639)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9124 (0.8308)  time: 0.1903  data: 0.0005  max mem: 9147
Epoch: [129]  [1000/1251]  eta: 0:00:52  lr: 0.002665  min_lr: 0.002665  loss: 3.6061 (3.1642)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7524 (0.8246)  time: 0.2013  data: 0.0007  max mem: 9147
Epoch: [129]  [1200/1251]  eta: 0:00:10  lr: 0.002662  min_lr: 0.002662  loss: 2.8499 (3.1655)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7732 (0.8240)  time: 0.2290  data: 0.0006  max mem: 9147
Epoch: [129]  [1250/1251]  eta: 0:00:00  lr: 0.002661  min_lr: 0.002661  loss: 4.0871 (3.1785)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7239 (0.8211)  time: 0.1447  data: 0.0012  max mem: 9147
Epoch: [129] Total time: 0:04:22 (0.2097 s / it)
Averaged stats: lr: 0.002661  min_lr: 0.002661  loss: 4.0871 (3.1726)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7239 (0.8211)
Test:  [ 0/25]  eta: 0:02:09  loss: 0.9791 (0.9791)  acc1: 84.8000 (84.8000)  acc5: 96.8000 (96.8000)  time: 5.1619  data: 5.0815  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1442 (1.1560)  acc1: 76.0000 (77.0909)  acc5: 95.2000 (94.7636)  time: 0.7456  data: 0.6658  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3946 (1.3629)  acc1: 71.2000 (72.9714)  acc5: 91.6000 (91.9238)  time: 0.2258  data: 0.1503  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4911 (1.3748)  acc1: 70.8000 (72.8000)  acc5: 90.0000 (91.6640)  time: 0.2362  data: 0.1639  max mem: 9147
Test: Total time: 0:00:10 (0.4135 s / it)
* Acc@1 73.012 Acc@5 91.690 loss 1.372
Accuracy of the model on the 50000 test images: 73.0%
Max accuracy: 73.67%
Epoch: [130]  [   0/1251]  eta: 1:07:25  lr: 0.002661  min_lr: 0.002661  loss: 2.4938 (2.4938)  weight_decay: 0.0500 (0.0500)  time: 3.2341  data: 2.4018  max mem: 9147
Epoch: [130]  [ 200/1251]  eta: 0:03:51  lr: 0.002657  min_lr: 0.002657  loss: 2.8454 (3.0860)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7800 (0.8069)  time: 0.2137  data: 0.0007  max mem: 9147
Epoch: [130]  [ 400/1251]  eta: 0:03:03  lr: 0.002654  min_lr: 0.002654  loss: 3.0133 (3.1251)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7463 (0.7967)  time: 0.1992  data: 0.0006  max mem: 9147
Epoch: [130]  [ 600/1251]  eta: 0:02:18  lr: 0.002651  min_lr: 0.002651  loss: 4.2011 (3.1885)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6660 (0.7958)  time: 0.1961  data: 0.0005  max mem: 9147
Epoch: [130]  [ 800/1251]  eta: 0:01:34  lr: 0.002647  min_lr: 0.002647  loss: 2.5514 (3.1839)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8226 (nan)  time: 0.1940  data: 0.0009  max mem: 9147
Epoch: [130]  [1000/1251]  eta: 0:00:51  lr: 0.002644  min_lr: 0.002644  loss: 2.7474 (3.1828)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9706 (nan)  time: 0.1987  data: 0.0005  max mem: 9147
Epoch: [130]  [1200/1251]  eta: 0:00:10  lr: 0.002640  min_lr: 0.002640  loss: 3.8526 (3.1926)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7918 (nan)  time: 0.2064  data: 0.0009  max mem: 9147
Epoch: [130]  [1250/1251]  eta: 0:00:00  lr: 0.002640  min_lr: 0.002640  loss: 3.5686 (3.1918)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7491 (nan)  time: 0.1494  data: 0.0012  max mem: 9147
Epoch: [130] Total time: 0:04:18 (0.2067 s / it)
Averaged stats: lr: 0.002640  min_lr: 0.002640  loss: 3.5686 (3.1821)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7491 (nan)
Test:  [ 0/25]  eta: 0:02:15  loss: 0.9302 (0.9302)  acc1: 82.0000 (82.0000)  acc5: 95.6000 (95.6000)  time: 5.4383  data: 5.3565  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0137 (1.0533)  acc1: 76.8000 (77.7818)  acc5: 95.6000 (95.2727)  time: 0.7595  data: 0.6672  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3086 (1.2574)  acc1: 72.8000 (73.6191)  acc5: 91.6000 (92.2476)  time: 0.2258  data: 0.1430  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3534 (1.2670)  acc1: 70.8000 (73.3600)  acc5: 90.4000 (92.1600)  time: 0.2217  data: 0.1429  max mem: 9147
Test: Total time: 0:00:10 (0.4122 s / it)
* Acc@1 73.462 Acc@5 91.868 loss 1.264
Accuracy of the model on the 50000 test images: 73.5%
Max accuracy: 73.67%
Epoch: [131]  [   0/1251]  eta: 0:58:39  lr: 0.002640  min_lr: 0.002640  loss: 2.4415 (2.4415)  weight_decay: 0.0500 (0.0500)  time: 2.8131  data: 2.3103  max mem: 9147
Epoch: [131]  [ 200/1251]  eta: 0:03:57  lr: 0.002636  min_lr: 0.002636  loss: 2.9124 (3.1315)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7520 (0.8159)  time: 0.2237  data: 0.0007  max mem: 9147
Epoch: [131]  [ 400/1251]  eta: 0:03:09  lr: 0.002633  min_lr: 0.002633  loss: 2.8479 (3.1572)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8923 (0.8065)  time: 0.2063  data: 0.0007  max mem: 9147
Epoch: [131]  [ 600/1251]  eta: 0:02:22  lr: 0.002629  min_lr: 0.002629  loss: 2.5227 (3.1574)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8952 (0.8500)  time: 0.2245  data: 0.0058  max mem: 9147
Epoch: [131]  [ 800/1251]  eta: 0:01:37  lr: 0.002626  min_lr: 0.002626  loss: 2.7481 (3.1844)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7147 (0.8430)  time: 0.2003  data: 0.0006  max mem: 9147
Epoch: [131]  [1000/1251]  eta: 0:00:54  lr: 0.002623  min_lr: 0.002623  loss: 3.1345 (3.1943)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7813 (0.8416)  time: 0.2048  data: 0.0007  max mem: 9147
Epoch: [131]  [1200/1251]  eta: 0:00:11  lr: 0.002619  min_lr: 0.002619  loss: 2.5935 (3.1941)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7466 (0.8441)  time: 0.2001  data: 0.0006  max mem: 9147
Epoch: [131]  [1250/1251]  eta: 0:00:00  lr: 0.002618  min_lr: 0.002618  loss: 2.7552 (3.1927)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7847 (0.8434)  time: 0.1423  data: 0.0013  max mem: 9147
Epoch: [131] Total time: 0:04:29 (0.2157 s / it)
Averaged stats: lr: 0.002618  min_lr: 0.002618  loss: 2.7552 (3.1816)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7847 (0.8434)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.8696 (0.8696)  acc1: 84.0000 (84.0000)  acc5: 96.4000 (96.4000)  time: 5.6128  data: 5.5153  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0323 (1.0175)  acc1: 78.0000 (78.7636)  acc5: 95.6000 (95.4546)  time: 0.7347  data: 0.6585  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2764 (1.2296)  acc1: 72.4000 (74.0571)  acc5: 92.0000 (92.2286)  time: 0.2044  data: 0.1316  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3569 (1.2490)  acc1: 70.8000 (73.5360)  acc5: 90.0000 (91.9840)  time: 0.2031  data: 0.1315  max mem: 9147
Test: Total time: 0:00:10 (0.4021 s / it)
* Acc@1 73.546 Acc@5 92.008 loss 1.238
Accuracy of the model on the 50000 test images: 73.5%
Max accuracy: 73.67%
Epoch: [132]  [   0/1251]  eta: 1:08:47  lr: 0.002618  min_lr: 0.002618  loss: 3.7623 (3.7623)  weight_decay: 0.0500 (0.0500)  time: 3.2995  data: 1.8291  max mem: 9147
Epoch: [132]  [ 200/1251]  eta: 0:04:01  lr: 0.002615  min_lr: 0.002615  loss: 3.0057 (3.2305)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8723 (0.8724)  time: 0.2267  data: 0.0006  max mem: 9147
Epoch: [132]  [ 400/1251]  eta: 0:03:10  lr: 0.002612  min_lr: 0.002612  loss: 2.8147 (3.1239)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8531 (0.8476)  time: 0.2191  data: 0.0007  max mem: 9147
Epoch: [132]  [ 600/1251]  eta: 0:02:22  lr: 0.002608  min_lr: 0.002608  loss: 3.5640 (3.1472)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8196 (0.8574)  time: 0.2235  data: 0.0006  max mem: 9147
Epoch: [132]  [ 800/1251]  eta: 0:01:37  lr: 0.002605  min_lr: 0.002605  loss: 2.6260 (3.1557)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7627 (0.8556)  time: 0.2080  data: 0.0021  max mem: 9147
Epoch: [132]  [1000/1251]  eta: 0:00:54  lr: 0.002601  min_lr: 0.002601  loss: 2.4725 (3.1555)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7936 (0.8450)  time: 0.1935  data: 0.0005  max mem: 9147
Epoch: [132]  [1200/1251]  eta: 0:00:10  lr: 0.002598  min_lr: 0.002598  loss: 2.5473 (3.1681)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7383 (0.8530)  time: 0.2062  data: 0.0007  max mem: 9147
Epoch: [132]  [1250/1251]  eta: 0:00:00  lr: 0.002597  min_lr: 0.002597  loss: 2.6328 (3.1700)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8187 (0.8532)  time: 0.1558  data: 0.0012  max mem: 9147
Epoch: [132] Total time: 0:04:27 (0.2137 s / it)
Averaged stats: lr: 0.002597  min_lr: 0.002597  loss: 2.6328 (3.1750)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8187 (0.8532)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.8935 (0.8935)  acc1: 81.2000 (81.2000)  acc5: 97.2000 (97.2000)  time: 5.5447  data: 5.4643  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0753 (1.1049)  acc1: 78.8000 (77.6364)  acc5: 96.0000 (95.0545)  time: 0.7651  data: 0.6780  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3612 (1.3233)  acc1: 70.4000 (72.7810)  acc5: 91.2000 (92.0000)  time: 0.2130  data: 0.1336  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4136 (1.3332)  acc1: 70.4000 (72.5760)  acc5: 90.0000 (91.7920)  time: 0.2092  data: 0.1335  max mem: 9147
Test: Total time: 0:00:10 (0.4067 s / it)
* Acc@1 73.130 Acc@5 91.742 loss 1.326
Accuracy of the model on the 50000 test images: 73.1%
Max accuracy: 73.67%
Epoch: [133]  [   0/1251]  eta: 1:04:07  lr: 0.002597  min_lr: 0.002597  loss: 2.7155 (2.7155)  weight_decay: 0.0500 (0.0500)  time: 3.0754  data: 2.8821  max mem: 9147
Epoch: [133]  [ 200/1251]  eta: 0:03:54  lr: 0.002594  min_lr: 0.002594  loss: 2.5140 (3.1535)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7999 (0.8914)  time: 0.1910  data: 0.0005  max mem: 9147
Epoch: [133]  [ 400/1251]  eta: 0:02:55  lr: 0.002590  min_lr: 0.002590  loss: 2.7883 (3.1568)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7360 (0.8339)  time: 0.2000  data: 0.0006  max mem: 9147
Epoch: [133]  [ 600/1251]  eta: 0:02:13  lr: 0.002587  min_lr: 0.002587  loss: 3.0686 (3.1892)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6937 (0.8245)  time: 0.2098  data: 0.0007  max mem: 9147
Epoch: [133]  [ 800/1251]  eta: 0:01:33  lr: 0.002583  min_lr: 0.002583  loss: 2.7036 (3.1556)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7630 (0.8213)  time: 0.2057  data: 0.0006  max mem: 9147
Epoch: [133]  [1000/1251]  eta: 0:00:52  lr: 0.002580  min_lr: 0.002580  loss: 2.7946 (3.1667)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8783 (0.8313)  time: 0.2316  data: 0.0007  max mem: 9147
Epoch: [133]  [1200/1251]  eta: 0:00:10  lr: 0.002576  min_lr: 0.002576  loss: 3.1779 (3.1655)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9136 (0.8414)  time: 0.2052  data: 0.0007  max mem: 9147
Epoch: [133]  [1250/1251]  eta: 0:00:00  lr: 0.002576  min_lr: 0.002576  loss: 2.6126 (3.1627)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8671 (0.8424)  time: 0.1449  data: 0.0033  max mem: 9147
Epoch: [133] Total time: 0:04:20 (0.2083 s / it)
Averaged stats: lr: 0.002576  min_lr: 0.002576  loss: 2.6126 (3.1661)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8671 (0.8424)
Test:  [ 0/25]  eta: 0:02:11  loss: 0.7749 (0.7749)  acc1: 84.0000 (84.0000)  acc5: 97.6000 (97.6000)  time: 5.2540  data: 5.1725  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9795 (0.9935)  acc1: 79.2000 (78.4727)  acc5: 95.6000 (95.2364)  time: 0.7300  data: 0.6346  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2211 (1.2172)  acc1: 71.6000 (73.8286)  acc5: 92.8000 (92.3048)  time: 0.2113  data: 0.1259  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3818 (1.2250)  acc1: 71.2000 (73.5840)  acc5: 90.4000 (92.1120)  time: 0.2054  data: 0.1258  max mem: 9147
Test: Total time: 0:00:09 (0.3944 s / it)
* Acc@1 73.682 Acc@5 92.008 loss 1.221
Accuracy of the model on the 50000 test images: 73.7%
Max accuracy: 73.68%
Epoch: [134]  [   0/1251]  eta: 0:55:12  lr: 0.002576  min_lr: 0.002576  loss: 2.4611 (2.4611)  weight_decay: 0.0500 (0.0500)  time: 2.6477  data: 2.4414  max mem: 9147
Epoch: [134]  [ 200/1251]  eta: 0:03:53  lr: 0.002572  min_lr: 0.002572  loss: 3.1032 (3.1176)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9413 (0.9032)  time: 0.2016  data: 0.0006  max mem: 9147
Epoch: [134]  [ 400/1251]  eta: 0:03:07  lr: 0.002569  min_lr: 0.002569  loss: 2.6789 (3.1222)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7846 (0.8663)  time: 0.2383  data: 0.0370  max mem: 9147
Epoch: [134]  [ 600/1251]  eta: 0:02:18  lr: 0.002565  min_lr: 0.002565  loss: 3.3288 (3.1674)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7439 (0.8375)  time: 0.2011  data: 0.0007  max mem: 9147
Epoch: [134]  [ 800/1251]  eta: 0:01:36  lr: 0.002562  min_lr: 0.002562  loss: 2.6796 (3.1664)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7527 (0.8297)  time: 0.2339  data: 0.0006  max mem: 9147
Epoch: [134]  [1000/1251]  eta: 0:00:52  lr: 0.002558  min_lr: 0.002558  loss: 3.1019 (3.1640)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7353 (0.8142)  time: 0.1998  data: 0.0013  max mem: 9147
Epoch: [134]  [1200/1251]  eta: 0:00:10  lr: 0.002555  min_lr: 0.002555  loss: 2.8943 (3.1511)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7560 (0.8105)  time: 0.1942  data: 0.0012  max mem: 9147
Epoch: [134]  [1250/1251]  eta: 0:00:00  lr: 0.002554  min_lr: 0.002554  loss: 3.6921 (3.1624)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8533 (0.8222)  time: 0.1425  data: 0.0012  max mem: 9147
Epoch: [134] Total time: 0:04:18 (0.2068 s / it)
Averaged stats: lr: 0.002554  min_lr: 0.002554  loss: 3.6921 (3.1650)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8533 (0.8222)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.9042 (0.9042)  acc1: 82.8000 (82.8000)  acc5: 96.8000 (96.8000)  time: 5.6751  data: 5.5945  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0308 (1.0703)  acc1: 80.4000 (78.3273)  acc5: 95.6000 (94.6546)  time: 0.7698  data: 0.6881  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3645 (1.2961)  acc1: 70.8000 (73.3905)  acc5: 90.0000 (91.7714)  time: 0.2175  data: 0.1409  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4588 (1.3156)  acc1: 69.2000 (72.9920)  acc5: 89.6000 (91.5840)  time: 0.2125  data: 0.1408  max mem: 9147
Test: Total time: 0:00:10 (0.4160 s / it)
* Acc@1 73.042 Acc@5 91.760 loss 1.305
Accuracy of the model on the 50000 test images: 73.0%
Max accuracy: 73.68%
Epoch: [135]  [   0/1251]  eta: 1:02:28  lr: 0.002554  min_lr: 0.002554  loss: 3.9721 (3.9721)  weight_decay: 0.0500 (0.0500)  time: 2.9968  data: 2.2905  max mem: 9147
Epoch: [135]  [ 200/1251]  eta: 0:03:56  lr: 0.002551  min_lr: 0.002551  loss: 2.4465 (3.1674)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7327 (0.8356)  time: 0.2107  data: 0.0006  max mem: 9147
Epoch: [135]  [ 400/1251]  eta: 0:03:05  lr: 0.002547  min_lr: 0.002547  loss: 2.9586 (3.1903)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7858 (0.8396)  time: 0.2104  data: 0.0007  max mem: 9147
Epoch: [135]  [ 600/1251]  eta: 0:02:22  lr: 0.002544  min_lr: 0.002544  loss: 3.2060 (3.2002)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8402 (0.8407)  time: 0.2374  data: 0.0006  max mem: 9147
Epoch: [135]  [ 800/1251]  eta: 0:01:37  lr: 0.002540  min_lr: 0.002540  loss: 2.5874 (3.1803)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7847 (0.8296)  time: 0.2004  data: 0.0006  max mem: 9147
Epoch: [135]  [1000/1251]  eta: 0:00:54  lr: 0.002537  min_lr: 0.002537  loss: 2.8704 (3.1859)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7352 (0.8351)  time: 0.2097  data: 0.0007  max mem: 9147
Epoch: [135]  [1200/1251]  eta: 0:00:10  lr: 0.002533  min_lr: 0.002533  loss: 2.7845 (3.1784)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7887 (0.8432)  time: 0.1950  data: 0.0006  max mem: 9147
Epoch: [135]  [1250/1251]  eta: 0:00:00  lr: 0.002533  min_lr: 0.002533  loss: 2.8396 (3.1802)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.1389  data: 0.0018  max mem: 9147
Epoch: [135] Total time: 0:04:25 (0.2122 s / it)
Averaged stats: lr: 0.002533  min_lr: 0.002533  loss: 2.8396 (3.1664)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)
Test:  [ 0/25]  eta: 0:02:15  loss: 0.8502 (0.8502)  acc1: 86.4000 (86.4000)  acc5: 98.0000 (98.0000)  time: 5.4219  data: 5.3318  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0355 (1.0389)  acc1: 78.8000 (78.8000)  acc5: 95.6000 (94.9455)  time: 0.7118  data: 0.6385  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2905 (1.2346)  acc1: 71.6000 (74.3238)  acc5: 90.4000 (91.8667)  time: 0.2216  data: 0.1506  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3523 (1.2479)  acc1: 70.4000 (73.9360)  acc5: 89.6000 (91.6000)  time: 0.2205  data: 0.1505  max mem: 9147
Test: Total time: 0:00:10 (0.4082 s / it)
* Acc@1 73.700 Acc@5 91.898 loss 1.249
Accuracy of the model on the 50000 test images: 73.7%
Max accuracy: 73.70%
Epoch: [136]  [   0/1251]  eta: 1:01:51  lr: 0.002532  min_lr: 0.002532  loss: 2.5093 (2.5093)  weight_decay: 0.0500 (0.0500)  time: 2.9668  data: 2.7565  max mem: 9147
Epoch: [136]  [ 200/1251]  eta: 0:03:31  lr: 0.002529  min_lr: 0.002529  loss: 2.6529 (3.0905)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8463 (0.8593)  time: 0.1872  data: 0.0005  max mem: 9147
Epoch: [136]  [ 400/1251]  eta: 0:02:50  lr: 0.002526  min_lr: 0.002526  loss: 2.8257 (3.1539)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7542 (0.8617)  time: 0.2103  data: 0.0006  max mem: 9147
Epoch: [136]  [ 600/1251]  eta: 0:02:13  lr: 0.002522  min_lr: 0.002522  loss: 3.9017 (3.1565)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7331 (0.8386)  time: 0.2155  data: 0.0006  max mem: 9147
Epoch: [136]  [ 800/1251]  eta: 0:01:34  lr: 0.002519  min_lr: 0.002519  loss: 3.1415 (3.1612)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8405 (0.8367)  time: 0.2294  data: 0.0005  max mem: 9147
Epoch: [136]  [1000/1251]  eta: 0:00:51  lr: 0.002515  min_lr: 0.002515  loss: 2.5449 (3.1592)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7803 (0.8257)  time: 0.2056  data: 0.0006  max mem: 9147
Epoch: [136]  [1200/1251]  eta: 0:00:10  lr: 0.002512  min_lr: 0.002512  loss: 2.4943 (3.1555)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9837 (0.8508)  time: 0.2001  data: 0.0008  max mem: 9147
Epoch: [136]  [1250/1251]  eta: 0:00:00  lr: 0.002511  min_lr: 0.002511  loss: 2.8104 (3.1557)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8676 (0.8491)  time: 0.1578  data: 0.0009  max mem: 9147
Epoch: [136] Total time: 0:04:18 (0.2068 s / it)
Averaged stats: lr: 0.002511  min_lr: 0.002511  loss: 2.8104 (3.1624)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8676 (0.8491)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.8067 (0.8067)  acc1: 82.4000 (82.4000)  acc5: 96.8000 (96.8000)  time: 5.5713  data: 5.4894  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9574 (0.9941)  acc1: 78.4000 (78.8727)  acc5: 95.2000 (94.9091)  time: 0.7203  data: 0.6279  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2817 (1.2180)  acc1: 71.6000 (73.9810)  acc5: 90.8000 (92.1524)  time: 0.1908  data: 0.1084  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3671 (1.2343)  acc1: 70.4000 (73.6800)  acc5: 90.8000 (92.0480)  time: 0.1938  data: 0.1174  max mem: 9147
Test: Total time: 0:00:09 (0.3971 s / it)
* Acc@1 73.686 Acc@5 91.978 loss 1.223
Accuracy of the model on the 50000 test images: 73.7%
Max accuracy: 73.70%
Epoch: [137]  [   0/1251]  eta: 1:04:40  lr: 0.002511  min_lr: 0.002511  loss: 2.4335 (2.4335)  weight_decay: 0.0500 (0.0500)  time: 3.1023  data: 1.5739  max mem: 9147
Epoch: [137]  [ 200/1251]  eta: 0:03:32  lr: 0.002507  min_lr: 0.002507  loss: 2.5600 (3.2442)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7937 (0.8123)  time: 0.1900  data: 0.0018  max mem: 9147
Epoch: [137]  [ 400/1251]  eta: 0:02:45  lr: 0.002504  min_lr: 0.002504  loss: 3.7030 (3.2174)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7790 (0.8253)  time: 0.1898  data: 0.0005  max mem: 9147
Epoch: [137]  [ 600/1251]  eta: 0:02:06  lr: 0.002500  min_lr: 0.002500  loss: 2.8226 (3.2147)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7511 (0.8379)  time: 0.2000  data: 0.0006  max mem: 9147
Epoch: [137]  [ 800/1251]  eta: 0:01:29  lr: 0.002497  min_lr: 0.002497  loss: 2.7491 (3.1941)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7584 (0.8324)  time: 0.2184  data: 0.0109  max mem: 9147
Epoch: [137]  [1000/1251]  eta: 0:00:50  lr: 0.002493  min_lr: 0.002493  loss: 2.6692 (3.1951)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7939 (0.8430)  time: 0.2005  data: 0.0007  max mem: 9147
Epoch: [137]  [1200/1251]  eta: 0:00:10  lr: 0.002490  min_lr: 0.002490  loss: 2.6103 (3.1837)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7898 (0.8375)  time: 0.2158  data: 0.0007  max mem: 9147
Epoch: [137]  [1250/1251]  eta: 0:00:00  lr: 0.002489  min_lr: 0.002489  loss: 2.7104 (3.1828)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7771 (0.8374)  time: 0.1451  data: 0.0019  max mem: 9147
Epoch: [137] Total time: 0:04:13 (0.2026 s / it)
Averaged stats: lr: 0.002489  min_lr: 0.002489  loss: 2.7104 (3.1623)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7771 (0.8374)
Test:  [ 0/25]  eta: 0:02:26  loss: 0.8694 (0.8694)  acc1: 84.0000 (84.0000)  acc5: 97.6000 (97.6000)  time: 5.8721  data: 5.7917  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.1288 (1.1033)  acc1: 78.8000 (77.4182)  acc5: 95.2000 (94.6546)  time: 0.7505  data: 0.6686  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.4101 (1.3277)  acc1: 70.0000 (73.2571)  acc5: 90.8000 (91.7714)  time: 0.1906  data: 0.1150  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.5256 (1.3405)  acc1: 69.6000 (72.7200)  acc5: 90.4000 (91.7760)  time: 0.1855  data: 0.1141  max mem: 9147
Test: Total time: 0:00:10 (0.4020 s / it)
* Acc@1 73.154 Acc@5 91.876 loss 1.327
Accuracy of the model on the 50000 test images: 73.2%
Max accuracy: 73.70%
Epoch: [138]  [   0/1251]  eta: 1:06:15  lr: 0.002489  min_lr: 0.002489  loss: 4.3196 (4.3196)  weight_decay: 0.0500 (0.0500)  time: 3.1778  data: 2.5930  max mem: 9147
Epoch: [138]  [ 200/1251]  eta: 0:03:52  lr: 0.002486  min_lr: 0.002486  loss: 2.5678 (3.0841)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8815 (0.9034)  time: 0.2135  data: 0.0007  max mem: 9147
Epoch: [138]  [ 400/1251]  eta: 0:03:06  lr: 0.002482  min_lr: 0.002482  loss: 2.7116 (3.1417)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8330 (0.8604)  time: 0.2257  data: 0.0007  max mem: 9147
Epoch: [138]  [ 600/1251]  eta: 0:02:22  lr: 0.002479  min_lr: 0.002479  loss: 2.7540 (3.1484)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8092 (0.8715)  time: 0.2194  data: 0.0169  max mem: 9147
Epoch: [138]  [ 800/1251]  eta: 0:01:37  lr: 0.002475  min_lr: 0.002475  loss: 2.6039 (3.1371)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7366 (0.8539)  time: 0.1884  data: 0.0011  max mem: 9147
Epoch: [138]  [1000/1251]  eta: 0:00:53  lr: 0.002472  min_lr: 0.002472  loss: 3.3389 (3.1502)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7308 (0.8497)  time: 0.1960  data: 0.0006  max mem: 9147
Epoch: [138]  [1200/1251]  eta: 0:00:10  lr: 0.002468  min_lr: 0.002468  loss: 2.7020 (3.1504)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7497 (0.8369)  time: 0.2187  data: 0.0007  max mem: 9147
Epoch: [138]  [1250/1251]  eta: 0:00:00  lr: 0.002467  min_lr: 0.002467  loss: 3.0295 (3.1534)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8324 (0.8428)  time: 0.1422  data: 0.0013  max mem: 9147
Epoch: [138] Total time: 0:04:24 (0.2112 s / it)
Averaged stats: lr: 0.002467  min_lr: 0.002467  loss: 3.0295 (3.1485)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8324 (0.8428)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.8293 (0.8293)  acc1: 84.4000 (84.4000)  acc5: 97.2000 (97.2000)  time: 5.6423  data: 5.5620  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0613 (1.0541)  acc1: 77.6000 (78.0364)  acc5: 95.2000 (94.8364)  time: 0.7705  data: 0.6866  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3067 (1.2557)  acc1: 71.2000 (73.1048)  acc5: 90.8000 (92.2095)  time: 0.2229  data: 0.1452  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3430 (1.2696)  acc1: 69.6000 (72.7520)  acc5: 90.4000 (92.0480)  time: 0.2214  data: 0.1452  max mem: 9147
Test: Total time: 0:00:10 (0.4182 s / it)
* Acc@1 73.722 Acc@5 92.028 loss 1.260
Accuracy of the model on the 50000 test images: 73.7%
Max accuracy: 73.72%
Epoch: [139]  [   0/1251]  eta: 0:57:09  lr: 0.002467  min_lr: 0.002467  loss: 2.6450 (2.6450)  weight_decay: 0.0500 (0.0500)  time: 2.7410  data: 2.5514  max mem: 9147
Epoch: [139]  [ 200/1251]  eta: 0:03:48  lr: 0.002464  min_lr: 0.002464  loss: 2.7186 (3.2073)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7747 (0.9296)  time: 0.1902  data: 0.0005  max mem: 9147
Epoch: [139]  [ 400/1251]  eta: 0:02:54  lr: 0.002460  min_lr: 0.002460  loss: 2.6091 (3.1273)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7917 (0.8693)  time: 0.1932  data: 0.0012  max mem: 9147
Epoch: [139]  [ 600/1251]  eta: 0:02:14  lr: 0.002457  min_lr: 0.002457  loss: 2.5813 (3.1101)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8190 (0.8607)  time: 0.2192  data: 0.0098  max mem: 9147
Epoch: [139]  [ 800/1251]  eta: 0:01:33  lr: 0.002453  min_lr: 0.002453  loss: 2.6669 (3.1330)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8631 (0.8577)  time: 0.2098  data: 0.0006  max mem: 9147
Epoch: [139]  [1000/1251]  eta: 0:00:52  lr: 0.002450  min_lr: 0.002450  loss: 2.7202 (3.1324)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8026 (0.8624)  time: 0.2197  data: 0.0006  max mem: 9147
Epoch: [139]  [1200/1251]  eta: 0:00:10  lr: 0.002446  min_lr: 0.002446  loss: 2.7154 (3.1480)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9041 (0.8661)  time: 0.2061  data: 0.0006  max mem: 9147
Epoch: [139]  [1250/1251]  eta: 0:00:00  lr: 0.002446  min_lr: 0.002446  loss: 2.6636 (3.1451)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7187 (0.8620)  time: 0.1425  data: 0.0018  max mem: 9147
Epoch: [139] Total time: 0:04:23 (0.2103 s / it)
Averaged stats: lr: 0.002446  min_lr: 0.002446  loss: 2.6636 (3.1601)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7187 (0.8620)
Test:  [ 0/25]  eta: 0:01:18  loss: 0.8373 (0.8373)  acc1: 83.6000 (83.6000)  acc5: 96.4000 (96.4000)  time: 3.1486  data: 3.0681  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 0.9792 (1.0251)  acc1: 79.2000 (79.0909)  acc5: 95.2000 (94.4727)  time: 0.5947  data: 0.5142  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2703 (1.2394)  acc1: 72.0000 (74.0000)  acc5: 90.8000 (91.8667)  time: 0.2804  data: 0.2034  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4261 (1.2537)  acc1: 71.2000 (73.6640)  acc5: 90.0000 (91.6160)  time: 0.1955  data: 0.1226  max mem: 9147
Test: Total time: 0:00:09 (0.3929 s / it)
* Acc@1 73.940 Acc@5 91.986 loss 1.241
Accuracy of the model on the 50000 test images: 73.9%
Max accuracy: 73.94%
Epoch: [140]  [   0/1251]  eta: 0:57:58  lr: 0.002445  min_lr: 0.002445  loss: 4.0945 (4.0945)  weight_decay: 0.0500 (0.0500)  time: 2.7804  data: 2.5680  max mem: 9147
Epoch: [140]  [ 200/1251]  eta: 0:03:45  lr: 0.002442  min_lr: 0.002442  loss: 2.4867 (3.1211)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7718 (0.8544)  time: 0.2010  data: 0.0007  max mem: 9147
Epoch: [140]  [ 400/1251]  eta: 0:03:02  lr: 0.002438  min_lr: 0.002438  loss: 2.9135 (3.1141)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7382 (0.8050)  time: 0.2051  data: 0.0006  max mem: 9147
Epoch: [140]  [ 600/1251]  eta: 0:02:18  lr: 0.002435  min_lr: 0.002435  loss: 2.5301 (3.1269)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7490 (0.8241)  time: 0.2226  data: 0.0006  max mem: 9147
Epoch: [140]  [ 800/1251]  eta: 0:01:33  lr: 0.002431  min_lr: 0.002431  loss: 2.7554 (3.1397)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8852 (0.8330)  time: 0.1936  data: 0.0004  max mem: 9147
Epoch: [140]  [1000/1251]  eta: 0:00:51  lr: 0.002428  min_lr: 0.002428  loss: 2.6570 (3.1391)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7826 (0.8284)  time: 0.1899  data: 0.0006  max mem: 9147
Epoch: [140]  [1200/1251]  eta: 0:00:10  lr: 0.002424  min_lr: 0.002424  loss: 2.4710 (3.1293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7538 (0.8374)  time: 0.2057  data: 0.0006  max mem: 9147
Epoch: [140]  [1250/1251]  eta: 0:00:00  lr: 0.002424  min_lr: 0.002424  loss: 2.5782 (3.1261)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8385 (0.8395)  time: 0.1490  data: 0.0014  max mem: 9147
Epoch: [140] Total time: 0:04:16 (0.2051 s / it)
Averaged stats: lr: 0.002424  min_lr: 0.002424  loss: 2.5782 (3.1404)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8385 (0.8395)
Test:  [ 0/25]  eta: 0:02:14  loss: 0.8152 (0.8152)  acc1: 82.0000 (82.0000)  acc5: 96.8000 (96.8000)  time: 5.3842  data: 5.3037  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9713 (1.0018)  acc1: 78.0000 (78.6909)  acc5: 95.6000 (95.0182)  time: 0.7369  data: 0.6575  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2329 (1.1878)  acc1: 74.0000 (74.6667)  acc5: 91.2000 (92.2476)  time: 0.2080  data: 0.1327  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2670 (1.2073)  acc1: 73.2000 (74.0000)  acc5: 90.8000 (92.0480)  time: 0.2086  data: 0.1358  max mem: 9147
Test: Total time: 0:00:09 (0.3991 s / it)
* Acc@1 73.988 Acc@5 92.138 loss 1.202
Accuracy of the model on the 50000 test images: 74.0%
Max accuracy: 73.99%
Epoch: [141]  [   0/1251]  eta: 1:03:42  lr: 0.002424  min_lr: 0.002424  loss: 3.6041 (3.6041)  weight_decay: 0.0500 (0.0500)  time: 3.0557  data: 2.8657  max mem: 9147
Epoch: [141]  [ 200/1251]  eta: 0:03:38  lr: 0.002420  min_lr: 0.002420  loss: 2.5902 (3.0550)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6716 (0.7742)  time: 0.1974  data: 0.0013  max mem: 9147
Epoch: [141]  [ 400/1251]  eta: 0:02:58  lr: 0.002417  min_lr: 0.002417  loss: 3.3494 (3.1207)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7276 (0.7823)  time: 0.2121  data: 0.0006  max mem: 9147
Epoch: [141]  [ 600/1251]  eta: 0:02:17  lr: 0.002413  min_lr: 0.002413  loss: 2.5360 (3.0945)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7287 (0.7983)  time: 0.2255  data: 0.0046  max mem: 9147
Epoch: [141]  [ 800/1251]  eta: 0:01:34  lr: 0.002409  min_lr: 0.002409  loss: 2.5076 (3.1054)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8579 (0.8237)  time: 0.1864  data: 0.0006  max mem: 9147
Epoch: [141]  [1000/1251]  eta: 0:00:51  lr: 0.002406  min_lr: 0.002406  loss: 3.1123 (3.1127)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7561 (0.8304)  time: 0.1856  data: 0.0005  max mem: 9147
Epoch: [141]  [1200/1251]  eta: 0:00:10  lr: 0.002402  min_lr: 0.002402  loss: 3.5775 (3.1207)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8453 (0.8398)  time: 0.2010  data: 0.0007  max mem: 9147
Epoch: [141]  [1250/1251]  eta: 0:00:00  lr: 0.002402  min_lr: 0.002402  loss: 2.5638 (3.1249)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8103 (0.8441)  time: 0.1465  data: 0.0010  max mem: 9147
Epoch: [141] Total time: 0:04:14 (0.2035 s / it)
Averaged stats: lr: 0.002402  min_lr: 0.002402  loss: 2.5638 (3.1384)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8103 (0.8441)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.7667 (0.7667)  acc1: 83.6000 (83.6000)  acc5: 96.8000 (96.8000)  time: 5.4504  data: 5.3578  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9359 (0.9911)  acc1: 79.6000 (79.2364)  acc5: 96.0000 (95.2000)  time: 0.7442  data: 0.6488  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2753 (1.2139)  acc1: 72.4000 (74.6286)  acc5: 90.4000 (92.2667)  time: 0.2054  data: 0.1221  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3810 (1.2304)  acc1: 71.6000 (74.1280)  acc5: 90.0000 (92.0800)  time: 0.2016  data: 0.1243  max mem: 9147
Test: Total time: 0:00:09 (0.3984 s / it)
* Acc@1 74.010 Acc@5 92.016 loss 1.226
Accuracy of the model on the 50000 test images: 74.0%
Max accuracy: 74.01%
Epoch: [142]  [   0/1251]  eta: 1:08:58  lr: 0.002402  min_lr: 0.002402  loss: 4.1644 (4.1644)  weight_decay: 0.0500 (0.0500)  time: 3.3080  data: 3.1369  max mem: 9147
Epoch: [142]  [ 200/1251]  eta: 0:03:41  lr: 0.002398  min_lr: 0.002398  loss: 2.7414 (3.1159)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8168 (0.8499)  time: 0.2003  data: 0.0007  max mem: 9147
Epoch: [142]  [ 400/1251]  eta: 0:03:02  lr: 0.002395  min_lr: 0.002395  loss: 2.8509 (3.1539)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8548 (0.8700)  time: 0.1998  data: 0.0006  max mem: 9147
Epoch: [142]  [ 600/1251]  eta: 0:02:18  lr: 0.002391  min_lr: 0.002391  loss: 2.5806 (3.1266)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7163 (0.8522)  time: 0.2222  data: 0.0007  max mem: 9147
Epoch: [142]  [ 800/1251]  eta: 0:01:36  lr: 0.002387  min_lr: 0.002387  loss: 2.7792 (3.1552)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7129 (0.8518)  time: 0.2232  data: 0.0016  max mem: 9147
Epoch: [142]  [1000/1251]  eta: 0:00:53  lr: 0.002384  min_lr: 0.002384  loss: 3.0067 (3.1528)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7185 (0.8460)  time: 0.2296  data: 0.0020  max mem: 9147
Epoch: [142]  [1200/1251]  eta: 0:00:10  lr: 0.002380  min_lr: 0.002380  loss: 2.5065 (3.1584)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8906 (0.8614)  time: 0.2049  data: 0.0007  max mem: 9147
Epoch: [142]  [1250/1251]  eta: 0:00:00  lr: 0.002380  min_lr: 0.002380  loss: 2.6461 (3.1534)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8320 (0.8645)  time: 0.1422  data: 0.0008  max mem: 9147
Epoch: [142] Total time: 0:04:25 (0.2123 s / it)
Averaged stats: lr: 0.002380  min_lr: 0.002380  loss: 2.6461 (3.1440)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8320 (0.8645)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.7520 (0.7520)  acc1: 85.6000 (85.6000)  acc5: 96.8000 (96.8000)  time: 5.6925  data: 5.6041  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8534 (0.9635)  acc1: 81.6000 (79.0546)  acc5: 95.2000 (94.6909)  time: 0.7423  data: 0.6682  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2673 (1.1902)  acc1: 74.0000 (74.4000)  acc5: 90.8000 (91.8667)  time: 0.1967  data: 0.1246  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2969 (1.2042)  acc1: 70.4000 (73.7920)  acc5: 89.6000 (91.8240)  time: 0.1983  data: 0.1273  max mem: 9147
Test: Total time: 0:00:10 (0.4014 s / it)
* Acc@1 73.672 Acc@5 92.032 loss 1.195
Accuracy of the model on the 50000 test images: 73.7%
Max accuracy: 74.01%
Epoch: [143]  [   0/1251]  eta: 1:03:41  lr: 0.002380  min_lr: 0.002380  loss: 3.9931 (3.9931)  weight_decay: 0.0500 (0.0500)  time: 3.0544  data: 2.4107  max mem: 9147
Epoch: [143]  [ 200/1251]  eta: 0:03:56  lr: 0.002376  min_lr: 0.002376  loss: 2.6540 (3.2727)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8139 (0.8606)  time: 0.2154  data: 0.0138  max mem: 9147
Epoch: [143]  [ 400/1251]  eta: 0:03:08  lr: 0.002373  min_lr: 0.002373  loss: 2.6295 (3.2154)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8499 (0.8880)  time: 0.2309  data: 0.0118  max mem: 9147
Epoch: [143]  [ 600/1251]  eta: 0:02:21  lr: 0.002369  min_lr: 0.002369  loss: 2.5276 (3.1848)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9611 (0.9070)  time: 0.1934  data: 0.0006  max mem: 9147
Epoch: [143]  [ 800/1251]  eta: 0:01:35  lr: 0.002365  min_lr: 0.002365  loss: 2.7180 (3.1744)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8341 (0.8916)  time: 0.1947  data: 0.0011  max mem: 9147
Epoch: [143]  [1000/1251]  eta: 0:00:53  lr: 0.002362  min_lr: 0.002362  loss: 2.7889 (3.1851)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8023 (0.8768)  time: 0.2007  data: 0.0008  max mem: 9147
Epoch: [143]  [1200/1251]  eta: 0:00:10  lr: 0.002358  min_lr: 0.002358  loss: 3.6583 (3.1817)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7964 (inf)  time: 0.1933  data: 0.0011  max mem: 9147
Epoch: [143]  [1250/1251]  eta: 0:00:00  lr: 0.002358  min_lr: 0.002358  loss: 2.5727 (3.1787)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8662 (inf)  time: 0.1371  data: 0.0008  max mem: 9147
Epoch: [143] Total time: 0:04:25 (0.2118 s / it)
Averaged stats: lr: 0.002358  min_lr: 0.002358  loss: 2.5727 (3.1477)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8662 (inf)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.7955 (0.7955)  acc1: 86.4000 (86.4000)  acc5: 97.2000 (97.2000)  time: 5.6340  data: 5.5387  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9796 (1.0318)  acc1: 79.2000 (78.6182)  acc5: 96.0000 (95.1273)  time: 0.7539  data: 0.6749  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3228 (1.2311)  acc1: 72.0000 (74.4571)  acc5: 91.6000 (92.5714)  time: 0.2001  data: 0.1262  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3548 (1.2432)  acc1: 70.8000 (73.9840)  acc5: 90.0000 (92.2880)  time: 0.1984  data: 0.1262  max mem: 9147
Test: Total time: 0:00:10 (0.4003 s / it)
* Acc@1 73.842 Acc@5 92.080 loss 1.248
Accuracy of the model on the 50000 test images: 73.8%
Max accuracy: 74.01%
Epoch: [144]  [   0/1251]  eta: 1:06:37  lr: 0.002358  min_lr: 0.002358  loss: 3.0895 (3.0895)  weight_decay: 0.0500 (0.0500)  time: 3.1951  data: 2.2020  max mem: 9147
Epoch: [144]  [ 200/1251]  eta: 0:03:45  lr: 0.002354  min_lr: 0.002354  loss: 2.4425 (3.1364)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9348 (1.0374)  time: 0.1934  data: 0.0006  max mem: 9147
Epoch: [144]  [ 400/1251]  eta: 0:02:59  lr: 0.002350  min_lr: 0.002350  loss: 2.5708 (3.1260)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7225 (0.9089)  time: 0.2353  data: 0.0058  max mem: 9147
Epoch: [144]  [ 600/1251]  eta: 0:02:18  lr: 0.002347  min_lr: 0.002347  loss: 2.7644 (3.1376)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7670 (0.9028)  time: 0.2047  data: 0.0006  max mem: 9147
Epoch: [144]  [ 800/1251]  eta: 0:01:34  lr: 0.002343  min_lr: 0.002343  loss: 2.5902 (3.1291)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0772 (0.9097)  time: 0.2001  data: 0.0006  max mem: 9147
Epoch: [144]  [1000/1251]  eta: 0:00:52  lr: 0.002340  min_lr: 0.002340  loss: 3.5552 (3.1385)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7814 (0.9050)  time: 0.1957  data: 0.0006  max mem: 9147
Epoch: [144]  [1200/1251]  eta: 0:00:10  lr: 0.002336  min_lr: 0.002336  loss: 3.0541 (3.1339)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7633 (0.9011)  time: 0.2303  data: 0.0008  max mem: 9147
Epoch: [144]  [1250/1251]  eta: 0:00:00  lr: 0.002335  min_lr: 0.002335  loss: 3.2809 (3.1367)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7005 (0.8932)  time: 0.1406  data: 0.0013  max mem: 9147
Epoch: [144] Total time: 0:04:22 (0.2099 s / it)
Averaged stats: lr: 0.002335  min_lr: 0.002335  loss: 3.2809 (3.1420)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7005 (0.8932)
Test:  [ 0/25]  eta: 0:02:25  loss: 0.9023 (0.9023)  acc1: 84.0000 (84.0000)  acc5: 97.2000 (97.2000)  time: 5.8195  data: 5.7389  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0018 (1.0624)  acc1: 79.2000 (79.1273)  acc5: 96.0000 (95.1636)  time: 0.7578  data: 0.6804  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3097 (1.2730)  acc1: 71.6000 (74.2476)  acc5: 92.0000 (92.3619)  time: 0.2083  data: 0.1346  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4175 (1.2920)  acc1: 71.2000 (73.7120)  acc5: 90.0000 (92.1120)  time: 0.2065  data: 0.1346  max mem: 9147
Test: Total time: 0:00:10 (0.4138 s / it)
* Acc@1 73.834 Acc@5 92.004 loss 1.290
Accuracy of the model on the 50000 test images: 73.8%
Max accuracy: 74.01%
Epoch: [145]  [   0/1251]  eta: 1:02:22  lr: 0.002335  min_lr: 0.002335  loss: 2.3144 (2.3144)  weight_decay: 0.0500 (0.0500)  time: 2.9913  data: 2.7318  max mem: 9147
Epoch: [145]  [ 200/1251]  eta: 0:03:55  lr: 0.002332  min_lr: 0.002332  loss: 2.8908 (3.0661)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8523 (0.8254)  time: 0.2356  data: 0.0006  max mem: 9147
Epoch: [145]  [ 400/1251]  eta: 0:03:06  lr: 0.002328  min_lr: 0.002328  loss: 2.8163 (3.0535)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7506 (0.8409)  time: 0.2168  data: 0.0017  max mem: 9147
Epoch: [145]  [ 600/1251]  eta: 0:02:19  lr: 0.002325  min_lr: 0.002325  loss: 2.5205 (3.0881)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8654 (0.8681)  time: 0.1907  data: 0.0005  max mem: 9147
Epoch: [145]  [ 800/1251]  eta: 0:01:35  lr: 0.002321  min_lr: 0.002321  loss: 2.6725 (3.0875)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8303 (0.8638)  time: 0.2195  data: 0.0091  max mem: 9147
Epoch: [145]  [1000/1251]  eta: 0:00:53  lr: 0.002318  min_lr: 0.002318  loss: 2.6881 (3.1107)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9858 (0.8675)  time: 0.2184  data: 0.0129  max mem: 9147
Epoch: [145]  [1200/1251]  eta: 0:00:10  lr: 0.002314  min_lr: 0.002314  loss: 2.9904 (3.1344)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8069 (0.8649)  time: 0.2062  data: 0.0007  max mem: 9147
Epoch: [145]  [1250/1251]  eta: 0:00:00  lr: 0.002313  min_lr: 0.002313  loss: 2.5934 (3.1335)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8069 (0.8638)  time: 0.1407  data: 0.0008  max mem: 9147
Epoch: [145] Total time: 0:04:26 (0.2132 s / it)
Averaged stats: lr: 0.002313  min_lr: 0.002313  loss: 2.5934 (3.1440)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8069 (0.8638)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.7739 (0.7739)  acc1: 84.0000 (84.0000)  acc5: 97.2000 (97.2000)  time: 5.6581  data: 5.5714  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0162 (1.0104)  acc1: 77.6000 (78.2909)  acc5: 95.2000 (94.9091)  time: 0.6955  data: 0.6170  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2958 (1.2204)  acc1: 71.6000 (73.6952)  acc5: 91.2000 (91.8095)  time: 0.1839  data: 0.1082  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3600 (1.2325)  acc1: 70.8000 (73.4240)  acc5: 90.4000 (91.4720)  time: 0.2018  data: 0.1281  max mem: 9147
Test: Total time: 0:00:10 (0.4040 s / it)
* Acc@1 73.652 Acc@5 91.960 loss 1.218
Accuracy of the model on the 50000 test images: 73.7%
Max accuracy: 74.01%
Epoch: [146]  [   0/1251]  eta: 1:04:17  lr: 0.002313  min_lr: 0.002313  loss: 4.4574 (4.4574)  weight_decay: 0.0500 (0.0500)  time: 3.0833  data: 2.6716  max mem: 9147
Epoch: [146]  [ 200/1251]  eta: 0:03:58  lr: 0.002310  min_lr: 0.002310  loss: 3.6709 (3.1895)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6696 (0.8830)  time: 0.1990  data: 0.0013  max mem: 9147
Epoch: [146]  [ 400/1251]  eta: 0:03:07  lr: 0.002306  min_lr: 0.002306  loss: 3.0507 (3.1842)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8044 (0.8684)  time: 0.2249  data: 0.0007  max mem: 9147
Epoch: [146]  [ 600/1251]  eta: 0:02:20  lr: 0.002303  min_lr: 0.002303  loss: 3.0354 (3.1781)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8456 (0.8573)  time: 0.1854  data: 0.0005  max mem: 9147
Epoch: [146]  [ 800/1251]  eta: 0:01:35  lr: 0.002299  min_lr: 0.002299  loss: 2.5166 (3.1731)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8286 (0.8530)  time: 0.2090  data: 0.0007  max mem: 9147
Epoch: [146]  [1000/1251]  eta: 0:00:53  lr: 0.002296  min_lr: 0.002296  loss: 2.7048 (3.1606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8942 (0.8758)  time: 0.2018  data: 0.0008  max mem: 9147
Epoch: [146]  [1200/1251]  eta: 0:00:10  lr: 0.002292  min_lr: 0.002292  loss: 2.6051 (3.1448)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8643 (0.8840)  time: 0.2103  data: 0.0006  max mem: 9147
Epoch: [146]  [1250/1251]  eta: 0:00:00  lr: 0.002291  min_lr: 0.002291  loss: 4.0061 (3.1522)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8407 (0.8830)  time: 0.1423  data: 0.0017  max mem: 9147
Epoch: [146] Total time: 0:04:27 (0.2135 s / it)
Averaged stats: lr: 0.002291  min_lr: 0.002291  loss: 4.0061 (3.1332)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8407 (0.8830)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.9492 (0.9492)  acc1: 83.6000 (83.6000)  acc5: 97.2000 (97.2000)  time: 5.5614  data: 5.4718  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0487 (1.0875)  acc1: 80.0000 (79.2000)  acc5: 96.0000 (94.9818)  time: 0.7342  data: 0.6377  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3731 (1.3055)  acc1: 72.4000 (73.9810)  acc5: 91.2000 (91.7333)  time: 0.2016  data: 0.1183  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4444 (1.3209)  acc1: 70.4000 (73.4880)  acc5: 89.6000 (91.5520)  time: 0.1998  data: 0.1225  max mem: 9147
Test: Total time: 0:00:10 (0.4014 s / it)
* Acc@1 73.376 Acc@5 91.820 loss 1.311
Accuracy of the model on the 50000 test images: 73.4%
Max accuracy: 74.01%
Epoch: [147]  [   0/1251]  eta: 1:14:39  lr: 0.002291  min_lr: 0.002291  loss: 2.5848 (2.5848)  weight_decay: 0.0500 (0.0500)  time: 3.5807  data: 1.6888  max mem: 9147
Epoch: [147]  [ 200/1251]  eta: 0:03:38  lr: 0.002288  min_lr: 0.002288  loss: 3.2385 (3.1739)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7959 (0.8690)  time: 0.1999  data: 0.0006  max mem: 9147
Epoch: [147]  [ 400/1251]  eta: 0:02:50  lr: 0.002284  min_lr: 0.002284  loss: 2.4440 (3.1662)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9506 (0.8819)  time: 0.1764  data: 0.0005  max mem: 9147
Epoch: [147]  [ 600/1251]  eta: 0:02:11  lr: 0.002280  min_lr: 0.002280  loss: 3.1281 (3.1712)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7948 (0.8538)  time: 0.1893  data: 0.0005  max mem: 9147
Epoch: [147]  [ 800/1251]  eta: 0:01:30  lr: 0.002277  min_lr: 0.002277  loss: 2.6378 (3.1594)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8063 (0.8606)  time: 0.2152  data: 0.0007  max mem: 9147
Epoch: [147]  [1000/1251]  eta: 0:00:51  lr: 0.002273  min_lr: 0.002273  loss: 2.8380 (3.1601)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9038 (0.8731)  time: 0.2346  data: 0.0009  max mem: 9147
Epoch: [147]  [1200/1251]  eta: 0:00:10  lr: 0.002270  min_lr: 0.002270  loss: 2.6739 (3.1548)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8769 (0.8646)  time: 0.2273  data: 0.0008  max mem: 9147
Epoch: [147]  [1250/1251]  eta: 0:00:00  lr: 0.002269  min_lr: 0.002269  loss: 2.7863 (3.1553)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8895 (0.8679)  time: 0.1407  data: 0.0014  max mem: 9147
Epoch: [147] Total time: 0:04:18 (0.2063 s / it)
Averaged stats: lr: 0.002269  min_lr: 0.002269  loss: 2.7863 (3.1325)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8895 (0.8679)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.8841 (0.8841)  acc1: 82.8000 (82.8000)  acc5: 96.0000 (96.0000)  time: 5.7009  data: 5.6205  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9860 (1.0261)  acc1: 80.8000 (78.4727)  acc5: 95.6000 (94.6909)  time: 0.7474  data: 0.6718  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2829 (1.2291)  acc1: 72.0000 (74.2286)  acc5: 91.6000 (92.0571)  time: 0.2137  data: 0.1415  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3745 (1.2404)  acc1: 71.6000 (73.9200)  acc5: 91.6000 (92.0320)  time: 0.2125  data: 0.1414  max mem: 9147
Test: Total time: 0:00:10 (0.4130 s / it)
* Acc@1 73.842 Acc@5 92.156 loss 1.241
Accuracy of the model on the 50000 test images: 73.8%
Max accuracy: 74.01%
Epoch: [148]  [   0/1251]  eta: 1:00:06  lr: 0.002269  min_lr: 0.002269  loss: 2.4206 (2.4206)  weight_decay: 0.0500 (0.0500)  time: 2.8827  data: 1.5729  max mem: 9147
Epoch: [148]  [ 200/1251]  eta: 0:03:56  lr: 0.002265  min_lr: 0.002265  loss: 3.0285 (3.2052)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8695 (0.8358)  time: 0.2058  data: 0.0007  max mem: 9147
Epoch: [148]  [ 400/1251]  eta: 0:03:06  lr: 0.002262  min_lr: 0.002262  loss: 2.7548 (3.1732)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8317 (0.8431)  time: 0.2053  data: 0.0007  max mem: 9147
Epoch: [148]  [ 600/1251]  eta: 0:02:22  lr: 0.002258  min_lr: 0.002258  loss: 2.4866 (3.1145)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6806 (0.8316)  time: 0.2245  data: 0.0006  max mem: 9147
Epoch: [148]  [ 800/1251]  eta: 0:01:38  lr: 0.002255  min_lr: 0.002255  loss: 2.4804 (3.0872)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8269 (0.8310)  time: 0.2314  data: 0.0276  max mem: 9147
Epoch: [148]  [1000/1251]  eta: 0:00:54  lr: 0.002251  min_lr: 0.002251  loss: 3.1723 (3.0852)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7676 (0.8258)  time: 0.2246  data: 0.0007  max mem: 9147
Epoch: [148]  [1200/1251]  eta: 0:00:11  lr: 0.002248  min_lr: 0.002248  loss: 2.5646 (3.0916)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8108 (0.8383)  time: 0.2160  data: 0.0006  max mem: 9147
Epoch: [148]  [1250/1251]  eta: 0:00:00  lr: 0.002247  min_lr: 0.002247  loss: 2.5626 (3.0920)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6838 (0.8344)  time: 0.1528  data: 0.0013  max mem: 9147
Epoch: [148] Total time: 0:04:30 (0.2166 s / it)
Averaged stats: lr: 0.002247  min_lr: 0.002247  loss: 2.5626 (3.1160)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6838 (0.8344)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.8272 (0.8272)  acc1: 82.0000 (82.0000)  acc5: 96.4000 (96.4000)  time: 5.4722  data: 5.3916  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9750 (1.0108)  acc1: 80.8000 (78.9091)  acc5: 95.6000 (95.0909)  time: 0.7491  data: 0.6546  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2524 (1.2323)  acc1: 72.0000 (74.3429)  acc5: 91.6000 (92.1714)  time: 0.2107  data: 0.1258  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3605 (1.2436)  acc1: 71.6000 (73.7920)  acc5: 91.2000 (92.0960)  time: 0.2144  data: 0.1352  max mem: 9147
Test: Total time: 0:00:10 (0.4094 s / it)
* Acc@1 74.026 Acc@5 92.214 loss 1.234
Accuracy of the model on the 50000 test images: 74.0%
Max accuracy: 74.03%
Epoch: [149]  [   0/1251]  eta: 1:04:23  lr: 0.002247  min_lr: 0.002247  loss: 3.8874 (3.8874)  weight_decay: 0.0500 (0.0500)  time: 3.0885  data: 2.8887  max mem: 9147
Epoch: [149]  [ 200/1251]  eta: 0:03:58  lr: 0.002243  min_lr: 0.002243  loss: 2.4933 (3.1047)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8808 (0.8747)  time: 0.2340  data: 0.0007  max mem: 9147
Epoch: [149]  [ 400/1251]  eta: 0:03:02  lr: 0.002240  min_lr: 0.002240  loss: 3.1759 (3.0654)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6844 (0.8311)  time: 0.2166  data: 0.0007  max mem: 9147
Epoch: [149]  [ 600/1251]  eta: 0:02:18  lr: 0.002236  min_lr: 0.002236  loss: 2.4729 (3.0540)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8317 (0.8976)  time: 0.2191  data: 0.0006  max mem: 9147
Epoch: [149]  [ 800/1251]  eta: 0:01:36  lr: 0.002232  min_lr: 0.002232  loss: 2.7265 (3.0673)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8219 (0.8839)  time: 0.2006  data: 0.0006  max mem: 9147
Epoch: [149]  [1000/1251]  eta: 0:00:53  lr: 0.002229  min_lr: 0.002229  loss: 2.5954 (3.0734)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7874 (0.8729)  time: 0.2143  data: 0.0110  max mem: 9147
Epoch: [149]  [1200/1251]  eta: 0:00:10  lr: 0.002225  min_lr: 0.002225  loss: 2.5913 (3.0679)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7525 (inf)  time: 0.1937  data: 0.0011  max mem: 9147
Epoch: [149]  [1250/1251]  eta: 0:00:00  lr: 0.002224  min_lr: 0.002224  loss: 3.3085 (3.0711)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8049 (inf)  time: 0.1382  data: 0.0012  max mem: 9147
Epoch: [149] Total time: 0:04:22 (0.2099 s / it)
Averaged stats: lr: 0.002224  min_lr: 0.002224  loss: 3.3085 (3.1245)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8049 (inf)
Test:  [ 0/25]  eta: 0:02:23  loss: 0.8632 (0.8632)  acc1: 85.2000 (85.2000)  acc5: 97.2000 (97.2000)  time: 5.7319  data: 5.6517  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0329 (1.0305)  acc1: 80.0000 (79.6727)  acc5: 96.0000 (95.5636)  time: 0.6828  data: 0.6093  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2945 (1.2465)  acc1: 72.4000 (74.7238)  acc5: 91.6000 (92.4571)  time: 0.1732  data: 0.1014  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4151 (1.2634)  acc1: 70.4000 (74.3200)  acc5: 90.0000 (92.2560)  time: 0.2078  data: 0.1373  max mem: 9147
Test: Total time: 0:00:10 (0.4108 s / it)
* Acc@1 74.224 Acc@5 92.180 loss 1.261
Accuracy of the model on the 50000 test images: 74.2%
Max accuracy: 74.22%
Epoch: [150]  [   0/1251]  eta: 0:56:44  lr: 0.002224  min_lr: 0.002224  loss: 4.1357 (4.1357)  weight_decay: 0.0500 (0.0500)  time: 2.7212  data: 2.3874  max mem: 9147
Epoch: [150]  [ 200/1251]  eta: 0:03:42  lr: 0.002221  min_lr: 0.002221  loss: 3.4690 (3.0088)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9298 (0.9146)  time: 0.1910  data: 0.0006  max mem: 9147
Epoch: [150]  [ 400/1251]  eta: 0:02:52  lr: 0.002217  min_lr: 0.002217  loss: 3.7043 (3.0721)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8028 (0.8808)  time: 0.1953  data: 0.0005  max mem: 9147
Epoch: [150]  [ 600/1251]  eta: 0:02:11  lr: 0.002214  min_lr: 0.002214  loss: 2.5141 (3.0884)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7104 (0.8826)  time: 0.2050  data: 0.0007  max mem: 9147
Epoch: [150]  [ 800/1251]  eta: 0:01:32  lr: 0.002210  min_lr: 0.002210  loss: 2.7323 (3.0926)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7217 (0.8581)  time: 0.2002  data: 0.0006  max mem: 9147
Epoch: [150]  [1000/1251]  eta: 0:00:51  lr: 0.002207  min_lr: 0.002207  loss: 2.7101 (3.0957)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8101 (0.8618)  time: 0.1853  data: 0.0007  max mem: 9147
Epoch: [150]  [1200/1251]  eta: 0:00:10  lr: 0.002203  min_lr: 0.002203  loss: 3.3625 (3.1137)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7343 (0.8540)  time: 0.1794  data: 0.0005  max mem: 9147
Epoch: [150]  [1250/1251]  eta: 0:00:00  lr: 0.002202  min_lr: 0.002202  loss: 2.8062 (3.1183)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8016 (0.8529)  time: 0.1380  data: 0.0010  max mem: 9147
Epoch: [150] Total time: 0:04:10 (0.2005 s / it)
Averaged stats: lr: 0.002202  min_lr: 0.002202  loss: 2.8062 (3.1154)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8016 (0.8529)
Test:  [ 0/25]  eta: 0:02:28  loss: 0.9171 (0.9171)  acc1: 85.2000 (85.2000)  acc5: 96.8000 (96.8000)  time: 5.9459  data: 5.8655  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0174 (1.0705)  acc1: 80.4000 (79.2000)  acc5: 95.6000 (95.4909)  time: 0.7805  data: 0.7019  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3594 (1.2733)  acc1: 73.6000 (74.7429)  acc5: 91.6000 (92.4762)  time: 0.2001  data: 0.1253  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3733 (1.2805)  acc1: 72.0000 (74.3360)  acc5: 90.8000 (92.2720)  time: 0.2035  data: 0.1312  max mem: 9147
Test: Total time: 0:00:10 (0.4170 s / it)
* Acc@1 74.072 Acc@5 92.254 loss 1.282
Accuracy of the model on the 50000 test images: 74.1%
Max accuracy: 74.22%
Epoch: [151]  [   0/1251]  eta: 0:59:37  lr: 0.002202  min_lr: 0.002202  loss: 2.7904 (2.7904)  weight_decay: 0.0500 (0.0500)  time: 2.8597  data: 2.3239  max mem: 9147
Epoch: [151]  [ 200/1251]  eta: 0:03:57  lr: 0.002198  min_lr: 0.002198  loss: 2.8213 (3.1404)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0174 (0.9027)  time: 0.1926  data: 0.0012  max mem: 9147
Epoch: [151]  [ 400/1251]  eta: 0:02:58  lr: 0.002195  min_lr: 0.002195  loss: 2.6655 (3.1475)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7416 (0.8809)  time: 0.2071  data: 0.0007  max mem: 9147
Epoch: [151]  [ 600/1251]  eta: 0:02:18  lr: 0.002191  min_lr: 0.002191  loss: 3.4304 (3.1487)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7602 (0.8580)  time: 0.2158  data: 0.0006  max mem: 9147
Epoch: [151]  [ 800/1251]  eta: 0:01:35  lr: 0.002188  min_lr: 0.002188  loss: 2.8493 (3.1478)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7859 (0.8601)  time: 0.2193  data: 0.0006  max mem: 9147
Epoch: [151]  [1000/1251]  eta: 0:00:53  lr: 0.002184  min_lr: 0.002184  loss: 2.5886 (3.1164)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0057 (0.8624)  time: 0.2246  data: 0.0008  max mem: 9147
Epoch: [151]  [1200/1251]  eta: 0:00:10  lr: 0.002181  min_lr: 0.002181  loss: 3.1078 (3.1082)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8566 (0.8703)  time: 0.1803  data: 0.0005  max mem: 9147
Epoch: [151]  [1250/1251]  eta: 0:00:00  lr: 0.002180  min_lr: 0.002180  loss: 3.2492 (3.1120)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7985 (0.8705)  time: 0.1403  data: 0.0012  max mem: 9147
Epoch: [151] Total time: 0:04:23 (0.2107 s / it)
Averaged stats: lr: 0.002180  min_lr: 0.002180  loss: 3.2492 (3.1153)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7985 (0.8705)
Test:  [ 0/25]  eta: 0:01:18  loss: 0.8557 (0.8557)  acc1: 84.4000 (84.4000)  acc5: 97.6000 (97.6000)  time: 3.1539  data: 3.0736  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 1.1247 (1.1574)  acc1: 78.8000 (77.9273)  acc5: 94.8000 (95.2727)  time: 0.5897  data: 0.5114  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3701 (1.3510)  acc1: 71.2000 (73.4667)  acc5: 91.6000 (92.0381)  time: 0.2939  data: 0.2189  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4455 (1.3524)  acc1: 72.0000 (73.2800)  acc5: 90.0000 (91.9840)  time: 0.2254  data: 0.1537  max mem: 9147
Test: Total time: 0:00:10 (0.4080 s / it)
* Acc@1 73.894 Acc@5 92.170 loss 1.338
Accuracy of the model on the 50000 test images: 73.9%
Max accuracy: 74.22%
Epoch: [152]  [   0/1251]  eta: 1:07:35  lr: 0.002180  min_lr: 0.002180  loss: 3.4893 (3.4893)  weight_decay: 0.0500 (0.0500)  time: 3.2419  data: 2.3007  max mem: 9147
Epoch: [152]  [ 200/1251]  eta: 0:03:38  lr: 0.002176  min_lr: 0.002176  loss: 2.8693 (3.1176)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7614 (0.7947)  time: 0.1948  data: 0.0005  max mem: 9147
Epoch: [152]  [ 400/1251]  eta: 0:02:55  lr: 0.002173  min_lr: 0.002173  loss: 3.4835 (3.1207)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8832 (0.8636)  time: 0.2172  data: 0.0005  max mem: 9147
Epoch: [152]  [ 600/1251]  eta: 0:02:13  lr: 0.002169  min_lr: 0.002169  loss: 2.5593 (3.0861)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7884 (0.8469)  time: 0.2000  data: 0.0006  max mem: 9147
Epoch: [152]  [ 800/1251]  eta: 0:01:32  lr: 0.002165  min_lr: 0.002165  loss: 2.5305 (3.0857)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8501 (0.8605)  time: 0.2100  data: 0.0008  max mem: 9147
Epoch: [152]  [1000/1251]  eta: 0:00:52  lr: 0.002162  min_lr: 0.002162  loss: 2.5418 (3.0947)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7952 (0.8550)  time: 0.2239  data: 0.0006  max mem: 9147
Epoch: [152]  [1200/1251]  eta: 0:00:10  lr: 0.002158  min_lr: 0.002158  loss: 2.9205 (3.0959)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9289 (0.8521)  time: 0.2137  data: 0.0008  max mem: 9147
Epoch: [152]  [1250/1251]  eta: 0:00:00  lr: 0.002157  min_lr: 0.002157  loss: 3.1161 (3.1044)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8489 (0.8525)  time: 0.1455  data: 0.0016  max mem: 9147
Epoch: [152] Total time: 0:04:19 (0.2071 s / it)
Averaged stats: lr: 0.002157  min_lr: 0.002157  loss: 3.1161 (3.1133)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8489 (0.8525)
Test:  [ 0/25]  eta: 0:02:11  loss: 0.8886 (0.8886)  acc1: 84.8000 (84.8000)  acc5: 96.4000 (96.4000)  time: 5.2592  data: 5.1787  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0072 (1.0390)  acc1: 78.4000 (78.8000)  acc5: 95.2000 (95.2000)  time: 0.7486  data: 0.6550  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3146 (1.2519)  acc1: 71.6000 (74.6286)  acc5: 92.4000 (92.5524)  time: 0.2240  data: 0.1411  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3821 (1.2588)  acc1: 71.2000 (74.2240)  acc5: 91.2000 (92.4640)  time: 0.2191  data: 0.1411  max mem: 9147
Test: Total time: 0:00:10 (0.4035 s / it)
* Acc@1 74.396 Acc@5 92.296 loss 1.255
Accuracy of the model on the 50000 test images: 74.4%
Max accuracy: 74.40%
Epoch: [153]  [   0/1251]  eta: 1:02:41  lr: 0.002157  min_lr: 0.002157  loss: 3.9733 (3.9733)  weight_decay: 0.0500 (0.0500)  time: 3.0070  data: 2.8252  max mem: 9147
Epoch: [153]  [ 200/1251]  eta: 0:03:56  lr: 0.002154  min_lr: 0.002154  loss: 2.5860 (3.0954)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7598 (0.8181)  time: 0.2055  data: 0.0007  max mem: 9147
Epoch: [153]  [ 400/1251]  eta: 0:03:07  lr: 0.002150  min_lr: 0.002150  loss: 3.2588 (3.0910)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8844 (0.8370)  time: 0.2263  data: 0.0051  max mem: 9147
Epoch: [153]  [ 600/1251]  eta: 0:02:23  lr: 0.002147  min_lr: 0.002147  loss: 2.6812 (3.1042)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8975 (0.8602)  time: 0.2231  data: 0.0008  max mem: 9147
Epoch: [153]  [ 800/1251]  eta: 0:01:38  lr: 0.002143  min_lr: 0.002143  loss: 2.3918 (3.1069)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7676 (0.8592)  time: 0.2201  data: 0.0179  max mem: 9147
Epoch: [153]  [1000/1251]  eta: 0:00:54  lr: 0.002139  min_lr: 0.002139  loss: 2.5369 (3.1163)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8961 (0.8683)  time: 0.1873  data: 0.0005  max mem: 9147
Epoch: [153]  [1200/1251]  eta: 0:00:10  lr: 0.002136  min_lr: 0.002136  loss: 3.4637 (3.1258)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8512 (0.8671)  time: 0.1721  data: 0.0005  max mem: 9147
Epoch: [153]  [1250/1251]  eta: 0:00:00  lr: 0.002135  min_lr: 0.002135  loss: 2.6475 (3.1234)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7474 (0.8616)  time: 0.1389  data: 0.0019  max mem: 9147
Epoch: [153] Total time: 0:04:23 (0.2108 s / it)
Averaged stats: lr: 0.002135  min_lr: 0.002135  loss: 2.6475 (3.1160)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7474 (0.8616)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.7732 (0.7732)  acc1: 84.8000 (84.8000)  acc5: 96.0000 (96.0000)  time: 5.4796  data: 5.3524  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9806 (1.0001)  acc1: 80.4000 (79.2727)  acc5: 95.6000 (95.1273)  time: 0.7532  data: 0.6568  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2592 (1.2036)  acc1: 72.0000 (74.4952)  acc5: 92.4000 (92.6095)  time: 0.2124  data: 0.1310  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3103 (1.2210)  acc1: 71.6000 (74.0640)  acc5: 92.0000 (92.4160)  time: 0.2082  data: 0.1324  max mem: 9147
Test: Total time: 0:00:10 (0.4047 s / it)
* Acc@1 74.216 Acc@5 92.310 loss 1.219
Accuracy of the model on the 50000 test images: 74.2%
Max accuracy: 74.40%
Epoch: [154]  [   0/1251]  eta: 1:04:55  lr: 0.002135  min_lr: 0.002135  loss: 3.9665 (3.9665)  weight_decay: 0.0500 (0.0500)  time: 3.1142  data: 2.7646  max mem: 9147
Epoch: [154]  [ 200/1251]  eta: 0:03:56  lr: 0.002131  min_lr: 0.002131  loss: 2.7531 (3.0459)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7657 (0.8369)  time: 0.2006  data: 0.0007  max mem: 9147
Epoch: [154]  [ 400/1251]  eta: 0:03:08  lr: 0.002128  min_lr: 0.002128  loss: 2.5761 (3.0707)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8045 (0.8824)  time: 0.2257  data: 0.0007  max mem: 9147
Epoch: [154]  [ 600/1251]  eta: 0:02:21  lr: 0.002124  min_lr: 0.002124  loss: 3.7119 (3.0973)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8472 (0.8898)  time: 0.1950  data: 0.0012  max mem: 9147
Epoch: [154]  [ 800/1251]  eta: 0:01:37  lr: 0.002121  min_lr: 0.002121  loss: 2.8086 (3.0879)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9106 (0.8943)  time: 0.2061  data: 0.0006  max mem: 9147
Epoch: [154]  [1000/1251]  eta: 0:00:53  lr: 0.002117  min_lr: 0.002117  loss: 2.8094 (3.0989)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8542 (0.8953)  time: 0.1950  data: 0.0011  max mem: 9147
Epoch: [154]  [1200/1251]  eta: 0:00:10  lr: 0.002113  min_lr: 0.002113  loss: 2.6274 (3.0960)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8136 (0.8952)  time: 0.2006  data: 0.0007  max mem: 9147
Epoch: [154]  [1250/1251]  eta: 0:00:00  lr: 0.002113  min_lr: 0.002113  loss: 3.9909 (3.1036)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8641 (0.8951)  time: 0.1459  data: 0.0014  max mem: 9147
Epoch: [154] Total time: 0:04:27 (0.2140 s / it)
Averaged stats: lr: 0.002113  min_lr: 0.002113  loss: 3.9909 (3.1102)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8641 (0.8951)
Test:  [ 0/25]  eta: 0:02:23  loss: 0.8519 (0.8519)  acc1: 86.0000 (86.0000)  acc5: 98.0000 (98.0000)  time: 5.7213  data: 5.6244  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.1587 (1.1003)  acc1: 78.8000 (78.9455)  acc5: 96.4000 (95.5636)  time: 0.7213  data: 0.6430  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3413 (1.3329)  acc1: 71.2000 (74.2476)  acc5: 91.6000 (92.4191)  time: 0.1963  data: 0.1234  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4946 (1.3446)  acc1: 71.2000 (73.8560)  acc5: 89.6000 (92.1920)  time: 0.1947  data: 0.1233  max mem: 9147
Test: Total time: 0:00:10 (0.4002 s / it)
* Acc@1 73.812 Acc@5 91.988 loss 1.346
Accuracy of the model on the 50000 test images: 73.8%
Max accuracy: 74.40%
Epoch: [155]  [   0/1251]  eta: 1:02:56  lr: 0.002113  min_lr: 0.002113  loss: 3.9715 (3.9715)  weight_decay: 0.0500 (0.0500)  time: 3.0187  data: 2.3997  max mem: 9147
Epoch: [155]  [ 200/1251]  eta: 0:03:54  lr: 0.002109  min_lr: 0.002109  loss: 2.6702 (3.0576)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7707 (0.7886)  time: 0.1998  data: 0.0007  max mem: 9147
Epoch: [155]  [ 400/1251]  eta: 0:03:05  lr: 0.002105  min_lr: 0.002105  loss: 2.5096 (3.0283)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8380 (0.8469)  time: 0.2010  data: 0.0007  max mem: 9147
Epoch: [155]  [ 600/1251]  eta: 0:02:21  lr: 0.002102  min_lr: 0.002102  loss: 2.6867 (3.0686)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7394 (0.8431)  time: 0.2134  data: 0.0007  max mem: 9147
Epoch: [155]  [ 800/1251]  eta: 0:01:35  lr: 0.002098  min_lr: 0.002098  loss: 2.7717 (3.0877)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8248 (0.8638)  time: 0.2006  data: 0.0006  max mem: 9147
Epoch: [155]  [1000/1251]  eta: 0:00:52  lr: 0.002095  min_lr: 0.002095  loss: 3.4806 (3.0936)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8505 (0.8834)  time: 0.1997  data: 0.0006  max mem: 9147
Epoch: [155]  [1200/1251]  eta: 0:00:10  lr: 0.002091  min_lr: 0.002091  loss: 2.5527 (3.1060)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8357 (0.8883)  time: 0.1878  data: 0.0006  max mem: 9147
Epoch: [155]  [1250/1251]  eta: 0:00:00  lr: 0.002090  min_lr: 0.002090  loss: 2.6138 (3.1072)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9183 (0.8940)  time: 0.1398  data: 0.0009  max mem: 9147
Epoch: [155] Total time: 0:04:22 (0.2100 s / it)
Averaged stats: lr: 0.002090  min_lr: 0.002090  loss: 2.6138 (3.0933)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9183 (0.8940)
Test:  [ 0/25]  eta: 0:02:11  loss: 0.8495 (0.8495)  acc1: 83.6000 (83.6000)  acc5: 97.2000 (97.2000)  time: 5.2758  data: 5.1953  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 1.0046 (1.0343)  acc1: 79.2000 (78.6909)  acc5: 96.0000 (95.5636)  time: 0.7100  data: 0.6173  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3017 (1.2407)  acc1: 71.6000 (74.4571)  acc5: 92.0000 (92.8762)  time: 0.2025  data: 0.1206  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3844 (1.2537)  acc1: 71.2000 (74.0960)  acc5: 91.2000 (92.6400)  time: 0.1976  data: 0.1205  max mem: 9147
Test: Total time: 0:00:09 (0.3872 s / it)
* Acc@1 74.366 Acc@5 92.398 loss 1.252
Accuracy of the model on the 50000 test images: 74.4%
Max accuracy: 74.40%
Epoch: [156]  [   0/1251]  eta: 1:04:44  lr: 0.002090  min_lr: 0.002090  loss: 3.8457 (3.8457)  weight_decay: 0.0500 (0.0500)  time: 3.1055  data: 1.6629  max mem: 9147
Epoch: [156]  [ 200/1251]  eta: 0:03:54  lr: 0.002087  min_lr: 0.002087  loss: 2.7547 (3.0120)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6986 (0.7933)  time: 0.2001  data: 0.0006  max mem: 9147
Epoch: [156]  [ 400/1251]  eta: 0:02:56  lr: 0.002083  min_lr: 0.002083  loss: 2.5812 (3.0467)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9371 (0.8469)  time: 0.1950  data: 0.0012  max mem: 9147
Epoch: [156]  [ 600/1251]  eta: 0:02:16  lr: 0.002079  min_lr: 0.002079  loss: 2.6290 (3.0503)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7922 (0.8601)  time: 0.2255  data: 0.0011  max mem: 9147
Epoch: [156]  [ 800/1251]  eta: 0:01:34  lr: 0.002076  min_lr: 0.002076  loss: 2.6902 (3.0806)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8253 (0.8557)  time: 0.1923  data: 0.0006  max mem: 9147
Epoch: [156]  [1000/1251]  eta: 0:00:52  lr: 0.002072  min_lr: 0.002072  loss: 2.9261 (3.0922)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7449 (0.8605)  time: 0.2108  data: 0.0021  max mem: 9147
Epoch: [156]  [1200/1251]  eta: 0:00:10  lr: 0.002069  min_lr: 0.002069  loss: 2.6099 (3.1076)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7195 (0.8563)  time: 0.2140  data: 0.0006  max mem: 9147
Epoch: [156]  [1250/1251]  eta: 0:00:00  lr: 0.002068  min_lr: 0.002068  loss: 3.6863 (3.1156)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8648 (0.8576)  time: 0.1395  data: 0.0012  max mem: 9147
Epoch: [156] Total time: 0:04:24 (0.2114 s / it)
Averaged stats: lr: 0.002068  min_lr: 0.002068  loss: 3.6863 (3.1066)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8648 (0.8576)
Test:  [ 0/25]  eta: 0:02:12  loss: 0.8550 (0.8550)  acc1: 85.2000 (85.2000)  acc5: 98.0000 (98.0000)  time: 5.3139  data: 5.2226  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0920 (1.1221)  acc1: 79.6000 (79.4545)  acc5: 96.4000 (95.2727)  time: 0.7560  data: 0.6782  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3671 (1.3191)  acc1: 74.0000 (74.7048)  acc5: 92.0000 (92.5143)  time: 0.2124  data: 0.1388  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3835 (1.3309)  acc1: 72.0000 (74.3680)  acc5: 90.4000 (92.2880)  time: 0.2262  data: 0.1537  max mem: 9147
Test: Total time: 0:00:10 (0.4086 s / it)
* Acc@1 74.232 Acc@5 92.488 loss 1.323
Accuracy of the model on the 50000 test images: 74.2%
Max accuracy: 74.40%
Epoch: [157]  [   0/1251]  eta: 1:04:44  lr: 0.002068  min_lr: 0.002068  loss: 4.2863 (4.2863)  weight_decay: 0.0500 (0.0500)  time: 3.1054  data: 1.7468  max mem: 9147
Epoch: [157]  [ 200/1251]  eta: 0:03:54  lr: 0.002064  min_lr: 0.002064  loss: 2.9400 (3.1282)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9064 (0.9119)  time: 0.1852  data: 0.0005  max mem: 9147
Epoch: [157]  [ 400/1251]  eta: 0:02:56  lr: 0.002061  min_lr: 0.002061  loss: 2.8577 (3.0986)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8969 (0.9008)  time: 0.1902  data: 0.0005  max mem: 9147
Epoch: [157]  [ 600/1251]  eta: 0:02:10  lr: 0.002057  min_lr: 0.002057  loss: 2.8864 (3.1056)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8212 (0.8967)  time: 0.1870  data: 0.0006  max mem: 9147
Epoch: [157]  [ 800/1251]  eta: 0:01:30  lr: 0.002053  min_lr: 0.002053  loss: 3.5830 (3.1315)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9313 (0.9089)  time: 0.1845  data: 0.0005  max mem: 9147
Epoch: [157]  [1000/1251]  eta: 0:00:49  lr: 0.002050  min_lr: 0.002050  loss: 3.0504 (3.1230)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8442 (0.9075)  time: 0.1785  data: 0.0005  max mem: 9147
Epoch: [157]  [1200/1251]  eta: 0:00:10  lr: 0.002046  min_lr: 0.002046  loss: 2.5310 (3.1244)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8945 (0.9011)  time: 0.2249  data: 0.0016  max mem: 9147
Epoch: [157]  [1250/1251]  eta: 0:00:00  lr: 0.002045  min_lr: 0.002045  loss: 2.7638 (3.1232)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8672 (0.9031)  time: 0.1503  data: 0.0007  max mem: 9147
Epoch: [157] Total time: 0:04:11 (0.2007 s / it)
Averaged stats: lr: 0.002045  min_lr: 0.002045  loss: 2.7638 (3.1029)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8672 (0.9031)
Test:  [ 0/25]  eta: 0:01:16  loss: 0.8720 (0.8720)  acc1: 84.4000 (84.4000)  acc5: 98.4000 (98.4000)  time: 3.0503  data: 2.9702  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 1.0758 (1.0681)  acc1: 78.4000 (78.8364)  acc5: 96.0000 (95.1636)  time: 0.5821  data: 0.4994  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2495 (1.2510)  acc1: 72.4000 (74.3048)  acc5: 92.0000 (92.6667)  time: 0.2846  data: 0.2060  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3814 (1.2676)  acc1: 72.0000 (73.6960)  acc5: 90.8000 (92.3360)  time: 0.2148  data: 0.1397  max mem: 9147
Test: Total time: 0:00:09 (0.3972 s / it)
* Acc@1 74.258 Acc@5 92.542 loss 1.258
Accuracy of the model on the 50000 test images: 74.3%
Max accuracy: 74.40%
Epoch: [158]  [   0/1251]  eta: 1:04:59  lr: 0.002045  min_lr: 0.002045  loss: 2.2450 (2.2450)  weight_decay: 0.0500 (0.0500)  time: 3.1169  data: 2.9250  max mem: 9147
Epoch: [158]  [ 200/1251]  eta: 0:03:49  lr: 0.002042  min_lr: 0.002042  loss: 2.4415 (3.0255)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0605 (0.9408)  time: 0.1945  data: 0.0007  max mem: 9147
Epoch: [158]  [ 400/1251]  eta: 0:03:03  lr: 0.002038  min_lr: 0.002038  loss: 2.5221 (3.0260)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9410 (0.9593)  time: 0.2095  data: 0.0007  max mem: 9147
Epoch: [158]  [ 600/1251]  eta: 0:02:21  lr: 0.002035  min_lr: 0.002035  loss: 2.8351 (3.0641)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7558 (0.9075)  time: 0.2025  data: 0.0008  max mem: 9147
Epoch: [158]  [ 800/1251]  eta: 0:01:37  lr: 0.002031  min_lr: 0.002031  loss: 2.5642 (3.1015)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8189 (0.8884)  time: 0.2005  data: 0.0007  max mem: 9147
Epoch: [158]  [1000/1251]  eta: 0:00:53  lr: 0.002027  min_lr: 0.002027  loss: 2.6596 (3.0971)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9246 (0.8963)  time: 0.1986  data: 0.0005  max mem: 9147
Epoch: [158]  [1200/1251]  eta: 0:00:10  lr: 0.002024  min_lr: 0.002024  loss: 2.5834 (3.0981)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9189 (0.9077)  time: 0.2345  data: 0.0007  max mem: 9147
Epoch: [158]  [1250/1251]  eta: 0:00:00  lr: 0.002023  min_lr: 0.002023  loss: 2.6554 (3.1000)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7839 (0.9050)  time: 0.1444  data: 0.0009  max mem: 9147
Epoch: [158] Total time: 0:04:23 (0.2108 s / it)
Averaged stats: lr: 0.002023  min_lr: 0.002023  loss: 2.6554 (3.1012)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7839 (0.9050)
Test:  [ 0/25]  eta: 0:02:10  loss: 0.8489 (0.8489)  acc1: 83.6000 (83.6000)  acc5: 98.0000 (98.0000)  time: 5.2371  data: 5.1467  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9912 (1.0537)  acc1: 80.0000 (79.2364)  acc5: 95.6000 (95.3818)  time: 0.7045  data: 0.6126  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2639 (1.2349)  acc1: 74.0000 (74.9524)  acc5: 92.8000 (92.7619)  time: 0.2064  data: 0.1256  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3347 (1.2465)  acc1: 72.4000 (74.5440)  acc5: 90.8000 (92.5600)  time: 0.2035  data: 0.1265  max mem: 9147
Test: Total time: 0:00:09 (0.3918 s / it)
* Acc@1 74.440 Acc@5 92.448 loss 1.243
Accuracy of the model on the 50000 test images: 74.4%
Max accuracy: 74.44%
Epoch: [159]  [   0/1251]  eta: 0:57:13  lr: 0.002023  min_lr: 0.002023  loss: 2.5073 (2.5073)  weight_decay: 0.0500 (0.0500)  time: 2.7449  data: 2.5073  max mem: 9147
Epoch: [159]  [ 200/1251]  eta: 0:03:55  lr: 0.002019  min_lr: 0.002019  loss: 2.9275 (3.1503)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8887 (0.9147)  time: 0.2292  data: 0.0006  max mem: 9147
Epoch: [159]  [ 400/1251]  eta: 0:03:06  lr: 0.002016  min_lr: 0.002016  loss: 2.4251 (3.2018)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8132 (0.8778)  time: 0.2093  data: 0.0008  max mem: 9147
Epoch: [159]  [ 600/1251]  eta: 0:02:21  lr: 0.002012  min_lr: 0.002012  loss: 2.6856 (3.1516)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7771 (0.8681)  time: 0.2101  data: 0.0006  max mem: 9147
Epoch: [159]  [ 800/1251]  eta: 0:01:37  lr: 0.002009  min_lr: 0.002009  loss: 2.8661 (3.1363)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8155 (0.8607)  time: 0.2147  data: 0.0150  max mem: 9147
Epoch: [159]  [1000/1251]  eta: 0:00:53  lr: 0.002005  min_lr: 0.002005  loss: 3.4916 (3.1143)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8588 (0.8652)  time: 0.1950  data: 0.0007  max mem: 9147
Epoch: [159]  [1200/1251]  eta: 0:00:10  lr: 0.002001  min_lr: 0.002001  loss: 2.7338 (3.1221)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8731 (0.8776)  time: 0.2143  data: 0.0008  max mem: 9147
Epoch: [159]  [1250/1251]  eta: 0:00:00  lr: 0.002001  min_lr: 0.002001  loss: 3.5429 (3.1227)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8534 (0.8778)  time: 0.1447  data: 0.0014  max mem: 9147
Epoch: [159] Total time: 0:04:24 (0.2111 s / it)
Averaged stats: lr: 0.002001  min_lr: 0.002001  loss: 3.5429 (3.0913)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8534 (0.8778)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.8921 (0.8921)  acc1: 86.4000 (86.4000)  acc5: 96.0000 (96.0000)  time: 5.6059  data: 5.5256  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9668 (1.0383)  acc1: 82.0000 (80.5818)  acc5: 95.6000 (95.4546)  time: 0.7557  data: 0.6786  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.3165 (1.2491)  acc1: 71.2000 (74.6667)  acc5: 92.0000 (92.6095)  time: 0.2127  data: 0.1391  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3754 (1.2641)  acc1: 70.8000 (74.3040)  acc5: 90.4000 (92.4320)  time: 0.2108  data: 0.1390  max mem: 9147
Test: Total time: 0:00:10 (0.4086 s / it)
* Acc@1 74.584 Acc@5 92.578 loss 1.254
Accuracy of the model on the 50000 test images: 74.6%
Max accuracy: 74.58%
Epoch: [160]  [   0/1251]  eta: 1:03:14  lr: 0.002001  min_lr: 0.002001  loss: 2.2248 (2.2248)  weight_decay: 0.0500 (0.0500)  time: 3.0329  data: 2.8396  max mem: 9147
Epoch: [160]  [ 200/1251]  eta: 0:03:45  lr: 0.001997  min_lr: 0.001997  loss: 2.6643 (3.1173)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7913 (0.9097)  time: 0.2373  data: 0.0008  max mem: 9147
Epoch: [160]  [ 400/1251]  eta: 0:03:03  lr: 0.001993  min_lr: 0.001993  loss: 2.7212 (3.1042)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8671 (0.9138)  time: 0.2189  data: 0.0007  max mem: 9147
Epoch: [160]  [ 600/1251]  eta: 0:02:20  lr: 0.001990  min_lr: 0.001990  loss: 2.5443 (3.0719)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9161 (0.9017)  time: 0.1996  data: 0.0006  max mem: 9147
Epoch: [160]  [ 800/1251]  eta: 0:01:35  lr: 0.001986  min_lr: 0.001986  loss: 2.5316 (3.0533)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8439 (0.8877)  time: 0.2001  data: 0.0006  max mem: 9147
Epoch: [160]  [1000/1251]  eta: 0:00:53  lr: 0.001983  min_lr: 0.001983  loss: 2.6096 (3.0593)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8073 (0.9002)  time: 0.2288  data: 0.0006  max mem: 9147
Epoch: [160]  [1200/1251]  eta: 0:00:10  lr: 0.001979  min_lr: 0.001979  loss: 2.6376 (3.0871)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9524 (0.9259)  time: 0.1814  data: 0.0005  max mem: 9147
Epoch: [160]  [1250/1251]  eta: 0:00:00  lr: 0.001978  min_lr: 0.001978  loss: 2.6129 (3.0790)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8737 (0.9247)  time: 0.1399  data: 0.0006  max mem: 9147
Epoch: [160] Total time: 0:04:21 (0.2094 s / it)
Averaged stats: lr: 0.001978  min_lr: 0.001978  loss: 2.6129 (3.1075)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8737 (0.9247)
Test:  [ 0/25]  eta: 0:02:13  loss: 0.8277 (0.8277)  acc1: 84.8000 (84.8000)  acc5: 96.8000 (96.8000)  time: 5.3497  data: 5.2492  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9800 (1.0116)  acc1: 81.6000 (79.6364)  acc5: 95.6000 (94.8727)  time: 0.6821  data: 0.5900  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2959 (1.2454)  acc1: 71.2000 (74.1143)  acc5: 91.2000 (91.9619)  time: 0.1914  data: 0.1105  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4346 (1.2549)  acc1: 68.4000 (73.6960)  acc5: 90.0000 (91.9040)  time: 0.2175  data: 0.1400  max mem: 9147
Test: Total time: 0:00:10 (0.4064 s / it)
* Acc@1 73.824 Acc@5 91.926 loss 1.254
Accuracy of the model on the 50000 test images: 73.8%
Max accuracy: 74.58%
Epoch: [161]  [   0/1251]  eta: 1:05:57  lr: 0.001978  min_lr: 0.001978  loss: 4.1864 (4.1864)  weight_decay: 0.0500 (0.0500)  time: 3.1636  data: 2.4826  max mem: 9147
Epoch: [161]  [ 200/1251]  eta: 0:03:47  lr: 0.001974  min_lr: 0.001974  loss: 2.4270 (3.0571)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9138 (0.9420)  time: 0.2052  data: 0.0007  max mem: 9147
Epoch: [161]  [ 400/1251]  eta: 0:03:00  lr: 0.001971  min_lr: 0.001971  loss: 2.7763 (3.0892)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9805 (0.9481)  time: 0.2103  data: 0.0062  max mem: 9147
Epoch: [161]  [ 600/1251]  eta: 0:02:18  lr: 0.001967  min_lr: 0.001967  loss: 2.9414 (3.1423)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9701 (0.9617)  time: 0.1966  data: 0.0007  max mem: 9147
Epoch: [161]  [ 800/1251]  eta: 0:01:34  lr: 0.001964  min_lr: 0.001964  loss: 2.6249 (3.1404)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8184 (0.9395)  time: 0.2056  data: 0.0006  max mem: 9147
Epoch: [161]  [1000/1251]  eta: 0:00:52  lr: 0.001960  min_lr: 0.001960  loss: 3.4021 (3.1503)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8024 (0.9139)  time: 0.2194  data: 0.0174  max mem: 9147
Epoch: [161]  [1200/1251]  eta: 0:00:10  lr: 0.001956  min_lr: 0.001956  loss: 2.5165 (3.1375)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8158 (0.9067)  time: 0.2086  data: 0.0008  max mem: 9147
Epoch: [161]  [1250/1251]  eta: 0:00:00  lr: 0.001956  min_lr: 0.001956  loss: 2.6560 (3.1397)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9495 (0.9058)  time: 0.1531  data: 0.0071  max mem: 9147
Epoch: [161] Total time: 0:04:24 (0.2113 s / it)
Averaged stats: lr: 0.001956  min_lr: 0.001956  loss: 2.6560 (3.1238)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9495 (0.9058)
Test:  [ 0/25]  eta: 0:02:09  loss: 0.8046 (0.8046)  acc1: 86.0000 (86.0000)  acc5: 97.6000 (97.6000)  time: 5.1788  data: 5.0984  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9594 (1.0014)  acc1: 79.6000 (79.2000)  acc5: 96.0000 (95.6364)  time: 0.6757  data: 0.5910  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2332 (1.2071)  acc1: 72.0000 (74.5524)  acc5: 92.0000 (92.7238)  time: 0.2023  data: 0.1249  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3331 (1.2138)  acc1: 72.0000 (74.5760)  acc5: 90.8000 (92.6240)  time: 0.2124  data: 0.1403  max mem: 9147
Test: Total time: 0:00:09 (0.3967 s / it)
* Acc@1 74.528 Acc@5 92.452 loss 1.214
Accuracy of the model on the 50000 test images: 74.5%
Max accuracy: 74.58%
Epoch: [162]  [   0/1251]  eta: 1:00:24  lr: 0.001956  min_lr: 0.001956  loss: 2.2852 (2.2852)  weight_decay: 0.0500 (0.0500)  time: 2.8971  data: 2.2777  max mem: 9147
Epoch: [162]  [ 200/1251]  eta: 0:03:55  lr: 0.001952  min_lr: 0.001952  loss: 3.7534 (3.1551)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8792 (1.0125)  time: 0.2046  data: 0.0006  max mem: 9147
Epoch: [162]  [ 400/1251]  eta: 0:03:06  lr: 0.001948  min_lr: 0.001948  loss: 2.7261 (3.0931)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9456 (0.9822)  time: 0.2003  data: 0.0008  max mem: 9147
Epoch: [162]  [ 600/1251]  eta: 0:02:21  lr: 0.001945  min_lr: 0.001945  loss: 2.4504 (3.0628)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8770 (0.9488)  time: 0.2402  data: 0.0007  max mem: 9147
Epoch: [162]  [ 800/1251]  eta: 0:01:37  lr: 0.001941  min_lr: 0.001941  loss: 2.8417 (3.0659)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7896 (0.9236)  time: 0.2197  data: 0.0007  max mem: 9147
Epoch: [162]  [1000/1251]  eta: 0:00:54  lr: 0.001938  min_lr: 0.001938  loss: 2.6716 (3.0572)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9424 (0.9320)  time: 0.2008  data: 0.0006  max mem: 9147
Epoch: [162]  [1200/1251]  eta: 0:00:11  lr: 0.001934  min_lr: 0.001934  loss: 2.7395 (3.0740)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8459 (0.9260)  time: 0.2198  data: 0.0007  max mem: 9147
Epoch: [162]  [1250/1251]  eta: 0:00:00  lr: 0.001933  min_lr: 0.001933  loss: 2.5216 (3.0753)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9192 (0.9239)  time: 0.1400  data: 0.0014  max mem: 9147
Epoch: [162] Total time: 0:04:29 (0.2153 s / it)
Averaged stats: lr: 0.001933  min_lr: 0.001933  loss: 2.5216 (3.0767)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9192 (0.9239)
Test:  [ 0/25]  eta: 0:02:28  loss: 0.7892 (0.7892)  acc1: 84.0000 (84.0000)  acc5: 96.8000 (96.8000)  time: 5.9523  data: 5.8718  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9322 (0.9820)  acc1: 79.6000 (78.9091)  acc5: 95.2000 (95.2727)  time: 0.7592  data: 0.6841  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2407 (1.1910)  acc1: 71.6000 (74.5714)  acc5: 92.0000 (92.7048)  time: 0.2172  data: 0.1443  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3087 (1.2047)  acc1: 70.8000 (73.9520)  acc5: 91.6000 (92.5280)  time: 0.2158  data: 0.1442  max mem: 9147
Test: Total time: 0:00:10 (0.4265 s / it)
* Acc@1 74.672 Acc@5 92.526 loss 1.198
Accuracy of the model on the 50000 test images: 74.7%
Max accuracy: 74.67%
Epoch: [163]  [   0/1251]  eta: 1:05:36  lr: 0.001933  min_lr: 0.001933  loss: 3.9865 (3.9865)  weight_decay: 0.0500 (0.0500)  time: 3.1466  data: 2.9355  max mem: 9147
Epoch: [163]  [ 200/1251]  eta: 0:03:56  lr: 0.001930  min_lr: 0.001930  loss: 2.5948 (3.0799)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9110 (0.9014)  time: 0.2201  data: 0.0007  max mem: 9147
Epoch: [163]  [ 400/1251]  eta: 0:03:07  lr: 0.001926  min_lr: 0.001926  loss: 3.0192 (3.1019)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9175 (0.9643)  time: 0.2200  data: 0.0007  max mem: 9147
Epoch: [163]  [ 600/1251]  eta: 0:02:23  lr: 0.001922  min_lr: 0.001922  loss: 2.4595 (3.0898)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8297 (0.9697)  time: 0.2153  data: 0.0022  max mem: 9147
Epoch: [163]  [ 800/1251]  eta: 0:01:38  lr: 0.001919  min_lr: 0.001919  loss: 2.5935 (3.0895)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8148 (0.9344)  time: 0.2118  data: 0.0007  max mem: 9147
Epoch: [163]  [1000/1251]  eta: 0:00:54  lr: 0.001915  min_lr: 0.001915  loss: 2.7742 (3.0970)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9689 (0.9388)  time: 0.2098  data: 0.0006  max mem: 9147
Epoch: [163]  [1200/1251]  eta: 0:00:11  lr: 0.001912  min_lr: 0.001912  loss: 2.7318 (3.0974)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9064 (0.9235)  time: 0.2110  data: 0.0007  max mem: 9147
Epoch: [163]  [1250/1251]  eta: 0:00:00  lr: 0.001911  min_lr: 0.001911  loss: 2.3839 (3.0965)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8260 (0.9205)  time: 0.1393  data: 0.0007  max mem: 9147
Epoch: [163] Total time: 0:04:32 (0.2178 s / it)
Averaged stats: lr: 0.001911  min_lr: 0.001911  loss: 2.3839 (3.0828)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8260 (0.9205)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.7969 (0.7969)  acc1: 83.2000 (83.2000)  acc5: 98.0000 (98.0000)  time: 5.6906  data: 5.6027  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9109 (0.9740)  acc1: 80.0000 (79.3455)  acc5: 96.0000 (95.3091)  time: 0.7632  data: 0.6868  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2354 (1.1740)  acc1: 72.8000 (74.6857)  acc5: 92.0000 (92.8571)  time: 0.1915  data: 0.1186  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2861 (1.1885)  acc1: 71.2000 (74.3360)  acc5: 91.6000 (92.6720)  time: 0.2016  data: 0.1307  max mem: 9147
Test: Total time: 0:00:10 (0.4050 s / it)
* Acc@1 74.592 Acc@5 92.552 loss 1.186
Accuracy of the model on the 50000 test images: 74.6%
Max accuracy: 74.67%
Epoch: [164]  [   0/1251]  eta: 1:06:08  lr: 0.001911  min_lr: 0.001911  loss: 4.0525 (4.0525)  weight_decay: 0.0500 (0.0500)  time: 3.1723  data: 2.2844  max mem: 9147
Epoch: [164]  [ 200/1251]  eta: 0:03:44  lr: 0.001907  min_lr: 0.001907  loss: 2.6966 (3.0054)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9315 (0.9463)  time: 0.2096  data: 0.0012  max mem: 9147
Epoch: [164]  [ 400/1251]  eta: 0:03:02  lr: 0.001904  min_lr: 0.001904  loss: 2.3856 (3.0270)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8994 (0.9226)  time: 0.2079  data: 0.0007  max mem: 9147
Epoch: [164]  [ 600/1251]  eta: 0:02:20  lr: 0.001900  min_lr: 0.001900  loss: 2.5473 (3.0463)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7312 (0.8997)  time: 0.2298  data: 0.0045  max mem: 9147
Epoch: [164]  [ 800/1251]  eta: 0:01:35  lr: 0.001896  min_lr: 0.001896  loss: 2.4948 (3.0330)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9289 (0.9063)  time: 0.1961  data: 0.0011  max mem: 9147
Epoch: [164]  [1000/1251]  eta: 0:00:51  lr: 0.001893  min_lr: 0.001893  loss: 2.6565 (3.0337)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0086 (0.9229)  time: 0.1947  data: 0.0010  max mem: 9147
Epoch: [164]  [1200/1251]  eta: 0:00:10  lr: 0.001889  min_lr: 0.001889  loss: 2.6659 (3.0542)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0002 (0.9326)  time: 0.2003  data: 0.0007  max mem: 9147
Epoch: [164]  [1250/1251]  eta: 0:00:00  lr: 0.001888  min_lr: 0.001888  loss: 2.4551 (3.0544)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9507 (0.9296)  time: 0.1468  data: 0.0010  max mem: 9147
Epoch: [164] Total time: 0:04:15 (0.2042 s / it)
Averaged stats: lr: 0.001888  min_lr: 0.001888  loss: 2.4551 (3.0779)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9507 (0.9296)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.7228 (0.7228)  acc1: 86.0000 (86.0000)  acc5: 98.4000 (98.4000)  time: 5.6255  data: 5.5452  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9172 (0.9665)  acc1: 81.6000 (79.6000)  acc5: 96.0000 (95.6364)  time: 0.7680  data: 0.6749  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2111 (1.1752)  acc1: 73.2000 (75.1619)  acc5: 92.0000 (92.8191)  time: 0.2207  data: 0.1380  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3199 (1.1928)  acc1: 73.2000 (74.6240)  acc5: 90.8000 (92.6400)  time: 0.2162  data: 0.1379  max mem: 9147
Test: Total time: 0:00:10 (0.4155 s / it)
* Acc@1 74.682 Acc@5 92.688 loss 1.190
Accuracy of the model on the 50000 test images: 74.7%
Max accuracy: 74.68%
Epoch: [165]  [   0/1251]  eta: 0:57:26  lr: 0.001888  min_lr: 0.001888  loss: 4.2464 (4.2464)  weight_decay: 0.0500 (0.0500)  time: 2.7546  data: 2.4733  max mem: 9147
Epoch: [165]  [ 200/1251]  eta: 0:03:53  lr: 0.001885  min_lr: 0.001885  loss: 2.4904 (3.0538)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8028 (0.9099)  time: 0.2255  data: 0.0006  max mem: 9147
Epoch: [165]  [ 400/1251]  eta: 0:02:56  lr: 0.001881  min_lr: 0.001881  loss: 2.5971 (3.0885)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8197 (0.8812)  time: 0.1924  data: 0.0006  max mem: 9147
Epoch: [165]  [ 600/1251]  eta: 0:02:13  lr: 0.001878  min_lr: 0.001878  loss: 2.4631 (3.0854)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7967 (0.8690)  time: 0.2111  data: 0.0008  max mem: 9147
Epoch: [165]  [ 800/1251]  eta: 0:01:33  lr: 0.001874  min_lr: 0.001874  loss: 3.1834 (3.0716)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8884 (0.8867)  time: 0.2189  data: 0.0006  max mem: 9147
Epoch: [165]  [1000/1251]  eta: 0:00:51  lr: 0.001870  min_lr: 0.001870  loss: 2.6601 (3.0617)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8331 (0.8973)  time: 0.2016  data: 0.0006  max mem: 9147
Epoch: [165]  [1200/1251]  eta: 0:00:10  lr: 0.001867  min_lr: 0.001867  loss: 2.5080 (3.0668)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9229 (0.9060)  time: 0.2347  data: 0.0304  max mem: 9147
Epoch: [165]  [1250/1251]  eta: 0:00:00  lr: 0.001866  min_lr: 0.001866  loss: 2.6849 (3.0685)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8037 (0.9031)  time: 0.1434  data: 0.0008  max mem: 9147
Epoch: [165] Total time: 0:04:20 (0.2083 s / it)
Averaged stats: lr: 0.001866  min_lr: 0.001866  loss: 2.6849 (3.0709)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8037 (0.9031)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.7785 (0.7785)  acc1: 84.8000 (84.8000)  acc5: 98.0000 (98.0000)  time: 5.6125  data: 5.5319  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9466 (0.9919)  acc1: 82.0000 (79.8545)  acc5: 95.2000 (95.4545)  time: 0.7457  data: 0.6518  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2647 (1.1917)  acc1: 72.0000 (74.9333)  acc5: 92.4000 (93.1048)  time: 0.1993  data: 0.1171  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3322 (1.2109)  acc1: 71.6000 (74.4800)  acc5: 91.2000 (92.8800)  time: 0.1988  data: 0.1221  max mem: 9147
Test: Total time: 0:00:10 (0.4023 s / it)
* Acc@1 74.618 Acc@5 92.428 loss 1.215
Accuracy of the model on the 50000 test images: 74.6%
Max accuracy: 74.68%
Epoch: [166]  [   0/1251]  eta: 1:03:40  lr: 0.001866  min_lr: 0.001866  loss: 2.5247 (2.5247)  weight_decay: 0.0500 (0.0500)  time: 3.0538  data: 2.7964  max mem: 9147
Epoch: [166]  [ 200/1251]  eta: 0:03:51  lr: 0.001862  min_lr: 0.001862  loss: 2.7308 (3.1380)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8574 (0.9471)  time: 0.2000  data: 0.0006  max mem: 9147
Epoch: [166]  [ 400/1251]  eta: 0:03:04  lr: 0.001859  min_lr: 0.001859  loss: 3.7181 (3.1262)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9871 (0.9424)  time: 0.2205  data: 0.0007  max mem: 9147
Epoch: [166]  [ 600/1251]  eta: 0:02:20  lr: 0.001855  min_lr: 0.001855  loss: 2.6538 (3.1165)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0794 (0.9448)  time: 0.2063  data: 0.0007  max mem: 9147
Epoch: [166]  [ 800/1251]  eta: 0:01:35  lr: 0.001852  min_lr: 0.001852  loss: 3.5115 (3.1012)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8707 (0.9332)  time: 0.2104  data: 0.0007  max mem: 9147
Epoch: [166]  [1000/1251]  eta: 0:00:52  lr: 0.001848  min_lr: 0.001848  loss: 2.7707 (3.1006)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7838 (0.9168)  time: 0.2004  data: 0.0006  max mem: 9147
Epoch: [166]  [1200/1251]  eta: 0:00:10  lr: 0.001844  min_lr: 0.001844  loss: 2.4671 (3.0991)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8786 (0.9095)  time: 0.2150  data: 0.0006  max mem: 9147
Epoch: [166]  [1250/1251]  eta: 0:00:00  lr: 0.001844  min_lr: 0.001844  loss: 2.8968 (3.0956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9178 (0.9119)  time: 0.1389  data: 0.0011  max mem: 9147
Epoch: [166] Total time: 0:04:22 (0.2097 s / it)
Averaged stats: lr: 0.001844  min_lr: 0.001844  loss: 2.8968 (3.0808)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9178 (0.9119)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.8033 (0.8033)  acc1: 85.2000 (85.2000)  acc5: 97.6000 (97.6000)  time: 5.5274  data: 5.4469  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0227 (0.9986)  acc1: 82.0000 (80.6545)  acc5: 96.0000 (95.6364)  time: 0.7476  data: 0.6717  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2256 (1.1972)  acc1: 74.4000 (75.6381)  acc5: 92.8000 (92.6667)  time: 0.2196  data: 0.1462  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3073 (1.2111)  acc1: 72.4000 (75.2480)  acc5: 91.2000 (92.5440)  time: 0.2179  data: 0.1461  max mem: 9147
Test: Total time: 0:00:10 (0.4110 s / it)
* Acc@1 75.112 Acc@5 92.680 loss 1.217
Accuracy of the model on the 50000 test images: 75.1%
Max accuracy: 75.11%
Epoch: [167]  [   0/1251]  eta: 0:56:31  lr: 0.001844  min_lr: 0.001844  loss: 3.7431 (3.7431)  weight_decay: 0.0500 (0.0500)  time: 2.7108  data: 2.5043  max mem: 9147
Epoch: [167]  [ 200/1251]  eta: 0:03:54  lr: 0.001840  min_lr: 0.001840  loss: 2.9066 (3.0807)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8412 (0.9189)  time: 0.2253  data: 0.0007  max mem: 9147
Epoch: [167]  [ 400/1251]  eta: 0:03:07  lr: 0.001836  min_lr: 0.001836  loss: 3.8544 (3.0766)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9060 (0.9321)  time: 0.2193  data: 0.0007  max mem: 9147
Epoch: [167]  [ 600/1251]  eta: 0:02:22  lr: 0.001833  min_lr: 0.001833  loss: 3.0440 (3.0977)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8722 (0.9369)  time: 0.2489  data: 0.0007  max mem: 9147
Epoch: [167]  [ 800/1251]  eta: 0:01:37  lr: 0.001829  min_lr: 0.001829  loss: 3.2982 (3.1096)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9067 (0.9225)  time: 0.2128  data: 0.0007  max mem: 9147
Epoch: [167]  [1000/1251]  eta: 0:00:54  lr: 0.001826  min_lr: 0.001826  loss: 2.4813 (3.1056)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8298 (0.9159)  time: 0.2206  data: 0.0007  max mem: 9147
Epoch: [167]  [1200/1251]  eta: 0:00:11  lr: 0.001822  min_lr: 0.001822  loss: 3.0971 (3.1055)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8952 (0.9190)  time: 0.2036  data: 0.0006  max mem: 9147
Epoch: [167]  [1250/1251]  eta: 0:00:00  lr: 0.001821  min_lr: 0.001821  loss: 2.5185 (3.1054)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8173 (0.9147)  time: 0.1492  data: 0.0015  max mem: 9147
Epoch: [167] Total time: 0:04:30 (0.2163 s / it)
Averaged stats: lr: 0.001821  min_lr: 0.001821  loss: 2.5185 (3.0685)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8173 (0.9147)
Test:  [ 0/25]  eta: 0:01:23  loss: 0.8133 (0.8133)  acc1: 84.8000 (84.8000)  acc5: 97.2000 (97.2000)  time: 3.3224  data: 3.2419  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 1.0122 (1.0022)  acc1: 80.4000 (79.6364)  acc5: 96.4000 (95.2000)  time: 0.5744  data: 0.4924  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2145 (1.2286)  acc1: 72.8000 (74.7619)  acc5: 91.6000 (92.8191)  time: 0.2631  data: 0.1862  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.4027 (1.2488)  acc1: 71.2000 (74.1920)  acc5: 91.2000 (92.5280)  time: 0.2319  data: 0.1586  max mem: 9147
Test: Total time: 0:00:09 (0.3979 s / it)
* Acc@1 74.692 Acc@5 92.540 loss 1.242
Accuracy of the model on the 50000 test images: 74.7%
Max accuracy: 75.11%
Epoch: [168]  [   0/1251]  eta: 1:05:29  lr: 0.001821  min_lr: 0.001821  loss: 2.2635 (2.2635)  weight_decay: 0.0500 (0.0500)  time: 3.1413  data: 2.2410  max mem: 9147
Epoch: [168]  [ 200/1251]  eta: 0:03:47  lr: 0.001818  min_lr: 0.001818  loss: 2.3840 (3.0312)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0169 (nan)  time: 0.2392  data: 0.0007  max mem: 9147
Epoch: [168]  [ 400/1251]  eta: 0:03:03  lr: 0.001814  min_lr: 0.001814  loss: 2.3805 (3.0182)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9057 (nan)  time: 0.2003  data: 0.0006  max mem: 9147
Epoch: [168]  [ 600/1251]  eta: 0:02:17  lr: 0.001811  min_lr: 0.001811  loss: 2.8799 (3.0326)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8587 (nan)  time: 0.1969  data: 0.0013  max mem: 9147
Epoch: [168]  [ 800/1251]  eta: 0:01:33  lr: 0.001807  min_lr: 0.001807  loss: 2.8389 (3.0494)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9618 (nan)  time: 0.2000  data: 0.0005  max mem: 9147
Epoch: [168]  [1000/1251]  eta: 0:00:52  lr: 0.001803  min_lr: 0.001803  loss: 2.6571 (3.0449)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8130 (nan)  time: 0.2241  data: 0.0007  max mem: 9147
Epoch: [168]  [1200/1251]  eta: 0:00:10  lr: 0.001800  min_lr: 0.001800  loss: 3.0174 (3.0467)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8506 (nan)  time: 0.1894  data: 0.0010  max mem: 9147
Epoch: [168]  [1250/1251]  eta: 0:00:00  lr: 0.001799  min_lr: 0.001799  loss: 2.9430 (3.0542)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9302 (nan)  time: 0.1388  data: 0.0016  max mem: 9147
Epoch: [168] Total time: 0:04:18 (0.2069 s / it)
Averaged stats: lr: 0.001799  min_lr: 0.001799  loss: 2.9430 (3.0484)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9302 (nan)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.7747 (0.7747)  acc1: 85.2000 (85.2000)  acc5: 98.0000 (98.0000)  time: 5.7162  data: 5.6358  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0171 (1.0246)  acc1: 80.8000 (79.1636)  acc5: 96.4000 (95.9273)  time: 0.7579  data: 0.6733  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2072 (1.2244)  acc1: 71.2000 (74.5333)  acc5: 92.8000 (93.2952)  time: 0.2070  data: 0.1293  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3304 (1.2310)  acc1: 70.8000 (74.3040)  acc5: 91.6000 (93.0720)  time: 0.2041  data: 0.1306  max mem: 9147
Test: Total time: 0:00:10 (0.4098 s / it)
* Acc@1 75.020 Acc@5 92.678 loss 1.229
Accuracy of the model on the 50000 test images: 75.0%
Max accuracy: 75.11%
Epoch: [169]  [   0/1251]  eta: 1:07:36  lr: 0.001799  min_lr: 0.001799  loss: 3.9359 (3.9359)  weight_decay: 0.0500 (0.0500)  time: 3.2427  data: 3.0507  max mem: 9147
Epoch: [169]  [ 200/1251]  eta: 0:03:55  lr: 0.001795  min_lr: 0.001795  loss: 2.3416 (3.0465)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9324 (1.0057)  time: 0.2212  data: 0.0006  max mem: 9147
Epoch: [169]  [ 400/1251]  eta: 0:03:02  lr: 0.001792  min_lr: 0.001792  loss: 2.5980 (3.0249)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7407 (0.9376)  time: 0.2046  data: 0.0006  max mem: 9147
Epoch: [169]  [ 600/1251]  eta: 0:02:20  lr: 0.001788  min_lr: 0.001788  loss: 3.4720 (3.0135)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9298 (0.9220)  time: 0.2226  data: 0.0008  max mem: 9147
Epoch: [169]  [ 800/1251]  eta: 0:01:36  lr: 0.001785  min_lr: 0.001785  loss: 2.5173 (3.0438)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8810 (0.9249)  time: 0.2150  data: 0.0022  max mem: 9147
Epoch: [169]  [1000/1251]  eta: 0:00:53  lr: 0.001781  min_lr: 0.001781  loss: 2.4320 (3.0382)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8226 (0.9232)  time: 0.2204  data: 0.0007  max mem: 9147
Epoch: [169]  [1200/1251]  eta: 0:00:10  lr: 0.001777  min_lr: 0.001777  loss: 3.6095 (3.0679)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9576 (0.9276)  time: 0.1994  data: 0.0005  max mem: 9147
Epoch: [169]  [1250/1251]  eta: 0:00:00  lr: 0.001777  min_lr: 0.001777  loss: 2.7139 (3.0707)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0752 (0.9322)  time: 0.1471  data: 0.0015  max mem: 9147
Epoch: [169] Total time: 0:04:27 (0.2141 s / it)
Averaged stats: lr: 0.001777  min_lr: 0.001777  loss: 2.7139 (3.0604)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0752 (0.9322)
Test:  [ 0/25]  eta: 0:02:24  loss: 0.9233 (0.9233)  acc1: 85.2000 (85.2000)  acc5: 96.0000 (96.0000)  time: 5.7683  data: 5.6715  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0005 (1.0420)  acc1: 80.8000 (79.6727)  acc5: 95.6000 (95.2727)  time: 0.7495  data: 0.6729  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2918 (1.2415)  acc1: 72.8000 (75.2952)  acc5: 92.4000 (92.8000)  time: 0.2076  data: 0.1350  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3913 (1.2549)  acc1: 72.0000 (74.7200)  acc5: 92.0000 (92.6400)  time: 0.2062  data: 0.1349  max mem: 9147
Test: Total time: 0:00:10 (0.4110 s / it)
* Acc@1 74.756 Acc@5 92.626 loss 1.257
Accuracy of the model on the 50000 test images: 74.8%
Max accuracy: 75.11%
Epoch: [170]  [   0/1251]  eta: 0:57:17  lr: 0.001777  min_lr: 0.001777  loss: 2.4184 (2.4184)  weight_decay: 0.0500 (0.0500)  time: 2.7481  data: 1.6383  max mem: 9147
Epoch: [170]  [ 200/1251]  eta: 0:03:59  lr: 0.001773  min_lr: 0.001773  loss: 2.4736 (3.0361)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8979 (0.8817)  time: 0.2392  data: 0.0009  max mem: 9147
Epoch: [170]  [ 400/1251]  eta: 0:03:08  lr: 0.001769  min_lr: 0.001769  loss: 3.0035 (3.1111)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7760 (0.8802)  time: 0.2066  data: 0.0006  max mem: 9147
Epoch: [170]  [ 600/1251]  eta: 0:02:20  lr: 0.001766  min_lr: 0.001766  loss: 2.5843 (3.1027)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9111 (0.9021)  time: 0.2287  data: 0.0008  max mem: 9147
Epoch: [170]  [ 800/1251]  eta: 0:01:37  lr: 0.001762  min_lr: 0.001762  loss: 2.3846 (3.0967)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8964 (0.9052)  time: 0.2187  data: 0.0006  max mem: 9147
Epoch: [170]  [1000/1251]  eta: 0:00:54  lr: 0.001759  min_lr: 0.001759  loss: 2.6314 (3.0750)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9152 (0.9077)  time: 0.2242  data: 0.0279  max mem: 9147
Epoch: [170]  [1200/1251]  eta: 0:00:10  lr: 0.001755  min_lr: 0.001755  loss: 2.6634 (3.0696)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8398 (0.9101)  time: 0.2198  data: 0.0007  max mem: 9147
Epoch: [170]  [1250/1251]  eta: 0:00:00  lr: 0.001754  min_lr: 0.001754  loss: 2.7750 (3.0729)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8209 (0.9099)  time: 0.1415  data: 0.0015  max mem: 9147
Epoch: [170] Total time: 0:04:29 (0.2151 s / it)
Averaged stats: lr: 0.001754  min_lr: 0.001754  loss: 2.7750 (3.0618)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8209 (0.9099)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.6958 (0.6958)  acc1: 86.0000 (86.0000)  acc5: 97.6000 (97.6000)  time: 5.6734  data: 5.5931  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9578 (0.9488)  acc1: 80.0000 (79.9636)  acc5: 95.6000 (95.5273)  time: 0.7385  data: 0.6575  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2243 (1.1649)  acc1: 73.2000 (75.2952)  acc5: 92.0000 (92.8952)  time: 0.1931  data: 0.1173  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2878 (1.1885)  acc1: 71.6000 (74.6400)  acc5: 91.2000 (92.7360)  time: 0.1961  data: 0.1221  max mem: 9147
Test: Total time: 0:00:10 (0.4017 s / it)
* Acc@1 74.842 Acc@5 92.664 loss 1.175
Accuracy of the model on the 50000 test images: 74.8%
Max accuracy: 75.11%
Epoch: [171]  [   0/1251]  eta: 1:02:35  lr: 0.001754  min_lr: 0.001754  loss: 2.6162 (2.6162)  weight_decay: 0.0500 (0.0500)  time: 3.0019  data: 2.1872  max mem: 9147
Epoch: [171]  [ 200/1251]  eta: 0:03:38  lr: 0.001751  min_lr: 0.001751  loss: 2.5777 (2.9242)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8317 (0.8821)  time: 0.2003  data: 0.0006  max mem: 9147
Epoch: [171]  [ 400/1251]  eta: 0:02:58  lr: 0.001747  min_lr: 0.001747  loss: 2.6395 (2.9583)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9651 (0.9322)  time: 0.1987  data: 0.0007  max mem: 9147
Epoch: [171]  [ 600/1251]  eta: 0:02:15  lr: 0.001744  min_lr: 0.001744  loss: 2.6870 (2.9842)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8554 (0.9303)  time: 0.2003  data: 0.0006  max mem: 9147
Epoch: [171]  [ 800/1251]  eta: 0:01:34  lr: 0.001740  min_lr: 0.001740  loss: 2.6157 (3.0170)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8472 (0.9501)  time: 0.2003  data: 0.0007  max mem: 9147
Epoch: [171]  [1000/1251]  eta: 0:00:52  lr: 0.001737  min_lr: 0.001737  loss: 2.7155 (3.0430)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8572 (0.9257)  time: 0.2245  data: 0.0008  max mem: 9147
Epoch: [171]  [1200/1251]  eta: 0:00:10  lr: 0.001733  min_lr: 0.001733  loss: 2.5020 (3.0534)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8579 (0.9178)  time: 0.2156  data: 0.0006  max mem: 9147
Epoch: [171]  [1250/1251]  eta: 0:00:00  lr: 0.001732  min_lr: 0.001732  loss: 2.5127 (3.0610)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7960 (0.9146)  time: 0.1392  data: 0.0009  max mem: 9147
Epoch: [171] Total time: 0:04:24 (0.2118 s / it)
Averaged stats: lr: 0.001732  min_lr: 0.001732  loss: 2.5127 (3.0756)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7960 (0.9146)
Test:  [ 0/25]  eta: 0:02:10  loss: 0.7449 (0.7449)  acc1: 85.6000 (85.6000)  acc5: 96.0000 (96.0000)  time: 5.2297  data: 5.1047  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9448 (0.9714)  acc1: 81.2000 (79.5636)  acc5: 96.0000 (95.2000)  time: 0.7360  data: 0.6388  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2146 (1.1825)  acc1: 74.4000 (75.3143)  acc5: 92.0000 (92.8762)  time: 0.2187  data: 0.1322  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3245 (1.1953)  acc1: 72.4000 (74.9440)  acc5: 91.2000 (92.6720)  time: 0.2127  data: 0.1322  max mem: 9147
Test: Total time: 0:00:09 (0.3985 s / it)
* Acc@1 75.226 Acc@5 92.834 loss 1.194
Accuracy of the model on the 50000 test images: 75.2%
Max accuracy: 75.23%
Epoch: [172]  [   0/1251]  eta: 0:57:25  lr: 0.001732  min_lr: 0.001732  loss: 2.2338 (2.2338)  weight_decay: 0.0500 (0.0500)  time: 2.7542  data: 2.5471  max mem: 9147
Epoch: [172]  [ 200/1251]  eta: 0:03:56  lr: 0.001729  min_lr: 0.001729  loss: 2.4569 (3.0109)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8476 (0.8656)  time: 0.2099  data: 0.0006  max mem: 9147
Epoch: [172]  [ 400/1251]  eta: 0:03:05  lr: 0.001725  min_lr: 0.001725  loss: 2.6047 (3.0495)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0429 (0.9003)  time: 0.2233  data: 0.0020  max mem: 9147
Epoch: [172]  [ 600/1251]  eta: 0:02:21  lr: 0.001721  min_lr: 0.001721  loss: 2.6097 (3.0521)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9560 (0.9575)  time: 0.1995  data: 0.0007  max mem: 9147
Epoch: [172]  [ 800/1251]  eta: 0:01:37  lr: 0.001718  min_lr: 0.001718  loss: 2.4944 (3.0462)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8511 (0.9583)  time: 0.2101  data: 0.0008  max mem: 9147
Epoch: [172]  [1000/1251]  eta: 0:00:54  lr: 0.001714  min_lr: 0.001714  loss: 2.7215 (3.0519)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9525 (0.9605)  time: 0.2203  data: 0.0006  max mem: 9147
Epoch: [172]  [1200/1251]  eta: 0:00:10  lr: 0.001711  min_lr: 0.001711  loss: 3.4596 (3.0613)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8487 (0.9573)  time: 0.2052  data: 0.0009  max mem: 9147
Epoch: [172]  [1250/1251]  eta: 0:00:00  lr: 0.001710  min_lr: 0.001710  loss: 3.0329 (3.0649)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8542 (0.9582)  time: 0.1483  data: 0.0011  max mem: 9147
Epoch: [172] Total time: 0:04:28 (0.2145 s / it)
Averaged stats: lr: 0.001710  min_lr: 0.001710  loss: 3.0329 (3.0749)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8542 (0.9582)
Test:  [ 0/25]  eta: 0:02:23  loss: 0.7185 (0.7185)  acc1: 88.0000 (88.0000)  acc5: 96.8000 (96.8000)  time: 5.7349  data: 5.6545  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9481 (0.9894)  acc1: 79.6000 (79.7091)  acc5: 96.0000 (95.5273)  time: 0.7898  data: 0.6942  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2312 (1.1944)  acc1: 72.4000 (75.1048)  acc5: 93.2000 (92.9524)  time: 0.2085  data: 0.1251  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3420 (1.2055)  acc1: 72.4000 (74.7840)  acc5: 90.4000 (92.8000)  time: 0.2078  data: 0.1303  max mem: 9147
Test: Total time: 0:00:10 (0.4142 s / it)
* Acc@1 75.112 Acc@5 92.666 loss 1.199
Accuracy of the model on the 50000 test images: 75.1%
Max accuracy: 75.23%
Epoch: [173]  [   0/1251]  eta: 1:04:11  lr: 0.001710  min_lr: 0.001710  loss: 2.9429 (2.9429)  weight_decay: 0.0500 (0.0500)  time: 3.0790  data: 2.5924  max mem: 9147
Epoch: [173]  [ 200/1251]  eta: 0:03:53  lr: 0.001706  min_lr: 0.001706  loss: 2.7425 (3.0432)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9540 (0.9583)  time: 0.2272  data: 0.0006  max mem: 9147
Epoch: [173]  [ 400/1251]  eta: 0:03:03  lr: 0.001703  min_lr: 0.001703  loss: 2.5118 (3.0127)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9292 (0.9525)  time: 0.1908  data: 0.0006  max mem: 9147
Epoch: [173]  [ 600/1251]  eta: 0:02:15  lr: 0.001699  min_lr: 0.001699  loss: 2.3901 (3.0290)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0575 (0.9814)  time: 0.1946  data: 0.0012  max mem: 9147
Epoch: [173]  [ 800/1251]  eta: 0:01:34  lr: 0.001696  min_lr: 0.001696  loss: 2.4968 (3.0270)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8552 (0.9651)  time: 0.2154  data: 0.0006  max mem: 9147
Epoch: [173]  [1000/1251]  eta: 0:00:52  lr: 0.001692  min_lr: 0.001692  loss: 2.8106 (3.0462)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8921 (0.9602)  time: 0.2290  data: 0.0013  max mem: 9147
Epoch: [173]  [1200/1251]  eta: 0:00:10  lr: 0.001689  min_lr: 0.001689  loss: 2.4868 (3.0329)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8930 (0.9496)  time: 0.2057  data: 0.0007  max mem: 9147
Epoch: [173]  [1250/1251]  eta: 0:00:00  lr: 0.001688  min_lr: 0.001688  loss: 2.6149 (3.0386)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9169 (0.9493)  time: 0.1400  data: 0.0013  max mem: 9147
Epoch: [173] Total time: 0:04:22 (0.2099 s / it)
Averaged stats: lr: 0.001688  min_lr: 0.001688  loss: 2.6149 (3.0568)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9169 (0.9493)
Test:  [ 0/25]  eta: 0:02:23  loss: 0.8046 (0.8046)  acc1: 85.2000 (85.2000)  acc5: 97.2000 (97.2000)  time: 5.7256  data: 5.6452  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0296 (1.0431)  acc1: 79.6000 (79.0909)  acc5: 95.2000 (95.2000)  time: 0.7539  data: 0.6767  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2729 (1.2301)  acc1: 72.8000 (75.2762)  acc5: 91.6000 (92.6667)  time: 0.1984  data: 0.1247  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3539 (1.2437)  acc1: 72.0000 (74.9440)  acc5: 91.2000 (92.6400)  time: 0.1956  data: 0.1246  max mem: 9147
Test: Total time: 0:00:10 (0.4040 s / it)
* Acc@1 75.174 Acc@5 92.810 loss 1.237
Accuracy of the model on the 50000 test images: 75.2%
Max accuracy: 75.23%
Epoch: [174]  [   0/1251]  eta: 1:05:59  lr: 0.001688  min_lr: 0.001688  loss: 2.5637 (2.5637)  weight_decay: 0.0500 (0.0500)  time: 3.1649  data: 2.8483  max mem: 9147
Epoch: [174]  [ 200/1251]  eta: 0:03:59  lr: 0.001684  min_lr: 0.001684  loss: 3.0866 (3.1364)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8290 (0.8762)  time: 0.2142  data: 0.0007  max mem: 9147
Epoch: [174]  [ 400/1251]  eta: 0:03:07  lr: 0.001681  min_lr: 0.001681  loss: 3.0333 (3.0862)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8498 (0.8814)  time: 0.2061  data: 0.0006  max mem: 9147
Epoch: [174]  [ 600/1251]  eta: 0:02:22  lr: 0.001677  min_lr: 0.001677  loss: 2.4946 (3.0918)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0587 (0.9124)  time: 0.2341  data: 0.0007  max mem: 9147
Epoch: [174]  [ 800/1251]  eta: 0:01:38  lr: 0.001674  min_lr: 0.001674  loss: 2.4985 (3.0763)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8307 (0.9133)  time: 0.2245  data: 0.0006  max mem: 9147
Epoch: [174]  [1000/1251]  eta: 0:00:54  lr: 0.001670  min_lr: 0.001670  loss: 2.7017 (3.0752)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1073 (0.9465)  time: 0.2293  data: 0.0290  max mem: 9147
Epoch: [174]  [1200/1251]  eta: 0:00:10  lr: 0.001666  min_lr: 0.001666  loss: 3.2515 (3.0735)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9587 (0.9494)  time: 0.2054  data: 0.0008  max mem: 9147
Epoch: [174]  [1250/1251]  eta: 0:00:00  lr: 0.001666  min_lr: 0.001666  loss: 2.5467 (3.0722)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9247 (0.9468)  time: 0.1504  data: 0.0017  max mem: 9147
Epoch: [174] Total time: 0:04:29 (0.2155 s / it)
Averaged stats: lr: 0.001666  min_lr: 0.001666  loss: 2.5467 (3.0543)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9247 (0.9468)
Test:  [ 0/25]  eta: 0:01:46  loss: 0.7141 (0.7141)  acc1: 86.8000 (86.8000)  acc5: 97.6000 (97.6000)  time: 4.2542  data: 4.1737  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9485 (0.9322)  acc1: 80.8000 (79.4545)  acc5: 96.0000 (95.7455)  time: 0.7080  data: 0.6156  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1601 (1.1346)  acc1: 73.6000 (75.3333)  acc5: 92.4000 (92.9524)  time: 0.2589  data: 0.1766  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2767 (1.1522)  acc1: 72.4000 (75.0080)  acc5: 91.2000 (92.7520)  time: 0.2157  data: 0.1366  max mem: 9147
Test: Total time: 0:00:10 (0.4160 s / it)
* Acc@1 75.336 Acc@5 92.876 loss 1.151
Accuracy of the model on the 50000 test images: 75.3%
Max accuracy: 75.34%
Epoch: [175]  [   0/1251]  eta: 0:58:50  lr: 0.001666  min_lr: 0.001666  loss: 2.1708 (2.1708)  weight_decay: 0.0500 (0.0500)  time: 2.8224  data: 2.5334  max mem: 9147
Epoch: [175]  [ 200/1251]  eta: 0:03:34  lr: 0.001662  min_lr: 0.001662  loss: 3.6067 (3.1475)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8615 (0.9836)  time: 0.1900  data: 0.0005  max mem: 9147
Epoch: [175]  [ 400/1251]  eta: 0:02:47  lr: 0.001658  min_lr: 0.001658  loss: 2.6985 (3.0752)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8259 (0.9689)  time: 0.1947  data: 0.0015  max mem: 9147
Epoch: [175]  [ 600/1251]  eta: 0:02:06  lr: 0.001655  min_lr: 0.001655  loss: 2.4288 (3.0528)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9399 (0.9651)  time: 0.1857  data: 0.0014  max mem: 9147
Epoch: [175]  [ 800/1251]  eta: 0:01:29  lr: 0.001651  min_lr: 0.001651  loss: 2.5503 (3.0684)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9168 (0.9566)  time: 0.2109  data: 0.0007  max mem: 9147
Epoch: [175]  [1000/1251]  eta: 0:00:50  lr: 0.001648  min_lr: 0.001648  loss: 3.1971 (3.0713)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0131 (0.9516)  time: 0.2303  data: 0.0007  max mem: 9147
Epoch: [175]  [1200/1251]  eta: 0:00:10  lr: 0.001644  min_lr: 0.001644  loss: 3.8637 (3.0750)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0114 (0.9638)  time: 0.2186  data: 0.0007  max mem: 9147
Epoch: [175]  [1250/1251]  eta: 0:00:00  lr: 0.001644  min_lr: 0.001644  loss: 2.4552 (3.0712)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9485 (0.9626)  time: 0.1503  data: 0.0010  max mem: 9147
Epoch: [175] Total time: 0:04:16 (0.2048 s / it)
Averaged stats: lr: 0.001644  min_lr: 0.001644  loss: 2.4552 (3.0534)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9485 (0.9626)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.6980 (0.6980)  acc1: 85.6000 (85.6000)  acc5: 98.4000 (98.4000)  time: 5.5972  data: 5.5164  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9023 (0.9433)  acc1: 81.2000 (79.8545)  acc5: 96.0000 (95.4545)  time: 0.7367  data: 0.6528  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2600 (1.1472)  acc1: 72.8000 (75.5429)  acc5: 91.2000 (93.0476)  time: 0.2038  data: 0.1262  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2803 (1.1601)  acc1: 72.4000 (75.0400)  acc5: 91.2000 (92.8160)  time: 0.1989  data: 0.1262  max mem: 9147
Test: Total time: 0:00:10 (0.4013 s / it)
* Acc@1 75.028 Acc@5 92.840 loss 1.158
Accuracy of the model on the 50000 test images: 75.0%
Max accuracy: 75.34%
Epoch: [176]  [   0/1251]  eta: 1:03:48  lr: 0.001643  min_lr: 0.001643  loss: 3.1768 (3.1768)  weight_decay: 0.0500 (0.0500)  time: 3.0607  data: 2.3844  max mem: 9147
Epoch: [176]  [ 200/1251]  eta: 0:03:59  lr: 0.001640  min_lr: 0.001640  loss: 2.7277 (3.0404)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8384 (0.9024)  time: 0.2257  data: 0.0007  max mem: 9147
Epoch: [176]  [ 400/1251]  eta: 0:03:09  lr: 0.001636  min_lr: 0.001636  loss: 2.7869 (3.0369)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8362 (0.9213)  time: 0.2198  data: 0.0007  max mem: 9147
Epoch: [176]  [ 600/1251]  eta: 0:02:21  lr: 0.001633  min_lr: 0.001633  loss: 2.5340 (3.0095)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9567 (0.9423)  time: 0.2236  data: 0.0006  max mem: 9147
Epoch: [176]  [ 800/1251]  eta: 0:01:37  lr: 0.001629  min_lr: 0.001629  loss: 2.4707 (3.0036)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8903 (0.9503)  time: 0.2001  data: 0.0007  max mem: 9147
Epoch: [176]  [1000/1251]  eta: 0:00:54  lr: 0.001626  min_lr: 0.001626  loss: 2.7685 (2.9968)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9230 (0.9515)  time: 0.2088  data: 0.0006  max mem: 9147
Epoch: [176]  [1200/1251]  eta: 0:00:10  lr: 0.001622  min_lr: 0.001622  loss: 2.4644 (3.0165)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8392 (0.9393)  time: 0.2095  data: 0.0007  max mem: 9147
Epoch: [176]  [1250/1251]  eta: 0:00:00  lr: 0.001621  min_lr: 0.001621  loss: 2.5646 (3.0205)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8009 (0.9366)  time: 0.1480  data: 0.0009  max mem: 9147
Epoch: [176] Total time: 0:04:28 (0.2146 s / it)
Averaged stats: lr: 0.001621  min_lr: 0.001621  loss: 2.5646 (3.0434)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8009 (0.9366)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.7211 (0.7211)  acc1: 84.4000 (84.4000)  acc5: 98.4000 (98.4000)  time: 5.4528  data: 5.3246  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8667 (0.9298)  acc1: 81.6000 (80.2182)  acc5: 95.6000 (95.3455)  time: 0.7491  data: 0.6580  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1767 (1.1271)  acc1: 74.4000 (76.0571)  acc5: 92.0000 (92.8952)  time: 0.2128  data: 0.1338  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2582 (1.1409)  acc1: 74.0000 (75.5040)  acc5: 90.8000 (92.6400)  time: 0.2067  data: 0.1337  max mem: 9147
Test: Total time: 0:00:10 (0.4032 s / it)
* Acc@1 75.326 Acc@5 92.854 loss 1.140
Accuracy of the model on the 50000 test images: 75.3%
Max accuracy: 75.34%
Epoch: [177]  [   0/1251]  eta: 1:01:42  lr: 0.001621  min_lr: 0.001621  loss: 3.9457 (3.9457)  weight_decay: 0.0500 (0.0500)  time: 2.9598  data: 2.3810  max mem: 9147
Epoch: [177]  [ 200/1251]  eta: 0:03:58  lr: 0.001618  min_lr: 0.001618  loss: 2.9588 (2.9922)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9031 (0.9829)  time: 0.2213  data: 0.0007  max mem: 9147
Epoch: [177]  [ 400/1251]  eta: 0:03:04  lr: 0.001614  min_lr: 0.001614  loss: 2.5814 (3.0435)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8199 (0.9498)  time: 0.2148  data: 0.0007  max mem: 9147
Epoch: [177]  [ 600/1251]  eta: 0:02:20  lr: 0.001611  min_lr: 0.001611  loss: 3.4092 (3.0594)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8976 (0.9510)  time: 0.2051  data: 0.0008  max mem: 9147
Epoch: [177]  [ 800/1251]  eta: 0:01:36  lr: 0.001607  min_lr: 0.001607  loss: 2.5019 (3.0482)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8977 (0.9588)  time: 0.2103  data: 0.0007  max mem: 9147
Epoch: [177]  [1000/1251]  eta: 0:00:53  lr: 0.001604  min_lr: 0.001604  loss: 3.0922 (3.0415)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9537 (0.9629)  time: 0.1893  data: 0.0006  max mem: 9147
Epoch: [177]  [1200/1251]  eta: 0:00:10  lr: 0.001600  min_lr: 0.001600  loss: 2.5381 (3.0344)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8874 (0.9629)  time: 0.2099  data: 0.0007  max mem: 9147
Epoch: [177]  [1250/1251]  eta: 0:00:00  lr: 0.001599  min_lr: 0.001599  loss: 2.5233 (3.0320)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9500 (0.9604)  time: 0.1499  data: 0.0017  max mem: 9147
Epoch: [177] Total time: 0:04:20 (0.2085 s / it)
Averaged stats: lr: 0.001599  min_lr: 0.001599  loss: 2.5233 (3.0388)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9500 (0.9604)
Test:  [ 0/25]  eta: 0:02:11  loss: 0.7732 (0.7732)  acc1: 85.2000 (85.2000)  acc5: 97.6000 (97.6000)  time: 5.2703  data: 5.1898  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9539 (0.9655)  acc1: 81.6000 (79.9273)  acc5: 96.0000 (95.8182)  time: 0.7119  data: 0.6181  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1999 (1.1709)  acc1: 72.4000 (75.4857)  acc5: 92.4000 (92.9524)  time: 0.2131  data: 0.1296  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2879 (1.1816)  acc1: 72.0000 (75.2320)  acc5: 91.2000 (92.8000)  time: 0.2290  data: 0.1508  max mem: 9147
Test: Total time: 0:00:10 (0.4124 s / it)
* Acc@1 75.152 Acc@5 92.690 loss 1.183
Accuracy of the model on the 50000 test images: 75.2%
Max accuracy: 75.34%
Epoch: [178]  [   0/1251]  eta: 1:06:20  lr: 0.001599  min_lr: 0.001599  loss: 2.7306 (2.7306)  weight_decay: 0.0500 (0.0500)  time: 3.1816  data: 1.8125  max mem: 9147
Epoch: [178]  [ 200/1251]  eta: 0:04:00  lr: 0.001596  min_lr: 0.001596  loss: 2.5812 (3.0822)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9707 (0.9303)  time: 0.2141  data: 0.0013  max mem: 9147
Epoch: [178]  [ 400/1251]  eta: 0:03:00  lr: 0.001592  min_lr: 0.001592  loss: 2.4810 (3.0379)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8874 (0.9041)  time: 0.1983  data: 0.0012  max mem: 9147
Epoch: [178]  [ 600/1251]  eta: 0:02:14  lr: 0.001589  min_lr: 0.001589  loss: 3.6135 (3.0755)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7940 (0.9064)  time: 0.1984  data: 0.0006  max mem: 9147
Epoch: [178]  [ 800/1251]  eta: 0:01:32  lr: 0.001585  min_lr: 0.001585  loss: 2.4000 (3.0561)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9604 (0.9217)  time: 0.2103  data: 0.0005  max mem: 9147
Epoch: [178]  [1000/1251]  eta: 0:00:52  lr: 0.001582  min_lr: 0.001582  loss: 2.8290 (3.0554)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8832 (0.9182)  time: 0.2184  data: 0.0006  max mem: 9147
Epoch: [178]  [1200/1251]  eta: 0:00:10  lr: 0.001578  min_lr: 0.001578  loss: 2.9259 (3.0510)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0089 (0.9367)  time: 0.2091  data: 0.0007  max mem: 9147
Epoch: [178]  [1250/1251]  eta: 0:00:00  lr: 0.001578  min_lr: 0.001578  loss: 2.6685 (3.0497)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1040 (0.9458)  time: 0.1490  data: 0.0030  max mem: 9147
Epoch: [178] Total time: 0:04:17 (0.2055 s / it)
Averaged stats: lr: 0.001578  min_lr: 0.001578  loss: 2.6685 (3.0463)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1040 (0.9458)
Test:  [ 0/25]  eta: 0:01:57  loss: 0.7615 (0.7615)  acc1: 86.0000 (86.0000)  acc5: 98.0000 (98.0000)  time: 4.6941  data: 4.5847  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9949 (0.9958)  acc1: 80.4000 (79.9273)  acc5: 96.0000 (95.4909)  time: 0.6801  data: 0.6020  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2430 (1.1972)  acc1: 74.0000 (75.2191)  acc5: 92.0000 (93.0095)  time: 0.2335  data: 0.1614  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2716 (1.2125)  acc1: 74.0000 (74.8000)  acc5: 90.8000 (92.6880)  time: 0.2110  data: 0.1407  max mem: 9147
Test: Total time: 0:00:09 (0.3993 s / it)
* Acc@1 75.120 Acc@5 92.872 loss 1.209
Accuracy of the model on the 50000 test images: 75.1%
Max accuracy: 75.34%
Epoch: [179]  [   0/1251]  eta: 1:04:32  lr: 0.001577  min_lr: 0.001577  loss: 4.0092 (4.0092)  weight_decay: 0.0500 (0.0500)  time: 3.0952  data: 1.7784  max mem: 9147
Epoch: [179]  [ 200/1251]  eta: 0:03:47  lr: 0.001574  min_lr: 0.001574  loss: 2.4073 (3.0832)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9069 (1.0110)  time: 0.2000  data: 0.0009  max mem: 9147
Epoch: [179]  [ 400/1251]  eta: 0:02:55  lr: 0.001570  min_lr: 0.001570  loss: 2.7199 (3.0180)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8074 (0.9588)  time: 0.2035  data: 0.0006  max mem: 9147
Epoch: [179]  [ 600/1251]  eta: 0:02:14  lr: 0.001567  min_lr: 0.001567  loss: 2.4023 (3.0096)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8856 (0.9322)  time: 0.1900  data: 0.0005  max mem: 9147
Epoch: [179]  [ 800/1251]  eta: 0:01:31  lr: 0.001563  min_lr: 0.001563  loss: 2.8464 (3.0004)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8832 (0.9414)  time: 0.1952  data: 0.0006  max mem: 9147
Epoch: [179]  [1000/1251]  eta: 0:00:50  lr: 0.001560  min_lr: 0.001560  loss: 3.0173 (3.0085)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9602 (0.9532)  time: 0.2005  data: 0.0008  max mem: 9147
Epoch: [179]  [1200/1251]  eta: 0:00:10  lr: 0.001556  min_lr: 0.001556  loss: 2.5041 (3.0011)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8448 (0.9404)  time: 0.1997  data: 0.0006  max mem: 9147
Epoch: [179]  [1250/1251]  eta: 0:00:00  lr: 0.001556  min_lr: 0.001556  loss: 2.6282 (3.0086)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8328 (0.9366)  time: 0.1404  data: 0.0013  max mem: 9147
Epoch: [179] Total time: 0:04:12 (0.2022 s / it)
Averaged stats: lr: 0.001556  min_lr: 0.001556  loss: 2.6282 (3.0311)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8328 (0.9366)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.8596 (0.8596)  acc1: 84.4000 (84.4000)  acc5: 97.6000 (97.6000)  time: 5.5718  data: 5.4915  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9626 (1.0089)  acc1: 80.8000 (79.9273)  acc5: 96.0000 (95.6364)  time: 0.7444  data: 0.6689  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2625 (1.1859)  acc1: 72.8000 (75.7143)  acc5: 91.2000 (93.2571)  time: 0.2088  data: 0.1364  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2873 (1.1949)  acc1: 71.2000 (75.1360)  acc5: 91.2000 (93.1200)  time: 0.2073  data: 0.1363  max mem: 9147
Test: Total time: 0:00:10 (0.4040 s / it)
* Acc@1 75.350 Acc@5 92.888 loss 1.194
Accuracy of the model on the 50000 test images: 75.4%
Max accuracy: 75.35%
Epoch: [180]  [   0/1251]  eta: 0:57:18  lr: 0.001556  min_lr: 0.001556  loss: 2.5569 (2.5569)  weight_decay: 0.0500 (0.0500)  time: 2.7483  data: 2.4473  max mem: 9147
Epoch: [180]  [ 200/1251]  eta: 0:03:43  lr: 0.001552  min_lr: 0.001552  loss: 2.4885 (3.0635)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8836 (1.0368)  time: 0.1940  data: 0.0008  max mem: 9147
Epoch: [180]  [ 400/1251]  eta: 0:02:52  lr: 0.001549  min_lr: 0.001549  loss: 2.5508 (3.0728)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8434 (0.9762)  time: 0.1929  data: 0.0011  max mem: 9147
Epoch: [180]  [ 600/1251]  eta: 0:02:11  lr: 0.001545  min_lr: 0.001545  loss: 2.9631 (3.0816)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9069 (0.9799)  time: 0.2053  data: 0.0007  max mem: 9147
Epoch: [180]  [ 800/1251]  eta: 0:01:32  lr: 0.001542  min_lr: 0.001542  loss: 2.5458 (3.0392)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9283 (0.9812)  time: 0.2147  data: 0.0007  max mem: 9147
Epoch: [180]  [1000/1251]  eta: 0:00:51  lr: 0.001538  min_lr: 0.001538  loss: 2.5294 (3.0398)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9436 (0.9931)  time: 0.2155  data: 0.0007  max mem: 9147
Epoch: [180]  [1200/1251]  eta: 0:00:10  lr: 0.001535  min_lr: 0.001535  loss: 2.5661 (3.0487)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9456 (inf)  time: 0.2000  data: 0.0005  max mem: 9147
Epoch: [180]  [1250/1251]  eta: 0:00:00  lr: 0.001534  min_lr: 0.001534  loss: 2.8386 (3.0473)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8584 (inf)  time: 0.1399  data: 0.0012  max mem: 9147
Epoch: [180] Total time: 0:04:19 (0.2075 s / it)
Averaged stats: lr: 0.001534  min_lr: 0.001534  loss: 2.8386 (3.0308)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8584 (inf)
Test:  [ 0/25]  eta: 0:01:39  loss: 0.8147 (0.8147)  acc1: 85.6000 (85.6000)  acc5: 98.0000 (98.0000)  time: 3.9620  data: 3.8557  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 0.9292 (0.9792)  acc1: 80.0000 (79.5636)  acc5: 96.4000 (95.9273)  time: 0.6004  data: 0.5224  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2332 (1.1988)  acc1: 73.6000 (75.2000)  acc5: 93.2000 (93.0095)  time: 0.2381  data: 0.1632  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3540 (1.2158)  acc1: 71.6000 (74.8000)  acc5: 90.8000 (92.7040)  time: 0.2086  data: 0.1356  max mem: 9147
Test: Total time: 0:00:09 (0.3988 s / it)
* Acc@1 75.442 Acc@5 92.766 loss 1.205
Accuracy of the model on the 50000 test images: 75.4%
Max accuracy: 75.44%
Epoch: [181]  [   0/1251]  eta: 1:04:21  lr: 0.001534  min_lr: 0.001534  loss: 2.5715 (2.5715)  weight_decay: 0.0500 (0.0500)  time: 3.0870  data: 2.8526  max mem: 9147
Epoch: [181]  [ 200/1251]  eta: 0:03:46  lr: 0.001530  min_lr: 0.001530  loss: 2.3422 (2.9982)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8695 (1.0386)  time: 0.2264  data: 0.0007  max mem: 9147
Epoch: [181]  [ 400/1251]  eta: 0:02:59  lr: 0.001527  min_lr: 0.001527  loss: 2.7781 (2.9913)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0892 (1.0777)  time: 0.1912  data: 0.0018  max mem: 9147
Epoch: [181]  [ 600/1251]  eta: 0:02:17  lr: 0.001523  min_lr: 0.001523  loss: 2.4177 (2.9853)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0155 (1.0497)  time: 0.2248  data: 0.0006  max mem: 9147
Epoch: [181]  [ 800/1251]  eta: 0:01:35  lr: 0.001520  min_lr: 0.001520  loss: 2.4392 (3.0153)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8870 (1.0251)  time: 0.2292  data: 0.0064  max mem: 9147
Epoch: [181]  [1000/1251]  eta: 0:00:52  lr: 0.001516  min_lr: 0.001516  loss: 2.9341 (3.0181)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8079 (0.9995)  time: 0.2252  data: 0.0008  max mem: 9147
Epoch: [181]  [1200/1251]  eta: 0:00:10  lr: 0.001513  min_lr: 0.001513  loss: 2.8826 (3.0235)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9062 (0.9905)  time: 0.2113  data: 0.0007  max mem: 9147
Epoch: [181]  [1250/1251]  eta: 0:00:00  lr: 0.001512  min_lr: 0.001512  loss: 2.4952 (3.0210)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8833 (0.9910)  time: 0.1494  data: 0.0030  max mem: 9147
Epoch: [181] Total time: 0:04:25 (0.2121 s / it)
Averaged stats: lr: 0.001512  min_lr: 0.001512  loss: 2.4952 (3.0268)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8833 (0.9910)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.7225 (0.7225)  acc1: 86.4000 (86.4000)  acc5: 98.0000 (98.0000)  time: 5.6403  data: 5.5597  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8615 (0.8960)  acc1: 81.6000 (80.3273)  acc5: 95.6000 (95.7818)  time: 0.7581  data: 0.6728  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1468 (1.1138)  acc1: 72.0000 (75.5238)  acc5: 92.0000 (93.2571)  time: 0.2214  data: 0.1430  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2623 (1.1373)  acc1: 71.6000 (74.9920)  acc5: 90.8000 (93.0080)  time: 0.2156  data: 0.1421  max mem: 9147
Test: Total time: 0:00:10 (0.4172 s / it)
* Acc@1 75.446 Acc@5 92.914 loss 1.124
Accuracy of the model on the 50000 test images: 75.4%
Max accuracy: 75.45%
Epoch: [182]  [   0/1251]  eta: 0:58:30  lr: 0.001512  min_lr: 0.001512  loss: 2.3368 (2.3368)  weight_decay: 0.0500 (0.0500)  time: 2.8060  data: 2.5937  max mem: 9147
Epoch: [182]  [ 200/1251]  eta: 0:03:51  lr: 0.001508  min_lr: 0.001508  loss: 2.5212 (2.8963)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0185 (0.9126)  time: 0.2092  data: 0.0103  max mem: 9147
Epoch: [182]  [ 400/1251]  eta: 0:02:55  lr: 0.001505  min_lr: 0.001505  loss: 3.8024 (2.9294)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8257 (0.9600)  time: 0.1837  data: 0.0005  max mem: 9147
Epoch: [182]  [ 600/1251]  eta: 0:02:11  lr: 0.001501  min_lr: 0.001501  loss: 2.5254 (2.9670)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8686 (0.9679)  time: 0.2011  data: 0.0007  max mem: 9147
Epoch: [182]  [ 800/1251]  eta: 0:01:32  lr: 0.001498  min_lr: 0.001498  loss: 2.3280 (2.9752)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8372 (0.9487)  time: 0.2251  data: 0.0006  max mem: 9147
Epoch: [182]  [1000/1251]  eta: 0:00:52  lr: 0.001495  min_lr: 0.001495  loss: 2.4138 (2.9696)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8292 (0.9335)  time: 0.2355  data: 0.0008  max mem: 9147
Epoch: [182]  [1200/1251]  eta: 0:00:10  lr: 0.001491  min_lr: 0.001491  loss: 3.2864 (2.9897)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9531 (0.9458)  time: 0.1893  data: 0.0006  max mem: 9147
Epoch: [182]  [1250/1251]  eta: 0:00:00  lr: 0.001490  min_lr: 0.001490  loss: 2.4890 (2.9891)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0842 (0.9530)  time: 0.1396  data: 0.0009  max mem: 9147
Epoch: [182] Total time: 0:04:17 (0.2058 s / it)
Averaged stats: lr: 0.001490  min_lr: 0.001490  loss: 2.4890 (3.0259)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0842 (0.9530)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.6654 (0.6654)  acc1: 86.8000 (86.8000)  acc5: 98.8000 (98.8000)  time: 5.6718  data: 5.5797  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8664 (0.9068)  acc1: 81.6000 (79.4545)  acc5: 96.0000 (95.5636)  time: 0.7759  data: 0.6970  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1681 (1.1026)  acc1: 74.4000 (75.5429)  acc5: 92.4000 (93.2000)  time: 0.2100  data: 0.1366  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2583 (1.1186)  acc1: 72.8000 (75.0880)  acc5: 92.0000 (93.0080)  time: 0.2075  data: 0.1365  max mem: 9147
Test: Total time: 0:00:10 (0.4096 s / it)
* Acc@1 75.640 Acc@5 92.980 loss 1.116
Accuracy of the model on the 50000 test images: 75.6%
Max accuracy: 75.64%
Epoch: [183]  [   0/1251]  eta: 1:00:05  lr: 0.001490  min_lr: 0.001490  loss: 2.4963 (2.4963)  weight_decay: 0.0500 (0.0500)  time: 2.8822  data: 2.6601  max mem: 9147
Epoch: [183]  [ 200/1251]  eta: 0:03:57  lr: 0.001487  min_lr: 0.001487  loss: 2.6111 (3.0875)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8907 (0.9429)  time: 0.2244  data: 0.0007  max mem: 9147
Epoch: [183]  [ 400/1251]  eta: 0:03:07  lr: 0.001483  min_lr: 0.001483  loss: 2.5910 (3.0197)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0311 (0.9820)  time: 0.1928  data: 0.0005  max mem: 9147
Epoch: [183]  [ 600/1251]  eta: 0:02:21  lr: 0.001480  min_lr: 0.001480  loss: 2.4145 (3.0435)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9182 (0.9495)  time: 0.2154  data: 0.0093  max mem: 9147
Epoch: [183]  [ 800/1251]  eta: 0:01:35  lr: 0.001476  min_lr: 0.001476  loss: 2.6851 (3.0536)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8451 (0.9398)  time: 0.1950  data: 0.0005  max mem: 9147
Epoch: [183]  [1000/1251]  eta: 0:00:52  lr: 0.001473  min_lr: 0.001473  loss: 2.3906 (3.0533)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9543 (0.9632)  time: 0.1955  data: 0.0006  max mem: 9147
Epoch: [183]  [1200/1251]  eta: 0:00:10  lr: 0.001469  min_lr: 0.001469  loss: 2.5126 (3.0477)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9238 (0.9651)  time: 0.2194  data: 0.0007  max mem: 9147
Epoch: [183]  [1250/1251]  eta: 0:00:00  lr: 0.001469  min_lr: 0.001469  loss: 2.6673 (3.0473)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8955 (0.9630)  time: 0.1405  data: 0.0006  max mem: 9147
Epoch: [183] Total time: 0:04:20 (0.2080 s / it)
Averaged stats: lr: 0.001469  min_lr: 0.001469  loss: 2.6673 (3.0199)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8955 (0.9630)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.7355 (0.7355)  acc1: 84.8000 (84.8000)  acc5: 98.4000 (98.4000)  time: 5.6455  data: 5.5652  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9527 (0.9795)  acc1: 81.6000 (79.9636)  acc5: 95.2000 (95.5636)  time: 0.7544  data: 0.6652  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2252 (1.1670)  acc1: 73.6000 (75.6381)  acc5: 92.8000 (93.1619)  time: 0.2095  data: 0.1296  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2506 (1.1826)  acc1: 71.6000 (75.1840)  acc5: 91.2000 (92.8480)  time: 0.2185  data: 0.1446  max mem: 9147
Test: Total time: 0:00:10 (0.4196 s / it)
* Acc@1 75.668 Acc@5 93.018 loss 1.170
Accuracy of the model on the 50000 test images: 75.7%
Max accuracy: 75.67%
Epoch: [184]  [   0/1251]  eta: 1:00:12  lr: 0.001469  min_lr: 0.001469  loss: 3.3663 (3.3663)  weight_decay: 0.0500 (0.0500)  time: 2.8877  data: 2.3378  max mem: 9147
Epoch: [184]  [ 200/1251]  eta: 0:03:56  lr: 0.001465  min_lr: 0.001465  loss: 2.6281 (2.9544)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8619 (0.9741)  time: 0.2150  data: 0.0171  max mem: 9147
Epoch: [184]  [ 400/1251]  eta: 0:03:03  lr: 0.001462  min_lr: 0.001462  loss: 2.4906 (2.9413)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0879 (1.0019)  time: 0.1951  data: 0.0005  max mem: 9147
Epoch: [184]  [ 600/1251]  eta: 0:02:16  lr: 0.001458  min_lr: 0.001458  loss: 3.1250 (2.9973)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9517 (0.9939)  time: 0.2000  data: 0.0007  max mem: 9147
Epoch: [184]  [ 800/1251]  eta: 0:01:34  lr: 0.001455  min_lr: 0.001455  loss: 2.3783 (2.9915)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9303 (0.9996)  time: 0.2001  data: 0.0006  max mem: 9147
Epoch: [184]  [1000/1251]  eta: 0:00:53  lr: 0.001451  min_lr: 0.001451  loss: 2.4211 (2.9945)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0074 (1.0032)  time: 0.2220  data: 0.0007  max mem: 9147
Epoch: [184]  [1200/1251]  eta: 0:00:10  lr: 0.001448  min_lr: 0.001448  loss: 2.5225 (2.9933)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8414 (0.9843)  time: 0.1998  data: 0.0006  max mem: 9147
Epoch: [184]  [1250/1251]  eta: 0:00:00  lr: 0.001447  min_lr: 0.001447  loss: 2.5390 (2.9938)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9756 (0.9888)  time: 0.1409  data: 0.0015  max mem: 9147
Epoch: [184] Total time: 0:04:24 (0.2114 s / it)
Averaged stats: lr: 0.001447  min_lr: 0.001447  loss: 2.5390 (3.0139)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9756 (0.9888)
Test:  [ 0/25]  eta: 0:02:12  loss: 0.7303 (0.7303)  acc1: 84.4000 (84.4000)  acc5: 98.4000 (98.4000)  time: 5.3080  data: 5.2203  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9422 (0.9629)  acc1: 80.4000 (79.4545)  acc5: 95.6000 (95.6000)  time: 0.7452  data: 0.6655  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1935 (1.1454)  acc1: 73.2000 (75.3905)  acc5: 92.8000 (93.3714)  time: 0.2200  data: 0.1443  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2867 (1.1621)  acc1: 71.6000 (74.8480)  acc5: 91.2000 (93.1840)  time: 0.2179  data: 0.1443  max mem: 9147
Test: Total time: 0:00:10 (0.4029 s / it)
* Acc@1 75.308 Acc@5 93.114 loss 1.158
Accuracy of the model on the 50000 test images: 75.3%
Max accuracy: 75.67%
Epoch: [185]  [   0/1251]  eta: 1:08:49  lr: 0.001447  min_lr: 0.001447  loss: 3.1880 (3.1880)  weight_decay: 0.0500 (0.0500)  time: 3.3010  data: 3.0657  max mem: 9147
Epoch: [185]  [ 200/1251]  eta: 0:03:57  lr: 0.001444  min_lr: 0.001444  loss: 2.7222 (2.9734)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8637 (0.9433)  time: 0.1903  data: 0.0005  max mem: 9147
Epoch: [185]  [ 400/1251]  eta: 0:02:59  lr: 0.001440  min_lr: 0.001440  loss: 2.4147 (2.9802)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0515 (1.0357)  time: 0.1876  data: 0.0006  max mem: 9147
Epoch: [185]  [ 600/1251]  eta: 0:02:12  lr: 0.001437  min_lr: 0.001437  loss: 2.4756 (3.0123)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9656 (1.0107)  time: 0.1905  data: 0.0016  max mem: 9147
Epoch: [185]  [ 800/1251]  eta: 0:01:30  lr: 0.001433  min_lr: 0.001433  loss: 2.5273 (3.0293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9253 (1.0010)  time: 0.1909  data: 0.0007  max mem: 9147
Epoch: [185]  [1000/1251]  eta: 0:00:49  lr: 0.001430  min_lr: 0.001430  loss: 2.5315 (3.0234)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8956 (0.9986)  time: 0.1895  data: 0.0012  max mem: 9147
Epoch: [185]  [1200/1251]  eta: 0:00:10  lr: 0.001426  min_lr: 0.001426  loss: 2.4826 (3.0254)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9188 (0.9963)  time: 0.2108  data: 0.0007  max mem: 9147
Epoch: [185]  [1250/1251]  eta: 0:00:00  lr: 0.001426  min_lr: 0.001426  loss: 2.4911 (3.0213)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0127 (0.9996)  time: 0.1544  data: 0.0012  max mem: 9147
Epoch: [185] Total time: 0:04:08 (0.1990 s / it)
Averaged stats: lr: 0.001426  min_lr: 0.001426  loss: 2.4911 (3.0160)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0127 (0.9996)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.8347 (0.8347)  acc1: 83.2000 (83.2000)  acc5: 97.6000 (97.6000)  time: 5.6174  data: 5.4881  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 1.0168 (1.0018)  acc1: 79.6000 (80.3273)  acc5: 95.6000 (95.4909)  time: 0.7415  data: 0.6526  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2339 (1.1753)  acc1: 74.8000 (75.8476)  acc5: 92.4000 (93.2762)  time: 0.2167  data: 0.1387  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2737 (1.1899)  acc1: 72.8000 (75.4240)  acc5: 91.6000 (93.0720)  time: 0.2115  data: 0.1386  max mem: 9147
Test: Total time: 0:00:10 (0.4130 s / it)
* Acc@1 75.476 Acc@5 93.058 loss 1.187
Accuracy of the model on the 50000 test images: 75.5%
Max accuracy: 75.67%
Epoch: [186]  [   0/1251]  eta: 1:09:05  lr: 0.001425  min_lr: 0.001425  loss: 4.0294 (4.0294)  weight_decay: 0.0500 (0.0500)  time: 3.3136  data: 2.3868  max mem: 9147
Epoch: [186]  [ 200/1251]  eta: 0:03:52  lr: 0.001422  min_lr: 0.001422  loss: 2.4835 (3.0313)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1202 (0.9824)  time: 0.2186  data: 0.0008  max mem: 9147
Epoch: [186]  [ 400/1251]  eta: 0:03:05  lr: 0.001419  min_lr: 0.001419  loss: 2.6987 (3.0522)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8890 (0.9531)  time: 0.2250  data: 0.0006  max mem: 9147
Epoch: [186]  [ 600/1251]  eta: 0:02:20  lr: 0.001415  min_lr: 0.001415  loss: 2.4990 (3.0429)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9646 (0.9735)  time: 0.2003  data: 0.0006  max mem: 9147
Epoch: [186]  [ 800/1251]  eta: 0:01:37  lr: 0.001412  min_lr: 0.001412  loss: 2.6142 (3.0476)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8627 (0.9573)  time: 0.2195  data: 0.0006  max mem: 9147
Epoch: [186]  [1000/1251]  eta: 0:00:53  lr: 0.001408  min_lr: 0.001408  loss: 3.8678 (3.0554)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0620 (0.9594)  time: 0.2005  data: 0.0006  max mem: 9147
Epoch: [186]  [1200/1251]  eta: 0:00:10  lr: 0.001405  min_lr: 0.001405  loss: 2.3517 (3.0323)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9312 (0.9513)  time: 0.2246  data: 0.0007  max mem: 9147
Epoch: [186]  [1250/1251]  eta: 0:00:00  lr: 0.001404  min_lr: 0.001404  loss: 2.4668 (3.0318)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9274 (0.9507)  time: 0.1719  data: 0.0014  max mem: 9147
Epoch: [186] Total time: 0:04:28 (0.2147 s / it)
Averaged stats: lr: 0.001404  min_lr: 0.001404  loss: 2.4668 (3.0221)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9274 (0.9507)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.7240 (0.7240)  acc1: 85.2000 (85.2000)  acc5: 97.2000 (97.2000)  time: 5.4407  data: 5.3429  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8909 (0.8966)  acc1: 80.8000 (80.5455)  acc5: 96.4000 (95.9273)  time: 0.7518  data: 0.6750  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1030 (1.1123)  acc1: 75.2000 (76.0000)  acc5: 92.4000 (93.1429)  time: 0.2162  data: 0.1434  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2549 (1.1312)  acc1: 73.2000 (75.6000)  acc5: 91.2000 (92.8800)  time: 0.2163  data: 0.1434  max mem: 9147
Test: Total time: 0:00:10 (0.4058 s / it)
* Acc@1 75.522 Acc@5 93.046 loss 1.122
Accuracy of the model on the 50000 test images: 75.5%
Max accuracy: 75.67%
Epoch: [187]  [   0/1251]  eta: 1:08:14  lr: 0.001404  min_lr: 0.001404  loss: 2.7033 (2.7033)  weight_decay: 0.0500 (0.0500)  time: 3.2733  data: 2.6005  max mem: 9147
Epoch: [187]  [ 200/1251]  eta: 0:03:55  lr: 0.001401  min_lr: 0.001401  loss: 2.6447 (3.0150)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0168 (1.1403)  time: 0.2099  data: 0.0007  max mem: 9147
Epoch: [187]  [ 400/1251]  eta: 0:03:05  lr: 0.001397  min_lr: 0.001397  loss: 2.4708 (2.9970)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9817 (1.0474)  time: 0.2242  data: 0.0006  max mem: 9147
Epoch: [187]  [ 600/1251]  eta: 0:02:20  lr: 0.001394  min_lr: 0.001394  loss: 2.4468 (2.9962)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9576 (1.0025)  time: 0.2342  data: 0.0007  max mem: 9147
Epoch: [187]  [ 800/1251]  eta: 0:01:36  lr: 0.001390  min_lr: 0.001390  loss: 2.4228 (2.9957)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9652 (1.0014)  time: 0.2148  data: 0.0116  max mem: 9147
Epoch: [187]  [1000/1251]  eta: 0:00:53  lr: 0.001387  min_lr: 0.001387  loss: 2.4091 (2.9913)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9315 (0.9995)  time: 0.1937  data: 0.0007  max mem: 9147
Epoch: [187]  [1200/1251]  eta: 0:00:10  lr: 0.001383  min_lr: 0.001383  loss: 2.5336 (3.0031)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9583 (0.9987)  time: 0.2251  data: 0.0169  max mem: 9147
Epoch: [187]  [1250/1251]  eta: 0:00:00  lr: 0.001383  min_lr: 0.001383  loss: 2.5498 (3.0027)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0715 (1.0080)  time: 0.1372  data: 0.0006  max mem: 9147
Epoch: [187] Total time: 0:04:24 (0.2116 s / it)
Averaged stats: lr: 0.001383  min_lr: 0.001383  loss: 2.5498 (3.0098)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0715 (1.0080)
Test:  [ 0/25]  eta: 0:02:13  loss: 0.7220 (0.7220)  acc1: 87.6000 (87.6000)  acc5: 98.4000 (98.4000)  time: 5.3382  data: 5.2028  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8706 (0.9138)  acc1: 81.2000 (80.7636)  acc5: 96.0000 (95.7455)  time: 0.6816  data: 0.5937  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1663 (1.1253)  acc1: 75.2000 (76.3048)  acc5: 92.8000 (93.1048)  time: 0.2073  data: 0.1311  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2630 (1.1460)  acc1: 72.4000 (75.5840)  acc5: 90.4000 (92.8320)  time: 0.2047  data: 0.1311  max mem: 9147
Test: Total time: 0:00:09 (0.3975 s / it)
* Acc@1 75.640 Acc@5 93.076 loss 1.140
Accuracy of the model on the 50000 test images: 75.6%
Max accuracy: 75.67%
Epoch: [188]  [   0/1251]  eta: 1:06:55  lr: 0.001383  min_lr: 0.001383  loss: 4.0445 (4.0445)  weight_decay: 0.0500 (0.0500)  time: 3.2100  data: 1.7616  max mem: 9147
Epoch: [188]  [ 200/1251]  eta: 0:03:41  lr: 0.001379  min_lr: 0.001379  loss: 2.4820 (2.9902)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9846 (1.0423)  time: 0.1952  data: 0.0005  max mem: 9147
Epoch: [188]  [ 400/1251]  eta: 0:03:01  lr: 0.001376  min_lr: 0.001376  loss: 2.4691 (2.9781)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0472 (1.0341)  time: 0.2207  data: 0.0008  max mem: 9147
Epoch: [188]  [ 600/1251]  eta: 0:02:19  lr: 0.001372  min_lr: 0.001372  loss: 2.5140 (3.0046)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8558 (0.9933)  time: 0.2291  data: 0.0007  max mem: 9147
Epoch: [188]  [ 800/1251]  eta: 0:01:36  lr: 0.001369  min_lr: 0.001369  loss: 3.1478 (3.0324)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9361 (0.9824)  time: 0.2089  data: 0.0012  max mem: 9147
Epoch: [188]  [1000/1251]  eta: 0:00:52  lr: 0.001366  min_lr: 0.001366  loss: 3.0569 (3.0186)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9811 (0.9782)  time: 0.2103  data: 0.0008  max mem: 9147
Epoch: [188]  [1200/1251]  eta: 0:00:10  lr: 0.001362  min_lr: 0.001362  loss: 2.6585 (3.0163)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9219 (0.9785)  time: 0.2252  data: 0.0006  max mem: 9147
Epoch: [188]  [1250/1251]  eta: 0:00:00  lr: 0.001361  min_lr: 0.001361  loss: 2.5233 (3.0152)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9629 (0.9803)  time: 0.1434  data: 0.0011  max mem: 9147
Epoch: [188] Total time: 0:04:24 (0.2117 s / it)
Averaged stats: lr: 0.001361  min_lr: 0.001361  loss: 2.5233 (3.0059)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9629 (0.9803)
Test:  [ 0/25]  eta: 0:01:25  loss: 0.7960 (0.7960)  acc1: 85.6000 (85.6000)  acc5: 98.0000 (98.0000)  time: 3.4134  data: 3.3331  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 0.9227 (0.9435)  acc1: 81.6000 (80.2545)  acc5: 96.0000 (95.4909)  time: 0.5757  data: 0.5016  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2291 (1.1573)  acc1: 74.0000 (75.8286)  acc5: 92.0000 (93.0286)  time: 0.2695  data: 0.1955  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2899 (1.1771)  acc1: 71.6000 (75.3440)  acc5: 91.2000 (92.8160)  time: 0.2109  data: 0.1390  max mem: 9147
Test: Total time: 0:00:09 (0.3994 s / it)
* Acc@1 75.470 Acc@5 93.032 loss 1.176
Accuracy of the model on the 50000 test images: 75.5%
Max accuracy: 75.67%
Epoch: [189]  [   0/1251]  eta: 1:02:02  lr: 0.001361  min_lr: 0.001361  loss: 3.8478 (3.8478)  weight_decay: 0.0500 (0.0500)  time: 2.9753  data: 2.7205  max mem: 9147
Epoch: [189]  [ 200/1251]  eta: 0:03:52  lr: 0.001358  min_lr: 0.001358  loss: 2.4664 (3.0442)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9994 (1.0182)  time: 0.1915  data: 0.0006  max mem: 9147
Epoch: [189]  [ 400/1251]  eta: 0:02:55  lr: 0.001355  min_lr: 0.001355  loss: 2.6217 (3.0344)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9175 (1.0211)  time: 0.1948  data: 0.0005  max mem: 9147
Epoch: [189]  [ 600/1251]  eta: 0:02:15  lr: 0.001351  min_lr: 0.001351  loss: 2.4154 (3.0325)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9095 (1.0003)  time: 0.2244  data: 0.0007  max mem: 9147
Epoch: [189]  [ 800/1251]  eta: 0:01:34  lr: 0.001348  min_lr: 0.001348  loss: 2.8463 (3.0278)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9220 (0.9862)  time: 0.2015  data: 0.0007  max mem: 9147
Epoch: [189]  [1000/1251]  eta: 0:00:52  lr: 0.001344  min_lr: 0.001344  loss: 2.4710 (3.0187)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0935 (1.0067)  time: 0.2156  data: 0.0006  max mem: 9147
Epoch: [189]  [1200/1251]  eta: 0:00:10  lr: 0.001341  min_lr: 0.001341  loss: 2.7885 (3.0170)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0295 (1.0207)  time: 0.1900  data: 0.0010  max mem: 9147
Epoch: [189]  [1250/1251]  eta: 0:00:00  lr: 0.001340  min_lr: 0.001340  loss: 2.3382 (3.0131)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0278 (1.0201)  time: 0.1379  data: 0.0014  max mem: 9147
Epoch: [189] Total time: 0:04:22 (0.2097 s / it)
Averaged stats: lr: 0.001340  min_lr: 0.001340  loss: 2.3382 (3.0036)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0278 (1.0201)
Test:  [ 0/25]  eta: 0:02:27  loss: 0.7014 (0.7014)  acc1: 86.4000 (86.4000)  acc5: 98.0000 (98.0000)  time: 5.8833  data: 5.8015  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9142 (0.9273)  acc1: 82.0000 (80.9818)  acc5: 96.4000 (95.9636)  time: 0.7698  data: 0.6864  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1512 (1.1299)  acc1: 73.2000 (76.4571)  acc5: 92.0000 (93.1238)  time: 0.2082  data: 0.1284  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2792 (1.1446)  acc1: 72.4000 (76.0640)  acc5: 91.2000 (93.0400)  time: 0.2050  data: 0.1283  max mem: 9147
Test: Total time: 0:00:10 (0.4172 s / it)
* Acc@1 75.772 Acc@5 93.146 loss 1.143
Accuracy of the model on the 50000 test images: 75.8%
Max accuracy: 75.77%
Epoch: [190]  [   0/1251]  eta: 1:03:51  lr: 0.001340  min_lr: 0.001340  loss: 4.1497 (4.1497)  weight_decay: 0.0500 (0.0500)  time: 3.0627  data: 2.8511  max mem: 9147
Epoch: [190]  [ 200/1251]  eta: 0:03:40  lr: 0.001337  min_lr: 0.001337  loss: 2.4912 (2.9996)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8720 (0.9572)  time: 0.1950  data: 0.0006  max mem: 9147
Epoch: [190]  [ 400/1251]  eta: 0:02:49  lr: 0.001333  min_lr: 0.001333  loss: 2.4625 (2.9971)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8382 (0.9290)  time: 0.1903  data: 0.0009  max mem: 9147
Epoch: [190]  [ 600/1251]  eta: 0:02:08  lr: 0.001330  min_lr: 0.001330  loss: 3.7106 (3.0293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8948 (0.9289)  time: 0.2109  data: 0.0007  max mem: 9147
Epoch: [190]  [ 800/1251]  eta: 0:01:30  lr: 0.001327  min_lr: 0.001327  loss: 3.0731 (3.0404)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9602 (0.9582)  time: 0.1898  data: 0.0005  max mem: 9147
Epoch: [190]  [1000/1251]  eta: 0:00:51  lr: 0.001323  min_lr: 0.001323  loss: 2.5094 (3.0363)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0185 (0.9765)  time: 0.2291  data: 0.0006  max mem: 9147
Epoch: [190]  [1200/1251]  eta: 0:00:10  lr: 0.001320  min_lr: 0.001320  loss: 2.5797 (3.0309)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0200 (0.9839)  time: 0.2306  data: 0.0006  max mem: 9147
Epoch: [190]  [1250/1251]  eta: 0:00:00  lr: 0.001319  min_lr: 0.001319  loss: 2.9471 (3.0293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9654 (0.9832)  time: 0.1431  data: 0.0013  max mem: 9147
Epoch: [190] Total time: 0:04:15 (0.2044 s / it)
Averaged stats: lr: 0.001319  min_lr: 0.001319  loss: 2.9471 (3.0029)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9654 (0.9832)
Test:  [ 0/25]  eta: 0:02:15  loss: 0.7895 (0.7895)  acc1: 85.2000 (85.2000)  acc5: 98.4000 (98.4000)  time: 5.4001  data: 5.3011  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9920 (0.9822)  acc1: 80.0000 (80.4364)  acc5: 96.0000 (95.7818)  time: 0.7165  data: 0.6260  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2478 (1.1800)  acc1: 74.4000 (76.1905)  acc5: 92.0000 (93.1429)  time: 0.2087  data: 0.1293  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3099 (1.1889)  acc1: 72.4000 (75.7760)  acc5: 91.2000 (93.0560)  time: 0.2062  data: 0.1293  max mem: 9147
Test: Total time: 0:00:09 (0.3991 s / it)
* Acc@1 75.660 Acc@5 92.990 loss 1.188
Accuracy of the model on the 50000 test images: 75.7%
Max accuracy: 75.77%
Epoch: [191]  [   0/1251]  eta: 1:06:12  lr: 0.001319  min_lr: 0.001319  loss: 3.9476 (3.9476)  weight_decay: 0.0500 (0.0500)  time: 3.1757  data: 1.8430  max mem: 9147
Epoch: [191]  [ 200/1251]  eta: 0:03:56  lr: 0.001316  min_lr: 0.001316  loss: 2.4169 (2.9562)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9053 (0.9372)  time: 0.2050  data: 0.0008  max mem: 9147
Epoch: [191]  [ 400/1251]  eta: 0:03:09  lr: 0.001312  min_lr: 0.001312  loss: 2.9161 (3.0075)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0186 (0.9657)  time: 0.2197  data: 0.0006  max mem: 9147
Epoch: [191]  [ 600/1251]  eta: 0:02:21  lr: 0.001309  min_lr: 0.001309  loss: 2.3746 (3.0088)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8606 (0.9727)  time: 0.2010  data: 0.0005  max mem: 9147
Epoch: [191]  [ 800/1251]  eta: 0:01:37  lr: 0.001305  min_lr: 0.001305  loss: 3.3289 (3.0210)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8735 (0.9892)  time: 0.1946  data: 0.0006  max mem: 9147
Epoch: [191]  [1000/1251]  eta: 0:00:53  lr: 0.001302  min_lr: 0.001302  loss: 2.6378 (3.0393)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9381 (0.9852)  time: 0.2137  data: 0.0007  max mem: 9147
Epoch: [191]  [1200/1251]  eta: 0:00:10  lr: 0.001299  min_lr: 0.001299  loss: 2.3993 (3.0159)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0311 (1.0034)  time: 0.1831  data: 0.0010  max mem: 9147
Epoch: [191]  [1250/1251]  eta: 0:00:00  lr: 0.001298  min_lr: 0.001298  loss: 2.8231 (3.0159)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9128 (1.0026)  time: 0.1391  data: 0.0011  max mem: 9147
Epoch: [191] Total time: 0:04:27 (0.2136 s / it)
Averaged stats: lr: 0.001298  min_lr: 0.001298  loss: 2.8231 (3.0040)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9128 (1.0026)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.7859 (0.7859)  acc1: 86.0000 (86.0000)  acc5: 97.6000 (97.6000)  time: 5.5399  data: 5.4055  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9082 (0.9576)  acc1: 81.6000 (80.4000)  acc5: 96.0000 (95.5273)  time: 0.7299  data: 0.6327  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1766 (1.1526)  acc1: 74.4000 (75.7143)  acc5: 92.8000 (93.0667)  time: 0.2099  data: 0.1281  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2408 (1.1714)  acc1: 72.0000 (75.1200)  acc5: 92.0000 (92.8960)  time: 0.2052  data: 0.1280  max mem: 9147
Test: Total time: 0:00:10 (0.4047 s / it)
* Acc@1 75.532 Acc@5 92.984 loss 1.173
Accuracy of the model on the 50000 test images: 75.5%
Max accuracy: 75.77%
Epoch: [192]  [   0/1251]  eta: 1:05:14  lr: 0.001298  min_lr: 0.001298  loss: 2.1792 (2.1792)  weight_decay: 0.0500 (0.0500)  time: 3.1290  data: 2.8647  max mem: 9147
Epoch: [192]  [ 200/1251]  eta: 0:03:39  lr: 0.001295  min_lr: 0.001295  loss: 2.5295 (2.9562)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0949 (1.0228)  time: 0.1944  data: 0.0011  max mem: 9147
Epoch: [192]  [ 400/1251]  eta: 0:02:50  lr: 0.001291  min_lr: 0.001291  loss: 2.9186 (2.9801)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8586 (0.9922)  time: 0.2011  data: 0.0005  max mem: 9147
Epoch: [192]  [ 600/1251]  eta: 0:02:13  lr: 0.001288  min_lr: 0.001288  loss: 3.0319 (3.0025)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0279 (1.0333)  time: 0.2109  data: 0.0007  max mem: 9147
Epoch: [192]  [ 800/1251]  eta: 0:01:33  lr: 0.001284  min_lr: 0.001284  loss: 2.3923 (3.0120)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9377 (1.0203)  time: 0.2196  data: 0.0009  max mem: 9147
Epoch: [192]  [1000/1251]  eta: 0:00:52  lr: 0.001281  min_lr: 0.001281  loss: 2.8056 (3.0102)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9754 (1.0321)  time: 0.2202  data: 0.0007  max mem: 9147
Epoch: [192]  [1200/1251]  eta: 0:00:10  lr: 0.001278  min_lr: 0.001278  loss: 3.2945 (3.0217)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9797 (1.0271)  time: 0.2108  data: 0.0007  max mem: 9147
Epoch: [192]  [1250/1251]  eta: 0:00:00  lr: 0.001277  min_lr: 0.001277  loss: 2.7665 (3.0194)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9382 (1.0211)  time: 0.1396  data: 0.0010  max mem: 9147
Epoch: [192] Total time: 0:04:20 (0.2080 s / it)
Averaged stats: lr: 0.001277  min_lr: 0.001277  loss: 2.7665 (2.9940)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9382 (1.0211)
Test:  [ 0/25]  eta: 0:02:24  loss: 0.7022 (0.7022)  acc1: 87.2000 (87.2000)  acc5: 97.6000 (97.6000)  time: 5.7884  data: 5.7079  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8672 (0.9135)  acc1: 80.0000 (79.9273)  acc5: 95.6000 (95.5636)  time: 0.7595  data: 0.6841  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1701 (1.1174)  acc1: 74.0000 (75.8667)  acc5: 92.0000 (92.9524)  time: 0.2224  data: 0.1491  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2665 (1.1312)  acc1: 74.0000 (75.7120)  acc5: 91.2000 (92.8320)  time: 0.2229  data: 0.1508  max mem: 9147
Test: Total time: 0:00:10 (0.4247 s / it)
* Acc@1 75.876 Acc@5 93.082 loss 1.127
Accuracy of the model on the 50000 test images: 75.9%
Max accuracy: 75.88%
Epoch: [193]  [   0/1251]  eta: 1:03:48  lr: 0.001277  min_lr: 0.001277  loss: 2.0994 (2.0994)  weight_decay: 0.0500 (0.0500)  time: 3.0603  data: 2.8758  max mem: 9147
Epoch: [193]  [ 200/1251]  eta: 0:03:37  lr: 0.001274  min_lr: 0.001274  loss: 2.8338 (2.9110)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0279 (0.9996)  time: 0.1904  data: 0.0005  max mem: 9147
Epoch: [193]  [ 400/1251]  eta: 0:02:48  lr: 0.001270  min_lr: 0.001270  loss: 2.4566 (2.9606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8612 (1.0372)  time: 0.1904  data: 0.0006  max mem: 9147
Epoch: [193]  [ 600/1251]  eta: 0:02:07  lr: 0.001267  min_lr: 0.001267  loss: 2.8406 (3.0244)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1234 (1.0486)  time: 0.2057  data: 0.0007  max mem: 9147
Epoch: [193]  [ 800/1251]  eta: 0:01:31  lr: 0.001264  min_lr: 0.001264  loss: 2.4437 (3.0236)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9534 (1.0409)  time: 0.2256  data: 0.0008  max mem: 9147
Epoch: [193]  [1000/1251]  eta: 0:00:51  lr: 0.001260  min_lr: 0.001260  loss: 2.7964 (3.0171)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9838 (1.0276)  time: 0.2245  data: 0.0007  max mem: 9147
Epoch: [193]  [1200/1251]  eta: 0:00:10  lr: 0.001257  min_lr: 0.001257  loss: 2.6325 (3.0085)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9722 (1.0214)  time: 0.2057  data: 0.0006  max mem: 9147
Epoch: [193]  [1250/1251]  eta: 0:00:00  lr: 0.001256  min_lr: 0.001256  loss: 2.4064 (3.0048)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2143 (1.0305)  time: 0.1416  data: 0.0012  max mem: 9147
Epoch: [193] Total time: 0:04:17 (0.2059 s / it)
Averaged stats: lr: 0.001256  min_lr: 0.001256  loss: 2.4064 (2.9975)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2143 (1.0305)
Test:  [ 0/25]  eta: 0:01:18  loss: 0.7412 (0.7412)  acc1: 87.2000 (87.2000)  acc5: 98.0000 (98.0000)  time: 3.1394  data: 3.0503  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 0.9015 (0.9234)  acc1: 81.6000 (80.5818)  acc5: 96.4000 (95.5636)  time: 0.5492  data: 0.4698  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1591 (1.1016)  acc1: 74.0000 (76.5143)  acc5: 92.8000 (93.4667)  time: 0.2736  data: 0.1996  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2421 (1.1194)  acc1: 74.0000 (76.0800)  acc5: 91.6000 (93.2640)  time: 0.2175  data: 0.1455  max mem: 9147
Test: Total time: 0:00:09 (0.3992 s / it)
* Acc@1 75.958 Acc@5 93.088 loss 1.120
Accuracy of the model on the 50000 test images: 76.0%
Max accuracy: 75.96%
Epoch: [194]  [   0/1251]  eta: 0:52:39  lr: 0.001256  min_lr: 0.001256  loss: 2.2797 (2.2797)  weight_decay: 0.0500 (0.0500)  time: 2.5255  data: 1.9310  max mem: 9147
Epoch: [194]  [ 200/1251]  eta: 0:04:01  lr: 0.001253  min_lr: 0.001253  loss: 2.4726 (2.9381)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9067 (0.9908)  time: 0.2266  data: 0.0007  max mem: 9147
Epoch: [194]  [ 400/1251]  eta: 0:03:03  lr: 0.001249  min_lr: 0.001249  loss: 3.9481 (3.0265)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9270 (0.9705)  time: 0.2103  data: 0.0007  max mem: 9147
Epoch: [194]  [ 600/1251]  eta: 0:02:20  lr: 0.001246  min_lr: 0.001246  loss: 2.4432 (3.0097)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1240 (1.0310)  time: 0.2199  data: 0.0088  max mem: 9147
Epoch: [194]  [ 800/1251]  eta: 0:01:36  lr: 0.001243  min_lr: 0.001243  loss: 2.3886 (2.9766)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9435 (nan)  time: 0.2095  data: 0.0007  max mem: 9147
Epoch: [194]  [1000/1251]  eta: 0:00:53  lr: 0.001239  min_lr: 0.001239  loss: 2.4234 (2.9827)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9373 (nan)  time: 0.2212  data: 0.0006  max mem: 9147
Epoch: [194]  [1200/1251]  eta: 0:00:10  lr: 0.001236  min_lr: 0.001236  loss: 2.4683 (2.9694)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9602 (nan)  time: 0.2159  data: 0.0008  max mem: 9147
Epoch: [194]  [1250/1251]  eta: 0:00:00  lr: 0.001235  min_lr: 0.001235  loss: 3.3212 (2.9735)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0147 (nan)  time: 0.1420  data: 0.0009  max mem: 9147
Epoch: [194] Total time: 0:04:25 (0.2124 s / it)
Averaged stats: lr: 0.001235  min_lr: 0.001235  loss: 3.3212 (2.9745)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0147 (nan)
Test:  [ 0/25]  eta: 0:02:28  loss: 0.7535 (0.7535)  acc1: 86.4000 (86.4000)  acc5: 96.8000 (96.8000)  time: 5.9435  data: 5.8631  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9457 (0.9644)  acc1: 80.8000 (80.4727)  acc5: 96.0000 (95.6727)  time: 0.7439  data: 0.6689  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2126 (1.1649)  acc1: 73.2000 (76.0000)  acc5: 92.4000 (93.2762)  time: 0.1904  data: 0.1178  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3014 (1.1754)  acc1: 72.8000 (75.7760)  acc5: 91.2000 (93.2000)  time: 0.1911  data: 0.1202  max mem: 9147
Test: Total time: 0:00:10 (0.4060 s / it)
* Acc@1 76.072 Acc@5 93.210 loss 1.171
Accuracy of the model on the 50000 test images: 76.1%
Max accuracy: 76.07%
Epoch: [195]  [   0/1251]  eta: 1:01:55  lr: 0.001235  min_lr: 0.001235  loss: 3.9288 (3.9288)  weight_decay: 0.0500 (0.0500)  time: 2.9699  data: 2.7680  max mem: 9147
Epoch: [195]  [ 200/1251]  eta: 0:03:48  lr: 0.001232  min_lr: 0.001232  loss: 2.6484 (3.0048)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9827 (0.9981)  time: 0.2240  data: 0.0007  max mem: 9147
Epoch: [195]  [ 400/1251]  eta: 0:03:00  lr: 0.001229  min_lr: 0.001229  loss: 3.0177 (3.0203)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0674 (1.0197)  time: 0.1950  data: 0.0005  max mem: 9147
Epoch: [195]  [ 600/1251]  eta: 0:02:13  lr: 0.001225  min_lr: 0.001225  loss: 3.4184 (3.0222)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9057 (1.0164)  time: 0.1770  data: 0.0005  max mem: 9147
Epoch: [195]  [ 800/1251]  eta: 0:01:31  lr: 0.001222  min_lr: 0.001222  loss: 2.8824 (3.0240)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9521 (1.0141)  time: 0.1996  data: 0.0007  max mem: 9147
Epoch: [195]  [1000/1251]  eta: 0:00:50  lr: 0.001219  min_lr: 0.001219  loss: 2.3767 (3.0081)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0290 (1.0181)  time: 0.2303  data: 0.0029  max mem: 9147
Epoch: [195]  [1200/1251]  eta: 0:00:10  lr: 0.001215  min_lr: 0.001215  loss: 2.5364 (3.0087)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0972 (1.0186)  time: 0.1881  data: 0.0012  max mem: 9147
Epoch: [195]  [1250/1251]  eta: 0:00:00  lr: 0.001215  min_lr: 0.001215  loss: 2.3973 (3.0015)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9552 (1.0157)  time: 0.1383  data: 0.0012  max mem: 9147
Epoch: [195] Total time: 0:04:11 (0.2014 s / it)
Averaged stats: lr: 0.001215  min_lr: 0.001215  loss: 2.3973 (2.9765)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9552 (1.0157)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.6899 (0.6899)  acc1: 85.2000 (85.2000)  acc5: 97.2000 (97.2000)  time: 5.6578  data: 5.5774  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8544 (0.8843)  acc1: 82.4000 (80.9455)  acc5: 96.0000 (95.8909)  time: 0.7731  data: 0.6806  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0895 (1.0915)  acc1: 74.4000 (76.2667)  acc5: 92.8000 (93.2381)  time: 0.2150  data: 0.1325  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2497 (1.1034)  acc1: 72.4000 (76.0640)  acc5: 90.8000 (93.1200)  time: 0.2099  data: 0.1324  max mem: 9147
Test: Total time: 0:00:10 (0.4134 s / it)
* Acc@1 76.206 Acc@5 93.424 loss 1.103
Accuracy of the model on the 50000 test images: 76.2%
Max accuracy: 76.21%
Epoch: [196]  [   0/1251]  eta: 0:54:43  lr: 0.001215  min_lr: 0.001215  loss: 3.8018 (3.8018)  weight_decay: 0.0500 (0.0500)  time: 2.6245  data: 1.9389  max mem: 9147
Epoch: [196]  [ 200/1251]  eta: 0:03:53  lr: 0.001211  min_lr: 0.001211  loss: 2.3458 (3.0359)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9036 (0.9842)  time: 0.1975  data: 0.0012  max mem: 9147
Epoch: [196]  [ 400/1251]  eta: 0:02:57  lr: 0.001208  min_lr: 0.001208  loss: 2.3605 (2.9992)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0276 (1.0174)  time: 0.1898  data: 0.0012  max mem: 9147
Epoch: [196]  [ 600/1251]  eta: 0:02:15  lr: 0.001205  min_lr: 0.001205  loss: 2.4885 (2.9737)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0854 (1.0368)  time: 0.2144  data: 0.0008  max mem: 9147
Epoch: [196]  [ 800/1251]  eta: 0:01:34  lr: 0.001201  min_lr: 0.001201  loss: 2.7069 (2.9871)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0027 (1.0304)  time: 0.1897  data: 0.0005  max mem: 9147
Epoch: [196]  [1000/1251]  eta: 0:00:51  lr: 0.001198  min_lr: 0.001198  loss: 2.8681 (2.9994)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9212 (1.0378)  time: 0.2264  data: 0.0008  max mem: 9147
Epoch: [196]  [1200/1251]  eta: 0:00:10  lr: 0.001195  min_lr: 0.001195  loss: 2.4014 (2.9949)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9106 (1.0249)  time: 0.1901  data: 0.0005  max mem: 9147
Epoch: [196]  [1250/1251]  eta: 0:00:00  lr: 0.001194  min_lr: 0.001194  loss: 3.5779 (2.9923)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9288 (1.0215)  time: 0.1382  data: 0.0006  max mem: 9147
Epoch: [196] Total time: 0:04:16 (0.2054 s / it)
Averaged stats: lr: 0.001194  min_lr: 0.001194  loss: 3.5779 (2.9862)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9288 (1.0215)
Test:  [ 0/25]  eta: 0:02:26  loss: 0.7265 (0.7265)  acc1: 85.2000 (85.2000)  acc5: 98.4000 (98.4000)  time: 5.8681  data: 5.7653  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8611 (0.9041)  acc1: 81.2000 (80.4000)  acc5: 96.0000 (95.5636)  time: 0.7299  data: 0.6528  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1623 (1.1270)  acc1: 74.4000 (76.0191)  acc5: 91.6000 (93.1238)  time: 0.1949  data: 0.1200  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2975 (1.1462)  acc1: 73.6000 (75.5360)  acc5: 90.8000 (92.8640)  time: 0.1937  data: 0.1200  max mem: 9147
Test: Total time: 0:00:10 (0.4047 s / it)
* Acc@1 75.846 Acc@5 93.236 loss 1.137
Accuracy of the model on the 50000 test images: 75.8%
Max accuracy: 76.21%
Epoch: [197]  [   0/1251]  eta: 1:03:32  lr: 0.001194  min_lr: 0.001194  loss: 4.7097 (4.7097)  weight_decay: 0.0500 (0.0500)  time: 3.0479  data: 2.3809  max mem: 9147
Epoch: [197]  [ 200/1251]  eta: 0:03:52  lr: 0.001191  min_lr: 0.001191  loss: 2.4756 (3.0443)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0089 (1.0003)  time: 0.2003  data: 0.0006  max mem: 9147
Epoch: [197]  [ 400/1251]  eta: 0:03:00  lr: 0.001187  min_lr: 0.001187  loss: 3.4961 (3.0273)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9432 (1.0096)  time: 0.1904  data: 0.0005  max mem: 9147
Epoch: [197]  [ 600/1251]  eta: 0:02:14  lr: 0.001184  min_lr: 0.001184  loss: 2.7628 (3.0319)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9211 (1.0000)  time: 0.2105  data: 0.0008  max mem: 9147
Epoch: [197]  [ 800/1251]  eta: 0:01:33  lr: 0.001181  min_lr: 0.001181  loss: 2.7354 (3.0337)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9435 (0.9966)  time: 0.2101  data: 0.0006  max mem: 9147
Epoch: [197]  [1000/1251]  eta: 0:00:51  lr: 0.001178  min_lr: 0.001178  loss: 2.3197 (3.0238)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1273 (1.0334)  time: 0.2053  data: 0.0008  max mem: 9147
Epoch: [197]  [1200/1251]  eta: 0:00:10  lr: 0.001174  min_lr: 0.001174  loss: 2.4449 (3.0033)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9806 (1.0395)  time: 0.1954  data: 0.0007  max mem: 9147
Epoch: [197]  [1250/1251]  eta: 0:00:00  lr: 0.001174  min_lr: 0.001174  loss: 2.4042 (3.0050)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9229 (1.0367)  time: 0.1456  data: 0.0012  max mem: 9147
Epoch: [197] Total time: 0:04:19 (0.2074 s / it)
Averaged stats: lr: 0.001174  min_lr: 0.001174  loss: 2.4042 (2.9886)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9229 (1.0367)
Test:  [ 0/25]  eta: 0:02:23  loss: 0.8116 (0.8116)  acc1: 85.2000 (85.2000)  acc5: 96.8000 (96.8000)  time: 5.7346  data: 5.6543  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9360 (0.9491)  acc1: 82.8000 (80.7636)  acc5: 96.0000 (95.5636)  time: 0.7742  data: 0.6861  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1652 (1.1573)  acc1: 74.0000 (76.1905)  acc5: 92.4000 (93.3524)  time: 0.2030  data: 0.1229  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3019 (1.1684)  acc1: 74.0000 (75.6160)  acc5: 92.4000 (93.3600)  time: 0.1978  data: 0.1228  max mem: 9147
Test: Total time: 0:00:10 (0.4076 s / it)
* Acc@1 75.948 Acc@5 93.224 loss 1.165
Accuracy of the model on the 50000 test images: 75.9%
Max accuracy: 76.21%
Epoch: [198]  [   0/1251]  eta: 1:02:25  lr: 0.001174  min_lr: 0.001174  loss: 2.3274 (2.3274)  weight_decay: 0.0500 (0.0500)  time: 2.9939  data: 1.8147  max mem: 9147
Epoch: [198]  [ 200/1251]  eta: 0:03:49  lr: 0.001170  min_lr: 0.001170  loss: 2.9427 (3.0045)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9016 (0.9742)  time: 0.2000  data: 0.0006  max mem: 9147
Epoch: [198]  [ 400/1251]  eta: 0:03:01  lr: 0.001167  min_lr: 0.001167  loss: 2.9214 (2.9834)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2336 (1.0183)  time: 0.1892  data: 0.0013  max mem: 9147
Epoch: [198]  [ 600/1251]  eta: 0:02:14  lr: 0.001164  min_lr: 0.001164  loss: 2.3450 (2.9862)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9128 (0.9962)  time: 0.1997  data: 0.0008  max mem: 9147
Epoch: [198]  [ 800/1251]  eta: 0:01:33  lr: 0.001161  min_lr: 0.001161  loss: 2.6059 (2.9921)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8952 (0.9925)  time: 0.2202  data: 0.0007  max mem: 9147
Epoch: [198]  [1000/1251]  eta: 0:00:52  lr: 0.001157  min_lr: 0.001157  loss: 2.4332 (2.9987)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9312 (0.9994)  time: 0.2205  data: 0.0008  max mem: 9147
Epoch: [198]  [1200/1251]  eta: 0:00:10  lr: 0.001154  min_lr: 0.001154  loss: 2.4083 (2.9944)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9236 (0.9907)  time: 0.2103  data: 0.0007  max mem: 9147
Epoch: [198]  [1250/1251]  eta: 0:00:00  lr: 0.001153  min_lr: 0.001153  loss: 3.3470 (2.9973)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9583 (0.9951)  time: 0.1402  data: 0.0016  max mem: 9147
Epoch: [198] Total time: 0:04:18 (0.2063 s / it)
Averaged stats: lr: 0.001153  min_lr: 0.001153  loss: 3.3470 (2.9864)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9583 (0.9951)
Test:  [ 0/25]  eta: 0:02:10  loss: 0.7871 (0.7871)  acc1: 84.8000 (84.8000)  acc5: 97.2000 (97.2000)  time: 5.2327  data: 5.1342  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 0.9320 (0.9711)  acc1: 80.0000 (80.5455)  acc5: 96.4000 (95.9273)  time: 0.6599  data: 0.5668  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2151 (1.1863)  acc1: 74.0000 (76.3048)  acc5: 92.4000 (93.0476)  time: 0.1932  data: 0.1114  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3201 (1.2007)  acc1: 73.6000 (75.6800)  acc5: 91.6000 (92.8800)  time: 0.2139  data: 0.1378  max mem: 9147
Test: Total time: 0:00:10 (0.4009 s / it)
* Acc@1 76.004 Acc@5 93.164 loss 1.204
Accuracy of the model on the 50000 test images: 76.0%
Max accuracy: 76.21%
Epoch: [199]  [   0/1251]  eta: 1:04:16  lr: 0.001153  min_lr: 0.001153  loss: 3.8261 (3.8261)  weight_decay: 0.0500 (0.0500)  time: 3.0827  data: 2.4802  max mem: 9147
Epoch: [199]  [ 200/1251]  eta: 0:03:37  lr: 0.001150  min_lr: 0.001150  loss: 2.2965 (3.0328)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8695 (1.0260)  time: 0.1831  data: 0.0006  max mem: 9147
Epoch: [199]  [ 400/1251]  eta: 0:02:52  lr: 0.001147  min_lr: 0.001147  loss: 3.3984 (2.9944)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9565 (0.9932)  time: 0.2212  data: 0.0007  max mem: 9147
Epoch: [199]  [ 600/1251]  eta: 0:02:12  lr: 0.001143  min_lr: 0.001143  loss: 2.5220 (2.9808)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1169 (1.0214)  time: 0.1850  data: 0.0005  max mem: 9147
Epoch: [199]  [ 800/1251]  eta: 0:01:29  lr: 0.001140  min_lr: 0.001140  loss: 2.3809 (2.9830)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9562 (1.0259)  time: 0.1895  data: 0.0012  max mem: 9147
Epoch: [199]  [1000/1251]  eta: 0:00:50  lr: 0.001137  min_lr: 0.001137  loss: 2.6075 (2.9746)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0943 (1.0260)  time: 0.2051  data: 0.0056  max mem: 9147
Epoch: [199]  [1200/1251]  eta: 0:00:10  lr: 0.001134  min_lr: 0.001134  loss: 2.8053 (2.9883)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0332 (1.0249)  time: 0.2248  data: 0.0224  max mem: 9147
Epoch: [199]  [1250/1251]  eta: 0:00:00  lr: 0.001133  min_lr: 0.001133  loss: 2.6210 (2.9905)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9680 (1.0209)  time: 0.1520  data: 0.0126  max mem: 9147
Epoch: [199] Total time: 0:04:14 (0.2036 s / it)
Averaged stats: lr: 0.001133  min_lr: 0.001133  loss: 2.6210 (2.9788)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9680 (1.0209)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.7013 (0.7013)  acc1: 84.8000 (84.8000)  acc5: 98.4000 (98.4000)  time: 5.5385  data: 5.4573  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9230 (0.9393)  acc1: 82.0000 (80.3273)  acc5: 96.4000 (96.3636)  time: 0.7673  data: 0.6713  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2008 (1.1588)  acc1: 74.4000 (76.4381)  acc5: 92.8000 (93.6952)  time: 0.2242  data: 0.1389  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2602 (1.1742)  acc1: 72.0000 (75.9840)  acc5: 91.6000 (93.6320)  time: 0.2181  data: 0.1388  max mem: 9147
Test: Total time: 0:00:10 (0.4156 s / it)
* Acc@1 76.028 Acc@5 93.370 loss 1.174
Accuracy of the model on the 50000 test images: 76.0%
Max accuracy: 76.21%
Epoch: [200]  [   0/1251]  eta: 1:01:51  lr: 0.001133  min_lr: 0.001133  loss: 3.3735 (3.3735)  weight_decay: 0.0500 (0.0500)  time: 2.9671  data: 2.6484  max mem: 9147
Epoch: [200]  [ 200/1251]  eta: 0:04:00  lr: 0.001130  min_lr: 0.001130  loss: 2.6464 (2.9192)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9834 (1.0584)  time: 0.2203  data: 0.0006  max mem: 9147
Epoch: [200]  [ 400/1251]  eta: 0:03:08  lr: 0.001126  min_lr: 0.001126  loss: 2.6948 (2.9574)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1512 (1.0843)  time: 0.2100  data: 0.0006  max mem: 9147
Epoch: [200]  [ 600/1251]  eta: 0:02:19  lr: 0.001123  min_lr: 0.001123  loss: 2.6292 (2.9651)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9727 (1.0736)  time: 0.2103  data: 0.0006  max mem: 9147
Epoch: [200]  [ 800/1251]  eta: 0:01:36  lr: 0.001120  min_lr: 0.001120  loss: 3.6213 (2.9956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9462 (1.0602)  time: 0.2000  data: 0.0007  max mem: 9147
Epoch: [200]  [1000/1251]  eta: 0:00:53  lr: 0.001117  min_lr: 0.001117  loss: 2.3775 (2.9715)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8733 (1.0425)  time: 0.2012  data: 0.0006  max mem: 9147
Epoch: [200]  [1200/1251]  eta: 0:00:10  lr: 0.001114  min_lr: 0.001114  loss: 2.8117 (2.9915)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0273 (1.0476)  time: 0.1903  data: 0.0014  max mem: 9147
Epoch: [200]  [1250/1251]  eta: 0:00:00  lr: 0.001113  min_lr: 0.001113  loss: 2.4553 (2.9924)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9045 (1.0430)  time: 0.1460  data: 0.0014  max mem: 9147
Epoch: [200] Total time: 0:04:23 (0.2103 s / it)
Averaged stats: lr: 0.001113  min_lr: 0.001113  loss: 2.4553 (2.9745)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9045 (1.0430)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.7081 (0.7081)  acc1: 84.8000 (84.8000)  acc5: 98.4000 (98.4000)  time: 5.6444  data: 5.5641  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8612 (0.8950)  acc1: 80.8000 (80.5091)  acc5: 96.0000 (95.6364)  time: 0.7827  data: 0.6901  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1393 (1.0965)  acc1: 76.0000 (76.8000)  acc5: 92.0000 (93.5048)  time: 0.2151  data: 0.1329  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2562 (1.1107)  acc1: 74.8000 (76.2560)  acc5: 92.0000 (93.3600)  time: 0.2109  data: 0.1329  max mem: 9147
Test: Total time: 0:00:10 (0.4120 s / it)
* Acc@1 76.134 Acc@5 93.300 loss 1.118
Accuracy of the model on the 50000 test images: 76.1%
Max accuracy: 76.21%
Epoch: [201]  [   0/1251]  eta: 1:05:10  lr: 0.001113  min_lr: 0.001113  loss: 3.9361 (3.9361)  weight_decay: 0.0500 (0.0500)  time: 3.1258  data: 2.3210  max mem: 9147
Epoch: [201]  [ 200/1251]  eta: 0:03:58  lr: 0.001110  min_lr: 0.001110  loss: 2.4355 (2.9682)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0915 (1.0939)  time: 0.2369  data: 0.0007  max mem: 9147
Epoch: [201]  [ 400/1251]  eta: 0:03:06  lr: 0.001106  min_lr: 0.001106  loss: 3.2940 (3.0337)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0996 (1.1095)  time: 0.2099  data: 0.0067  max mem: 9147
Epoch: [201]  [ 600/1251]  eta: 0:02:21  lr: 0.001103  min_lr: 0.001103  loss: 2.5320 (3.0086)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9361 (1.0623)  time: 0.2195  data: 0.0158  max mem: 9147
Epoch: [201]  [ 800/1251]  eta: 0:01:36  lr: 0.001100  min_lr: 0.001100  loss: 3.3922 (3.0107)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0142 (1.0362)  time: 0.1908  data: 0.0005  max mem: 9147
Epoch: [201]  [1000/1251]  eta: 0:00:53  lr: 0.001097  min_lr: 0.001097  loss: 3.0036 (3.0058)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9843 (1.0401)  time: 0.1992  data: 0.0007  max mem: 9147
Epoch: [201]  [1200/1251]  eta: 0:00:10  lr: 0.001094  min_lr: 0.001094  loss: 2.9179 (2.9932)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8503 (1.0276)  time: 0.2050  data: 0.0008  max mem: 9147
Epoch: [201]  [1250/1251]  eta: 0:00:00  lr: 0.001093  min_lr: 0.001093  loss: 3.3051 (2.9956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9663 (1.0293)  time: 0.1533  data: 0.0014  max mem: 9147
Epoch: [201] Total time: 0:04:26 (0.2126 s / it)
Averaged stats: lr: 0.001093  min_lr: 0.001093  loss: 3.3051 (2.9740)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9663 (1.0293)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.8319 (0.8319)  acc1: 86.4000 (86.4000)  acc5: 98.0000 (98.0000)  time: 5.6907  data: 5.5898  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9699 (0.9769)  acc1: 80.8000 (80.3273)  acc5: 96.0000 (95.8546)  time: 0.7587  data: 0.6734  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1918 (1.1675)  acc1: 74.4000 (76.4571)  acc5: 92.8000 (93.3524)  time: 0.2030  data: 0.1241  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2989 (1.1808)  acc1: 73.2000 (76.0160)  acc5: 91.6000 (93.1840)  time: 0.2015  data: 0.1240  max mem: 9147
Test: Total time: 0:00:10 (0.4055 s / it)
* Acc@1 76.026 Acc@5 93.240 loss 1.178
Accuracy of the model on the 50000 test images: 76.0%
Max accuracy: 76.21%
Epoch: [202]  [   0/1251]  eta: 1:04:52  lr: 0.001093  min_lr: 0.001093  loss: 4.0515 (4.0515)  weight_decay: 0.0500 (0.0500)  time: 3.1119  data: 2.7804  max mem: 9147
Epoch: [202]  [ 200/1251]  eta: 0:03:53  lr: 0.001090  min_lr: 0.001090  loss: 2.9457 (2.9242)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0651 (1.0667)  time: 0.1777  data: 0.0006  max mem: 9147
Epoch: [202]  [ 400/1251]  eta: 0:02:57  lr: 0.001086  min_lr: 0.001086  loss: 2.3799 (2.9140)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9729 (1.0560)  time: 0.1998  data: 0.0010  max mem: 9147
Epoch: [202]  [ 600/1251]  eta: 0:02:10  lr: 0.001083  min_lr: 0.001083  loss: 2.4746 (2.9423)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9916 (1.0666)  time: 0.1799  data: 0.0005  max mem: 9147
Epoch: [202]  [ 800/1251]  eta: 0:01:28  lr: 0.001080  min_lr: 0.001080  loss: 3.1594 (2.9327)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0718 (1.0518)  time: 0.1962  data: 0.0006  max mem: 9147
Epoch: [202]  [1000/1251]  eta: 0:00:49  lr: 0.001077  min_lr: 0.001077  loss: 2.6477 (2.9403)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9529 (1.0524)  time: 0.2006  data: 0.0007  max mem: 9147
Epoch: [202]  [1200/1251]  eta: 0:00:10  lr: 0.001074  min_lr: 0.001074  loss: 2.9374 (2.9517)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0932 (1.0582)  time: 0.1999  data: 0.0007  max mem: 9147
Epoch: [202]  [1250/1251]  eta: 0:00:00  lr: 0.001073  min_lr: 0.001073  loss: 2.5370 (2.9519)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1207 (1.0632)  time: 0.1415  data: 0.0009  max mem: 9147
Epoch: [202] Total time: 0:04:12 (0.2017 s / it)
Averaged stats: lr: 0.001073  min_lr: 0.001073  loss: 2.5370 (2.9556)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1207 (1.0632)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.7027 (0.7027)  acc1: 85.2000 (85.2000)  acc5: 97.6000 (97.6000)  time: 5.6138  data: 5.5333  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8854 (0.9042)  acc1: 81.6000 (80.8727)  acc5: 96.0000 (95.8909)  time: 0.6978  data: 0.6225  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1344 (1.1101)  acc1: 75.2000 (76.7429)  acc5: 92.8000 (93.6762)  time: 0.1825  data: 0.1093  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2338 (1.1220)  acc1: 75.2000 (76.3360)  acc5: 92.4000 (93.7280)  time: 0.2012  data: 0.1296  max mem: 9147
Test: Total time: 0:00:10 (0.4013 s / it)
* Acc@1 76.448 Acc@5 93.370 loss 1.124
Accuracy of the model on the 50000 test images: 76.4%
Max accuracy: 76.45%
Epoch: [203]  [   0/1251]  eta: 0:57:40  lr: 0.001073  min_lr: 0.001073  loss: 2.0353 (2.0353)  weight_decay: 0.0500 (0.0500)  time: 2.7665  data: 2.4957  max mem: 9147
Epoch: [203]  [ 200/1251]  eta: 0:03:57  lr: 0.001070  min_lr: 0.001070  loss: 2.2664 (2.9358)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9648 (1.0730)  time: 0.2247  data: 0.0006  max mem: 9147
Epoch: [203]  [ 400/1251]  eta: 0:02:59  lr: 0.001066  min_lr: 0.001066  loss: 2.5316 (2.9647)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0548 (1.0892)  time: 0.1900  data: 0.0011  max mem: 9147
Epoch: [203]  [ 600/1251]  eta: 0:02:14  lr: 0.001063  min_lr: 0.001063  loss: 2.4383 (2.9617)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9061 (1.0605)  time: 0.2130  data: 0.0007  max mem: 9147
Epoch: [203]  [ 800/1251]  eta: 0:01:34  lr: 0.001060  min_lr: 0.001060  loss: 2.9178 (2.9763)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0173 (1.0510)  time: 0.2065  data: 0.0007  max mem: 9147
Epoch: [203]  [1000/1251]  eta: 0:00:52  lr: 0.001057  min_lr: 0.001057  loss: 2.3077 (2.9463)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0541 (1.0529)  time: 0.1916  data: 0.0006  max mem: 9147
Epoch: [203]  [1200/1251]  eta: 0:00:10  lr: 0.001054  min_lr: 0.001054  loss: 2.6076 (2.9592)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9872 (1.0423)  time: 0.2197  data: 0.0117  max mem: 9147
Epoch: [203]  [1250/1251]  eta: 0:00:00  lr: 0.001053  min_lr: 0.001053  loss: 2.5628 (2.9606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9305 (1.0376)  time: 0.1456  data: 0.0057  max mem: 9147
Epoch: [203] Total time: 0:04:24 (0.2116 s / it)
Averaged stats: lr: 0.001053  min_lr: 0.001053  loss: 2.5628 (2.9487)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9305 (1.0376)
Test:  [ 0/25]  eta: 0:02:17  loss: 0.7133 (0.7133)  acc1: 87.2000 (87.2000)  acc5: 98.4000 (98.4000)  time: 5.4849  data: 5.3783  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8760 (0.9198)  acc1: 82.8000 (81.0909)  acc5: 96.0000 (95.8545)  time: 0.7543  data: 0.6769  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1811 (1.1109)  acc1: 73.2000 (76.9524)  acc5: 93.2000 (93.5810)  time: 0.2130  data: 0.1398  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2282 (1.1281)  acc1: 74.4000 (76.5440)  acc5: 92.0000 (93.4400)  time: 0.2343  data: 0.1621  max mem: 9147
Test: Total time: 0:00:10 (0.4218 s / it)
* Acc@1 76.408 Acc@5 93.332 loss 1.128
Accuracy of the model on the 50000 test images: 76.4%
Max accuracy: 76.45%
Epoch: [204]  [   0/1251]  eta: 1:07:00  lr: 0.001053  min_lr: 0.001053  loss: 2.1476 (2.1476)  weight_decay: 0.0500 (0.0500)  time: 3.2142  data: 1.6678  max mem: 9147
Epoch: [204]  [ 200/1251]  eta: 0:03:57  lr: 0.001050  min_lr: 0.001050  loss: 2.3959 (2.8908)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0644 (1.1478)  time: 0.2138  data: 0.0021  max mem: 9147
Epoch: [204]  [ 400/1251]  eta: 0:03:09  lr: 0.001047  min_lr: 0.001047  loss: 2.5880 (2.9070)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0613 (1.1526)  time: 0.2305  data: 0.0007  max mem: 9147
Epoch: [204]  [ 600/1251]  eta: 0:02:24  lr: 0.001044  min_lr: 0.001044  loss: 2.8463 (2.9312)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0478 (1.1342)  time: 0.2352  data: 0.0006  max mem: 9147
Epoch: [204]  [ 800/1251]  eta: 0:01:38  lr: 0.001040  min_lr: 0.001040  loss: 2.6505 (2.9301)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9643 (1.1137)  time: 0.2051  data: 0.0006  max mem: 9147
Epoch: [204]  [1000/1251]  eta: 0:00:54  lr: 0.001037  min_lr: 0.001037  loss: 2.4113 (2.9302)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0399 (1.0963)  time: 0.2045  data: 0.0006  max mem: 9147
Epoch: [204]  [1200/1251]  eta: 0:00:11  lr: 0.001034  min_lr: 0.001034  loss: 3.4288 (2.9227)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9855 (nan)  time: 0.2297  data: 0.0006  max mem: 9147
Epoch: [204]  [1250/1251]  eta: 0:00:00  lr: 0.001033  min_lr: 0.001033  loss: 2.6315 (2.9255)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9171 (nan)  time: 0.1431  data: 0.0016  max mem: 9147
Epoch: [204] Total time: 0:04:30 (0.2163 s / it)
Averaged stats: lr: 0.001033  min_lr: 0.001033  loss: 2.6315 (2.9472)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9171 (nan)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.7480 (0.7480)  acc1: 87.2000 (87.2000)  acc5: 98.8000 (98.8000)  time: 5.5511  data: 5.4585  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8635 (0.9202)  acc1: 83.6000 (80.8000)  acc5: 96.8000 (96.1818)  time: 0.7520  data: 0.6544  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1876 (1.1156)  acc1: 74.4000 (76.8762)  acc5: 92.4000 (93.4095)  time: 0.2125  data: 0.1279  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2492 (1.1306)  acc1: 74.4000 (76.4480)  acc5: 91.6000 (93.2160)  time: 0.2065  data: 0.1278  max mem: 9147
Test: Total time: 0:00:10 (0.4068 s / it)
* Acc@1 76.298 Acc@5 93.298 loss 1.132
Accuracy of the model on the 50000 test images: 76.3%
Max accuracy: 76.45%
Epoch: [205]  [   0/1251]  eta: 0:59:20  lr: 0.001033  min_lr: 0.001033  loss: 3.7081 (3.7081)  weight_decay: 0.0500 (0.0500)  time: 2.8464  data: 1.6336  max mem: 9147
Epoch: [205]  [ 200/1251]  eta: 0:03:54  lr: 0.001030  min_lr: 0.001030  loss: 2.4942 (2.9468)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8816 (0.9983)  time: 0.2082  data: 0.0006  max mem: 9147
Epoch: [205]  [ 400/1251]  eta: 0:02:56  lr: 0.001027  min_lr: 0.001027  loss: 2.5095 (2.9695)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9408 (1.0384)  time: 0.1949  data: 0.0005  max mem: 9147
Epoch: [205]  [ 600/1251]  eta: 0:02:14  lr: 0.001024  min_lr: 0.001024  loss: 2.5126 (2.9433)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0225 (1.0815)  time: 0.2149  data: 0.0007  max mem: 9147
Epoch: [205]  [ 800/1251]  eta: 0:01:33  lr: 0.001021  min_lr: 0.001021  loss: 2.5196 (2.9269)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1143 (1.0853)  time: 0.1841  data: 0.0005  max mem: 9147
Epoch: [205]  [1000/1251]  eta: 0:00:51  lr: 0.001018  min_lr: 0.001018  loss: 2.4867 (2.9315)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0679 (1.0842)  time: 0.1737  data: 0.0005  max mem: 9147
Epoch: [205]  [1200/1251]  eta: 0:00:10  lr: 0.001014  min_lr: 0.001014  loss: 2.8664 (2.9281)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9952 (1.0841)  time: 0.1940  data: 0.0011  max mem: 9147
Epoch: [205]  [1250/1251]  eta: 0:00:00  lr: 0.001014  min_lr: 0.001014  loss: 2.5081 (2.9250)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9161 (1.0832)  time: 0.1391  data: 0.0014  max mem: 9147
Epoch: [205] Total time: 0:04:12 (0.2022 s / it)
Averaged stats: lr: 0.001014  min_lr: 0.001014  loss: 2.5081 (2.9460)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9161 (1.0832)
Test:  [ 0/25]  eta: 0:02:25  loss: 0.6692 (0.6692)  acc1: 86.0000 (86.0000)  acc5: 98.4000 (98.4000)  time: 5.8274  data: 5.7321  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8880 (0.8791)  acc1: 81.2000 (80.5818)  acc5: 95.6000 (95.8909)  time: 0.7705  data: 0.6958  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1152 (1.0831)  acc1: 75.2000 (76.5143)  acc5: 93.6000 (93.6952)  time: 0.2046  data: 0.1329  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2093 (1.0955)  acc1: 73.2000 (76.1760)  acc5: 92.4000 (93.6160)  time: 0.2033  data: 0.1328  max mem: 9147
Test: Total time: 0:00:10 (0.4109 s / it)
* Acc@1 76.612 Acc@5 93.434 loss 1.092
Accuracy of the model on the 50000 test images: 76.6%
Max accuracy: 76.61%
Epoch: [206]  [   0/1251]  eta: 1:01:23  lr: 0.001014  min_lr: 0.001014  loss: 1.9847 (1.9847)  weight_decay: 0.0500 (0.0500)  time: 2.9446  data: 2.7519  max mem: 9147
Epoch: [206]  [ 200/1251]  eta: 0:03:54  lr: 0.001011  min_lr: 0.001011  loss: 2.3922 (2.8779)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8581 (0.9955)  time: 0.2098  data: 0.0007  max mem: 9147
Epoch: [206]  [ 400/1251]  eta: 0:02:59  lr: 0.001007  min_lr: 0.001007  loss: 2.7481 (2.9479)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1419 (1.0431)  time: 0.1895  data: 0.0012  max mem: 9147
Epoch: [206]  [ 600/1251]  eta: 0:02:17  lr: 0.001004  min_lr: 0.001004  loss: 2.2216 (2.9207)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1469 (1.0621)  time: 0.2290  data: 0.0006  max mem: 9147
Epoch: [206]  [ 800/1251]  eta: 0:01:34  lr: 0.001001  min_lr: 0.001001  loss: 2.5067 (2.9219)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0966 (1.0783)  time: 0.1949  data: 0.0006  max mem: 9147
Epoch: [206]  [1000/1251]  eta: 0:00:52  lr: 0.000998  min_lr: 0.000998  loss: 2.4122 (2.9403)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9449 (1.0624)  time: 0.2143  data: 0.0128  max mem: 9147
Epoch: [206]  [1200/1251]  eta: 0:00:10  lr: 0.000995  min_lr: 0.000995  loss: 2.3672 (2.9312)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9801 (1.0539)  time: 0.2334  data: 0.0007  max mem: 9147
Epoch: [206]  [1250/1251]  eta: 0:00:00  lr: 0.000994  min_lr: 0.000994  loss: 2.3732 (2.9322)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0059 (1.0558)  time: 0.1422  data: 0.0014  max mem: 9147
Epoch: [206] Total time: 0:04:21 (0.2092 s / it)
Averaged stats: lr: 0.000994  min_lr: 0.000994  loss: 2.3732 (2.9595)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0059 (1.0558)
Test:  [ 0/25]  eta: 0:02:25  loss: 0.7046 (0.7046)  acc1: 86.0000 (86.0000)  acc5: 97.2000 (97.2000)  time: 5.8255  data: 5.7451  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9100 (0.8742)  acc1: 83.6000 (81.7818)  acc5: 96.4000 (95.8545)  time: 0.7173  data: 0.6249  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0647 (1.0642)  acc1: 74.8000 (77.3333)  acc5: 92.4000 (93.6191)  time: 0.1792  data: 0.0978  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2252 (1.0812)  acc1: 74.4000 (76.8160)  acc5: 92.4000 (93.6000)  time: 0.1965  data: 0.1202  max mem: 9147
Test: Total time: 0:00:10 (0.4085 s / it)
* Acc@1 76.678 Acc@5 93.518 loss 1.081
Accuracy of the model on the 50000 test images: 76.7%
Max accuracy: 76.68%
Epoch: [207]  [   0/1251]  eta: 1:08:41  lr: 0.000994  min_lr: 0.000994  loss: 2.1484 (2.1484)  weight_decay: 0.0500 (0.0500)  time: 3.2948  data: 3.1079  max mem: 9147
Epoch: [207]  [ 200/1251]  eta: 0:03:30  lr: 0.000991  min_lr: 0.000991  loss: 2.4882 (2.8295)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9065 (1.0071)  time: 0.1887  data: 0.0006  max mem: 9147
Epoch: [207]  [ 400/1251]  eta: 0:02:47  lr: 0.000988  min_lr: 0.000988  loss: 2.8080 (2.8534)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9913 (1.0630)  time: 0.2001  data: 0.0006  max mem: 9147
Epoch: [207]  [ 600/1251]  eta: 0:02:11  lr: 0.000985  min_lr: 0.000985  loss: 2.5367 (2.9048)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9615 (1.0346)  time: 0.2243  data: 0.0006  max mem: 9147
Epoch: [207]  [ 800/1251]  eta: 0:01:32  lr: 0.000982  min_lr: 0.000982  loss: 2.5315 (2.9382)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9390 (1.0302)  time: 0.2193  data: 0.0015  max mem: 9147
Epoch: [207]  [1000/1251]  eta: 0:00:51  lr: 0.000979  min_lr: 0.000979  loss: 2.3634 (2.9392)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1142 (1.0487)  time: 0.2143  data: 0.0006  max mem: 9147
Epoch: [207]  [1200/1251]  eta: 0:00:10  lr: 0.000976  min_lr: 0.000976  loss: 2.9367 (2.9610)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0013 (1.0491)  time: 0.2052  data: 0.0008  max mem: 9147
Epoch: [207]  [1250/1251]  eta: 0:00:00  lr: 0.000975  min_lr: 0.000975  loss: 2.3364 (2.9557)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0812 (1.0526)  time: 0.1529  data: 0.0013  max mem: 9147
Epoch: [207] Total time: 0:04:19 (0.2078 s / it)
Averaged stats: lr: 0.000975  min_lr: 0.000975  loss: 2.3364 (2.9389)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0812 (1.0526)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.7779 (0.7779)  acc1: 86.8000 (86.8000)  acc5: 98.0000 (98.0000)  time: 5.6618  data: 5.5814  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8931 (0.9298)  acc1: 82.8000 (81.0545)  acc5: 96.4000 (96.1091)  time: 0.7287  data: 0.6533  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1892 (1.1321)  acc1: 74.4000 (76.6095)  acc5: 92.4000 (93.3714)  time: 0.1962  data: 0.1242  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2984 (1.1468)  acc1: 73.6000 (76.2080)  acc5: 91.6000 (93.2960)  time: 0.2007  data: 0.1297  max mem: 9147
Test: Total time: 0:00:10 (0.4045 s / it)
* Acc@1 76.524 Acc@5 93.384 loss 1.140
Accuracy of the model on the 50000 test images: 76.5%
Max accuracy: 76.68%
Epoch: [208]  [   0/1251]  eta: 1:06:58  lr: 0.000975  min_lr: 0.000975  loss: 2.4659 (2.4659)  weight_decay: 0.0500 (0.0500)  time: 3.2119  data: 1.6931  max mem: 9147
Epoch: [208]  [ 200/1251]  eta: 0:03:45  lr: 0.000972  min_lr: 0.000972  loss: 2.4491 (2.9575)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0732 (1.0227)  time: 0.1854  data: 0.0024  max mem: 9147
Epoch: [208]  [ 400/1251]  eta: 0:02:58  lr: 0.000969  min_lr: 0.000969  loss: 2.3671 (2.9026)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0318 (1.0910)  time: 0.2023  data: 0.0006  max mem: 9147
Epoch: [208]  [ 600/1251]  eta: 0:02:14  lr: 0.000966  min_lr: 0.000966  loss: 2.7444 (2.8957)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0705 (1.1035)  time: 0.2002  data: 0.0005  max mem: 9147
Epoch: [208]  [ 800/1251]  eta: 0:01:31  lr: 0.000963  min_lr: 0.000963  loss: 2.3222 (2.8964)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8554 (1.1099)  time: 0.2025  data: 0.0005  max mem: 9147
Epoch: [208]  [1000/1251]  eta: 0:00:50  lr: 0.000960  min_lr: 0.000960  loss: 2.6446 (2.8907)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9358 (1.0861)  time: 0.2006  data: 0.0006  max mem: 9147
Epoch: [208]  [1200/1251]  eta: 0:00:10  lr: 0.000956  min_lr: 0.000956  loss: 2.4603 (2.8985)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0122 (1.0827)  time: 0.2110  data: 0.0007  max mem: 9147
Epoch: [208]  [1250/1251]  eta: 0:00:00  lr: 0.000956  min_lr: 0.000956  loss: 2.8348 (2.9086)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1895 (1.0904)  time: 0.1435  data: 0.0013  max mem: 9147
Epoch: [208] Total time: 0:04:13 (0.2022 s / it)
Averaged stats: lr: 0.000956  min_lr: 0.000956  loss: 2.8348 (2.9415)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1895 (1.0904)
Test:  [ 0/25]  eta: 0:02:17  loss: 0.8028 (0.8028)  acc1: 85.2000 (85.2000)  acc5: 98.0000 (98.0000)  time: 5.4921  data: 5.4117  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9459 (0.9758)  acc1: 82.4000 (80.6182)  acc5: 96.4000 (96.1455)  time: 0.7298  data: 0.6341  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2095 (1.1711)  acc1: 74.4000 (76.3810)  acc5: 92.8000 (93.7143)  time: 0.2053  data: 0.1203  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2993 (1.1892)  acc1: 74.4000 (75.7280)  acc5: 92.0000 (93.3600)  time: 0.2047  data: 0.1256  max mem: 9147
Test: Total time: 0:00:10 (0.4050 s / it)
* Acc@1 76.378 Acc@5 93.432 loss 1.177
Accuracy of the model on the 50000 test images: 76.4%
Max accuracy: 76.68%
Epoch: [209]  [   0/1251]  eta: 1:07:18  lr: 0.000956  min_lr: 0.000956  loss: 2.1364 (2.1364)  weight_decay: 0.0500 (0.0500)  time: 3.2281  data: 2.9746  max mem: 9147
Epoch: [209]  [ 200/1251]  eta: 0:03:56  lr: 0.000953  min_lr: 0.000953  loss: 2.9362 (2.9517)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.2102  data: 0.0006  max mem: 9147
Epoch: [209]  [ 400/1251]  eta: 0:02:59  lr: 0.000950  min_lr: 0.000950  loss: 2.5340 (2.8869)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9874 (nan)  time: 0.1925  data: 0.0005  max mem: 9147
Epoch: [209]  [ 600/1251]  eta: 0:02:13  lr: 0.000947  min_lr: 0.000947  loss: 2.3272 (2.8912)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1458 (nan)  time: 0.1987  data: 0.0011  max mem: 9147
Epoch: [209]  [ 800/1251]  eta: 0:01:33  lr: 0.000944  min_lr: 0.000944  loss: 2.8002 (2.9087)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0043 (nan)  time: 0.2161  data: 0.0007  max mem: 9147
Epoch: [209]  [1000/1251]  eta: 0:00:52  lr: 0.000940  min_lr: 0.000940  loss: 2.5070 (2.9118)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9918 (nan)  time: 0.2097  data: 0.0012  max mem: 9147
Epoch: [209]  [1200/1251]  eta: 0:00:10  lr: 0.000937  min_lr: 0.000937  loss: 2.3489 (2.9234)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0717 (nan)  time: 0.2147  data: 0.0006  max mem: 9147
Epoch: [209]  [1250/1251]  eta: 0:00:00  lr: 0.000937  min_lr: 0.000937  loss: 2.3559 (2.9272)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9835 (nan)  time: 0.1393  data: 0.0012  max mem: 9147
Epoch: [209] Total time: 0:04:20 (0.2082 s / it)
Averaged stats: lr: 0.000937  min_lr: 0.000937  loss: 2.3559 (2.9433)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9835 (nan)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.7028 (0.7028)  acc1: 88.0000 (88.0000)  acc5: 98.4000 (98.4000)  time: 5.5435  data: 5.4408  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8980 (0.9062)  acc1: 81.6000 (81.3455)  acc5: 96.0000 (96.1455)  time: 0.7231  data: 0.6479  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1584 (1.0950)  acc1: 74.4000 (77.3143)  acc5: 93.2000 (93.6381)  time: 0.2056  data: 0.1338  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2404 (1.1072)  acc1: 74.0000 (76.9280)  acc5: 93.2000 (93.6000)  time: 0.2041  data: 0.1337  max mem: 9147
Test: Total time: 0:00:10 (0.4053 s / it)
* Acc@1 76.754 Acc@5 93.556 loss 1.105
Accuracy of the model on the 50000 test images: 76.8%
Max accuracy: 76.75%
Epoch: [210]  [   0/1251]  eta: 1:08:30  lr: 0.000937  min_lr: 0.000937  loss: 3.8441 (3.8441)  weight_decay: 0.0500 (0.0500)  time: 3.2861  data: 3.1049  max mem: 9147
Epoch: [210]  [ 200/1251]  eta: 0:03:37  lr: 0.000934  min_lr: 0.000934  loss: 2.3452 (2.8445)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9073 (1.0078)  time: 0.2000  data: 0.0006  max mem: 9147
Epoch: [210]  [ 400/1251]  eta: 0:02:54  lr: 0.000931  min_lr: 0.000931  loss: 2.7201 (2.8889)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9776 (1.0443)  time: 0.2205  data: 0.0006  max mem: 9147
Epoch: [210]  [ 600/1251]  eta: 0:02:14  lr: 0.000928  min_lr: 0.000928  loss: 2.6550 (2.9098)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0481 (1.0978)  time: 0.2133  data: 0.0007  max mem: 9147
Epoch: [210]  [ 800/1251]  eta: 0:01:34  lr: 0.000925  min_lr: 0.000925  loss: 2.4261 (2.9208)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0661 (1.1098)  time: 0.2108  data: 0.0058  max mem: 9147
Epoch: [210]  [1000/1251]  eta: 0:00:52  lr: 0.000922  min_lr: 0.000922  loss: 2.5025 (2.9190)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9918 (1.1107)  time: 0.1998  data: 0.0006  max mem: 9147
Epoch: [210]  [1200/1251]  eta: 0:00:10  lr: 0.000918  min_lr: 0.000918  loss: 2.4337 (2.9258)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0685 (1.1035)  time: 0.2197  data: 0.0006  max mem: 9147
Epoch: [210]  [1250/1251]  eta: 0:00:00  lr: 0.000918  min_lr: 0.000918  loss: 2.3521 (2.9238)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1811 (1.1123)  time: 0.1452  data: 0.0010  max mem: 9147
Epoch: [210] Total time: 0:04:21 (0.2087 s / it)
Averaged stats: lr: 0.000918  min_lr: 0.000918  loss: 2.3521 (2.9430)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1811 (1.1123)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.7511 (0.7511)  acc1: 87.2000 (87.2000)  acc5: 98.4000 (98.4000)  time: 5.5495  data: 5.4225  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9256 (0.9314)  acc1: 81.6000 (81.5636)  acc5: 96.4000 (96.1818)  time: 0.7080  data: 0.6283  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1598 (1.1410)  acc1: 74.8000 (76.8571)  acc5: 93.6000 (93.8286)  time: 0.1850  data: 0.1101  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3217 (1.1520)  acc1: 73.2000 (76.3680)  acc5: 92.8000 (93.8240)  time: 0.2285  data: 0.1564  max mem: 9147
Test: Total time: 0:00:10 (0.4348 s / it)
* Acc@1 76.630 Acc@5 93.576 loss 1.148
Accuracy of the model on the 50000 test images: 76.6%
Max accuracy: 76.75%
Epoch: [211]  [   0/1251]  eta: 1:02:52  lr: 0.000918  min_lr: 0.000918  loss: 2.1201 (2.1201)  weight_decay: 0.0500 (0.0500)  time: 3.0156  data: 1.7310  max mem: 9147
Epoch: [211]  [ 200/1251]  eta: 0:03:49  lr: 0.000915  min_lr: 0.000915  loss: 2.3818 (2.9272)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1806 (1.2082)  time: 0.2094  data: 0.0009  max mem: 9147
Epoch: [211]  [ 400/1251]  eta: 0:03:04  lr: 0.000912  min_lr: 0.000912  loss: 2.3938 (2.9137)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0352 (1.1349)  time: 0.2161  data: 0.0072  max mem: 9147
Epoch: [211]  [ 600/1251]  eta: 0:02:19  lr: 0.000909  min_lr: 0.000909  loss: 2.9550 (2.9099)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0334 (1.1147)  time: 0.1941  data: 0.0019  max mem: 9147
Epoch: [211]  [ 800/1251]  eta: 0:01:33  lr: 0.000906  min_lr: 0.000906  loss: 2.8561 (2.9361)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2954 (1.1341)  time: 0.1875  data: 0.0005  max mem: 9147
Epoch: [211]  [1000/1251]  eta: 0:00:51  lr: 0.000903  min_lr: 0.000903  loss: 2.4164 (2.9240)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9421 (1.1177)  time: 0.1910  data: 0.0007  max mem: 9147
Epoch: [211]  [1200/1251]  eta: 0:00:10  lr: 0.000900  min_lr: 0.000900  loss: 2.2919 (2.9332)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0181 (1.0983)  time: 0.1999  data: 0.0006  max mem: 9147
Epoch: [211]  [1250/1251]  eta: 0:00:00  lr: 0.000899  min_lr: 0.000899  loss: 2.5652 (2.9339)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9926 (1.0963)  time: 0.1474  data: 0.0012  max mem: 9147
Epoch: [211] Total time: 0:04:12 (0.2020 s / it)
Averaged stats: lr: 0.000899  min_lr: 0.000899  loss: 2.5652 (2.9293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9926 (1.0963)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.7133 (0.7133)  acc1: 85.6000 (85.6000)  acc5: 98.8000 (98.8000)  time: 5.5340  data: 5.4537  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9023 (0.9299)  acc1: 81.6000 (80.8364)  acc5: 96.4000 (96.1091)  time: 0.7133  data: 0.6286  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1736 (1.1187)  acc1: 74.8000 (76.8381)  acc5: 93.2000 (93.7905)  time: 0.1940  data: 0.1158  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2330 (1.1289)  acc1: 74.8000 (76.6080)  acc5: 92.8000 (93.6640)  time: 0.1888  data: 0.1149  max mem: 9147
Test: Total time: 0:00:09 (0.3940 s / it)
* Acc@1 76.768 Acc@5 93.468 loss 1.126
Accuracy of the model on the 50000 test images: 76.8%
Max accuracy: 76.77%
Epoch: [212]  [   0/1251]  eta: 0:56:08  lr: 0.000899  min_lr: 0.000899  loss: 3.5771 (3.5771)  weight_decay: 0.0500 (0.0500)  time: 2.6926  data: 2.0561  max mem: 9147
Epoch: [212]  [ 200/1251]  eta: 0:03:53  lr: 0.000896  min_lr: 0.000896  loss: 2.2871 (2.7865)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9805 (1.0436)  time: 0.2099  data: 0.0070  max mem: 9147
Epoch: [212]  [ 400/1251]  eta: 0:03:06  lr: 0.000893  min_lr: 0.000893  loss: 2.4064 (2.8413)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0033 (1.0558)  time: 0.2303  data: 0.0007  max mem: 9147
Epoch: [212]  [ 600/1251]  eta: 0:02:19  lr: 0.000890  min_lr: 0.000890  loss: 3.1356 (2.9220)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0312 (1.0828)  time: 0.1909  data: 0.0006  max mem: 9147
Epoch: [212]  [ 800/1251]  eta: 0:01:34  lr: 0.000887  min_lr: 0.000887  loss: 2.3112 (2.9285)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9782 (1.0761)  time: 0.2031  data: 0.0009  max mem: 9147
Epoch: [212]  [1000/1251]  eta: 0:00:52  lr: 0.000884  min_lr: 0.000884  loss: 3.0619 (2.9409)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0609 (1.0874)  time: 0.1993  data: 0.0011  max mem: 9147
Epoch: [212]  [1200/1251]  eta: 0:00:10  lr: 0.000881  min_lr: 0.000881  loss: 2.9876 (2.9446)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1250 (1.0863)  time: 0.2050  data: 0.0006  max mem: 9147
Epoch: [212]  [1250/1251]  eta: 0:00:00  lr: 0.000880  min_lr: 0.000880  loss: 2.3791 (2.9417)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0226 (1.0812)  time: 0.1448  data: 0.0009  max mem: 9147
Epoch: [212] Total time: 0:04:21 (0.2094 s / it)
Averaged stats: lr: 0.000880  min_lr: 0.000880  loss: 2.3791 (2.9091)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0226 (1.0812)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.7483 (0.7483)  acc1: 87.6000 (87.6000)  acc5: 98.4000 (98.4000)  time: 5.6753  data: 5.5949  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9695 (0.9698)  acc1: 82.0000 (81.4182)  acc5: 96.0000 (95.8182)  time: 0.7453  data: 0.6709  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2077 (1.1591)  acc1: 74.8000 (77.1048)  acc5: 93.2000 (93.6381)  time: 0.1982  data: 0.1267  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3040 (1.1727)  acc1: 74.4000 (76.6720)  acc5: 92.0000 (93.5680)  time: 0.1969  data: 0.1266  max mem: 9147
Test: Total time: 0:00:09 (0.3997 s / it)
* Acc@1 76.836 Acc@5 93.472 loss 1.167
Accuracy of the model on the 50000 test images: 76.8%
Max accuracy: 76.84%
Epoch: [213]  [   0/1251]  eta: 0:59:10  lr: 0.000880  min_lr: 0.000880  loss: 3.7725 (3.7725)  weight_decay: 0.0500 (0.0500)  time: 2.8382  data: 2.6357  max mem: 9147
Epoch: [213]  [ 200/1251]  eta: 0:03:57  lr: 0.000877  min_lr: 0.000877  loss: 2.3820 (2.9099)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1506 (1.1426)  time: 0.2103  data: 0.0023  max mem: 9147
Epoch: [213]  [ 400/1251]  eta: 0:02:57  lr: 0.000874  min_lr: 0.000874  loss: 3.5305 (2.9213)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0555 (1.0887)  time: 0.1883  data: 0.0005  max mem: 9147
Epoch: [213]  [ 600/1251]  eta: 0:02:14  lr: 0.000871  min_lr: 0.000871  loss: 2.4641 (2.9108)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1537 (1.0927)  time: 0.2401  data: 0.0136  max mem: 9147
Epoch: [213]  [ 800/1251]  eta: 0:01:34  lr: 0.000868  min_lr: 0.000868  loss: 2.5765 (2.8987)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1270 (1.1138)  time: 0.1960  data: 0.0006  max mem: 9147
Epoch: [213]  [1000/1251]  eta: 0:00:51  lr: 0.000865  min_lr: 0.000865  loss: 2.3509 (2.9089)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1444 (1.1189)  time: 0.2044  data: 0.0008  max mem: 9147
Epoch: [213]  [1200/1251]  eta: 0:00:10  lr: 0.000863  min_lr: 0.000863  loss: 3.5906 (2.8971)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0663 (1.1298)  time: 0.2096  data: 0.0006  max mem: 9147
Epoch: [213]  [1250/1251]  eta: 0:00:00  lr: 0.000862  min_lr: 0.000862  loss: 2.3819 (2.8989)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9631 (1.1227)  time: 0.1415  data: 0.0009  max mem: 9147
Epoch: [213] Total time: 0:04:21 (0.2087 s / it)
Averaged stats: lr: 0.000862  min_lr: 0.000862  loss: 2.3819 (2.9204)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9631 (1.1227)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.7044 (0.7044)  acc1: 86.0000 (86.0000)  acc5: 98.0000 (98.0000)  time: 5.4737  data: 5.3849  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8730 (0.9086)  acc1: 82.0000 (81.3455)  acc5: 96.4000 (95.8546)  time: 0.7673  data: 0.6707  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1497 (1.0924)  acc1: 74.4000 (77.2762)  acc5: 92.0000 (93.5429)  time: 0.2171  data: 0.1330  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2231 (1.1074)  acc1: 74.0000 (76.9920)  acc5: 92.0000 (93.5680)  time: 0.2110  data: 0.1329  max mem: 9147
Test: Total time: 0:00:10 (0.4083 s / it)
* Acc@1 76.918 Acc@5 93.430 loss 1.106
Accuracy of the model on the 50000 test images: 76.9%
Max accuracy: 76.92%
Epoch: [214]  [   0/1251]  eta: 0:58:41  lr: 0.000862  min_lr: 0.000862  loss: 2.3210 (2.3210)  weight_decay: 0.0500 (0.0500)  time: 2.8146  data: 2.5746  max mem: 9147
Epoch: [214]  [ 200/1251]  eta: 0:03:53  lr: 0.000859  min_lr: 0.000859  loss: 2.6575 (2.9038)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1239 (1.1356)  time: 0.2060  data: 0.0007  max mem: 9147
Epoch: [214]  [ 400/1251]  eta: 0:03:05  lr: 0.000856  min_lr: 0.000856  loss: 2.2930 (2.9181)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1413 (1.1373)  time: 0.2010  data: 0.0008  max mem: 9147
Epoch: [214]  [ 600/1251]  eta: 0:02:20  lr: 0.000853  min_lr: 0.000853  loss: 2.3516 (2.8917)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0469 (1.1130)  time: 0.1949  data: 0.0019  max mem: 9147
Epoch: [214]  [ 800/1251]  eta: 0:01:34  lr: 0.000850  min_lr: 0.000850  loss: 2.4550 (2.8873)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0267 (1.1044)  time: 0.1860  data: 0.0005  max mem: 9147
Epoch: [214]  [1000/1251]  eta: 0:00:51  lr: 0.000847  min_lr: 0.000847  loss: 3.6159 (2.8907)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9683 (1.0918)  time: 0.1997  data: 0.0006  max mem: 9147
Epoch: [214]  [1200/1251]  eta: 0:00:10  lr: 0.000844  min_lr: 0.000844  loss: 2.3734 (2.8922)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2640 (1.1226)  time: 0.2001  data: 0.0006  max mem: 9147
Epoch: [214]  [1250/1251]  eta: 0:00:00  lr: 0.000844  min_lr: 0.000844  loss: 2.2536 (2.8973)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1976 (1.1265)  time: 0.1431  data: 0.0010  max mem: 9147
Epoch: [214] Total time: 0:04:21 (0.2087 s / it)
Averaged stats: lr: 0.000844  min_lr: 0.000844  loss: 2.2536 (2.9250)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1976 (1.1265)
Test:  [ 0/25]  eta: 0:01:20  loss: 0.7022 (0.7022)  acc1: 86.4000 (86.4000)  acc5: 98.0000 (98.0000)  time: 3.2050  data: 3.1196  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 0.8933 (0.9043)  acc1: 81.2000 (80.7273)  acc5: 96.4000 (96.1091)  time: 0.5416  data: 0.4633  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1580 (1.1041)  acc1: 74.0000 (76.9333)  acc5: 92.4000 (93.6762)  time: 0.2723  data: 0.1955  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2616 (1.1216)  acc1: 74.0000 (76.4800)  acc5: 92.4000 (93.5520)  time: 0.2314  data: 0.1561  max mem: 9147
Test: Total time: 0:00:09 (0.3977 s / it)
* Acc@1 77.018 Acc@5 93.476 loss 1.110
Accuracy of the model on the 50000 test images: 77.0%
Max accuracy: 77.02%
Epoch: [215]  [   0/1251]  eta: 0:58:15  lr: 0.000843  min_lr: 0.000843  loss: 2.0958 (2.0958)  weight_decay: 0.0500 (0.0500)  time: 2.7939  data: 2.4382  max mem: 9147
Epoch: [215]  [ 200/1251]  eta: 0:03:58  lr: 0.000841  min_lr: 0.000841  loss: 2.4232 (2.9554)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0681 (1.1275)  time: 0.2149  data: 0.0006  max mem: 9147
Epoch: [215]  [ 400/1251]  eta: 0:03:07  lr: 0.000838  min_lr: 0.000838  loss: 3.5540 (2.9451)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0754 (1.0738)  time: 0.2001  data: 0.0010  max mem: 9147
Epoch: [215]  [ 600/1251]  eta: 0:02:18  lr: 0.000835  min_lr: 0.000835  loss: 2.3243 (2.9177)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1390 (1.1066)  time: 0.1998  data: 0.0010  max mem: 9147
Epoch: [215]  [ 800/1251]  eta: 0:01:35  lr: 0.000832  min_lr: 0.000832  loss: 2.5099 (2.9154)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0656 (1.0938)  time: 0.2258  data: 0.0007  max mem: 9147
Epoch: [215]  [1000/1251]  eta: 0:00:52  lr: 0.000829  min_lr: 0.000829  loss: 2.4794 (2.9294)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2403 (1.1226)  time: 0.1950  data: 0.0007  max mem: 9147
Epoch: [215]  [1200/1251]  eta: 0:00:10  lr: 0.000826  min_lr: 0.000826  loss: 2.3919 (2.9280)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0028 (1.1099)  time: 0.1903  data: 0.0007  max mem: 9147
Epoch: [215]  [1250/1251]  eta: 0:00:00  lr: 0.000825  min_lr: 0.000825  loss: 2.5590 (2.9217)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9685 (1.1109)  time: 0.1392  data: 0.0012  max mem: 9147
Epoch: [215] Total time: 0:04:19 (0.2072 s / it)
Averaged stats: lr: 0.000825  min_lr: 0.000825  loss: 2.5590 (2.9187)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9685 (1.1109)
Test:  [ 0/25]  eta: 0:01:51  loss: 0.6847 (0.6847)  acc1: 85.6000 (85.6000)  acc5: 98.0000 (98.0000)  time: 4.4646  data: 4.3821  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9168 (0.8883)  acc1: 81.6000 (81.4182)  acc5: 96.4000 (96.2182)  time: 0.7103  data: 0.6353  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1346 (1.0884)  acc1: 75.6000 (77.2381)  acc5: 92.4000 (93.7143)  time: 0.2543  data: 0.1826  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2079 (1.1031)  acc1: 74.0000 (76.7680)  acc5: 92.0000 (93.6640)  time: 0.2060  data: 0.1357  max mem: 9147
Test: Total time: 0:00:10 (0.4002 s / it)
* Acc@1 76.944 Acc@5 93.644 loss 1.095
Accuracy of the model on the 50000 test images: 76.9%
Max accuracy: 77.02%
Epoch: [216]  [   0/1251]  eta: 1:06:47  lr: 0.000825  min_lr: 0.000825  loss: 3.9815 (3.9815)  weight_decay: 0.0500 (0.0500)  time: 3.2033  data: 2.7348  max mem: 9147
Epoch: [216]  [ 200/1251]  eta: 0:03:37  lr: 0.000822  min_lr: 0.000822  loss: 3.6687 (2.9754)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1144 (1.1258)  time: 0.1957  data: 0.0011  max mem: 9147
Epoch: [216]  [ 400/1251]  eta: 0:02:49  lr: 0.000819  min_lr: 0.000819  loss: 2.4955 (2.9427)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0049 (1.1050)  time: 0.1903  data: 0.0004  max mem: 9147
Epoch: [216]  [ 600/1251]  eta: 0:02:07  lr: 0.000817  min_lr: 0.000817  loss: 2.2765 (2.9152)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2366 (1.1439)  time: 0.1939  data: 0.0012  max mem: 9147
Epoch: [216]  [ 800/1251]  eta: 0:01:28  lr: 0.000814  min_lr: 0.000814  loss: 2.3777 (2.9155)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0215 (1.1219)  time: 0.1900  data: 0.0005  max mem: 9147
Epoch: [216]  [1000/1251]  eta: 0:00:49  lr: 0.000811  min_lr: 0.000811  loss: 2.4656 (2.9103)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1076 (1.1201)  time: 0.2288  data: 0.0006  max mem: 9147
Epoch: [216]  [1200/1251]  eta: 0:00:10  lr: 0.000808  min_lr: 0.000808  loss: 2.3401 (2.8975)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9737 (1.1125)  time: 0.1892  data: 0.0006  max mem: 9147
Epoch: [216]  [1250/1251]  eta: 0:00:00  lr: 0.000807  min_lr: 0.000807  loss: 2.5037 (2.8911)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9686 (1.1099)  time: 0.1385  data: 0.0010  max mem: 9147
Epoch: [216] Total time: 0:04:06 (0.1966 s / it)
Averaged stats: lr: 0.000807  min_lr: 0.000807  loss: 2.5037 (2.9157)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9686 (1.1099)
Test:  [ 0/25]  eta: 0:01:18  loss: 0.7216 (0.7216)  acc1: 86.4000 (86.4000)  acc5: 97.6000 (97.6000)  time: 3.1542  data: 3.0738  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 0.9017 (0.8983)  acc1: 81.6000 (81.3455)  acc5: 96.4000 (95.9636)  time: 0.5644  data: 0.4875  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1670 (1.0851)  acc1: 75.6000 (77.2952)  acc5: 92.8000 (93.7143)  time: 0.2674  data: 0.1934  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2078 (1.0996)  acc1: 73.6000 (76.7520)  acc5: 92.4000 (93.6800)  time: 0.2273  data: 0.1557  max mem: 9147
Test: Total time: 0:00:10 (0.4025 s / it)
* Acc@1 76.952 Acc@5 93.558 loss 1.095
Accuracy of the model on the 50000 test images: 77.0%
Max accuracy: 77.02%
Epoch: [217]  [   0/1251]  eta: 1:05:55  lr: 0.000807  min_lr: 0.000807  loss: 2.1875 (2.1875)  weight_decay: 0.0500 (0.0500)  time: 3.1620  data: 2.1089  max mem: 9147
Epoch: [217]  [ 200/1251]  eta: 0:03:58  lr: 0.000804  min_lr: 0.000804  loss: 2.4438 (2.9041)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9836 (1.1417)  time: 0.2159  data: 0.0007  max mem: 9147
Epoch: [217]  [ 400/1251]  eta: 0:02:59  lr: 0.000801  min_lr: 0.000801  loss: 2.2133 (2.9049)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0290 (1.1381)  time: 0.1867  data: 0.0011  max mem: 9147
Epoch: [217]  [ 600/1251]  eta: 0:02:14  lr: 0.000799  min_lr: 0.000799  loss: 3.2856 (2.9264)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1881 (1.1280)  time: 0.2040  data: 0.0008  max mem: 9147
Epoch: [217]  [ 800/1251]  eta: 0:01:33  lr: 0.000796  min_lr: 0.000796  loss: 2.3411 (2.9380)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1249 (1.1269)  time: 0.2238  data: 0.0007  max mem: 9147
Epoch: [217]  [1000/1251]  eta: 0:00:52  lr: 0.000793  min_lr: 0.000793  loss: 2.5131 (2.9374)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0351 (1.1185)  time: 0.1948  data: 0.0005  max mem: 9147
Epoch: [217]  [1200/1251]  eta: 0:00:10  lr: 0.000790  min_lr: 0.000790  loss: 2.7546 (2.9241)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0003 (1.1151)  time: 0.2106  data: 0.0006  max mem: 9147
Epoch: [217]  [1250/1251]  eta: 0:00:00  lr: 0.000789  min_lr: 0.000789  loss: 2.4392 (2.9234)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0279 (1.1133)  time: 0.1381  data: 0.0011  max mem: 9147
Epoch: [217] Total time: 0:04:20 (0.2081 s / it)
Averaged stats: lr: 0.000789  min_lr: 0.000789  loss: 2.4392 (2.9194)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0279 (1.1133)
Test:  [ 0/25]  eta: 0:01:21  loss: 0.7159 (0.7159)  acc1: 86.4000 (86.4000)  acc5: 98.8000 (98.8000)  time: 3.2666  data: 3.1863  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 0.9068 (0.9114)  acc1: 82.4000 (81.3818)  acc5: 96.4000 (96.0000)  time: 0.5718  data: 0.4981  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1351 (1.0845)  acc1: 74.8000 (77.3714)  acc5: 92.4000 (93.4286)  time: 0.2777  data: 0.2039  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1591 (1.0959)  acc1: 74.8000 (76.9120)  acc5: 92.0000 (93.4080)  time: 0.2078  data: 0.1359  max mem: 9147
Test: Total time: 0:00:09 (0.3867 s / it)
* Acc@1 77.004 Acc@5 93.530 loss 1.092
Accuracy of the model on the 50000 test images: 77.0%
Max accuracy: 77.02%
Epoch: [218]  [   0/1251]  eta: 0:58:45  lr: 0.000789  min_lr: 0.000789  loss: 4.0005 (4.0005)  weight_decay: 0.0500 (0.0500)  time: 2.8182  data: 1.9500  max mem: 9147
Epoch: [218]  [ 200/1251]  eta: 0:03:53  lr: 0.000786  min_lr: 0.000786  loss: 2.4373 (2.8420)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1446 (1.1202)  time: 0.2008  data: 0.0006  max mem: 9147
Epoch: [218]  [ 400/1251]  eta: 0:03:06  lr: 0.000784  min_lr: 0.000784  loss: 2.3128 (2.8745)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1707 (1.1573)  time: 0.2238  data: 0.0007  max mem: 9147
Epoch: [218]  [ 600/1251]  eta: 0:02:21  lr: 0.000781  min_lr: 0.000781  loss: 2.4478 (2.8861)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0219 (1.1292)  time: 0.2206  data: 0.0007  max mem: 9147
Epoch: [218]  [ 800/1251]  eta: 0:01:37  lr: 0.000778  min_lr: 0.000778  loss: 2.6851 (2.9037)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0108 (1.1128)  time: 0.1942  data: 0.0006  max mem: 9147
Epoch: [218]  [1000/1251]  eta: 0:00:53  lr: 0.000775  min_lr: 0.000775  loss: 2.2886 (2.8916)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1590 (1.1221)  time: 0.2251  data: 0.0006  max mem: 9147
Epoch: [218]  [1200/1251]  eta: 0:00:10  lr: 0.000772  min_lr: 0.000772  loss: 2.3679 (2.8991)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1524 (1.1295)  time: 0.1845  data: 0.0018  max mem: 9147
Epoch: [218]  [1250/1251]  eta: 0:00:00  lr: 0.000772  min_lr: 0.000772  loss: 3.6520 (2.9094)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0997 (1.1275)  time: 0.1381  data: 0.0009  max mem: 9147
Epoch: [218] Total time: 0:04:21 (0.2089 s / it)
Averaged stats: lr: 0.000772  min_lr: 0.000772  loss: 3.6520 (2.9036)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0997 (1.1275)
Test:  [ 0/25]  eta: 0:02:24  loss: 0.7773 (0.7773)  acc1: 86.0000 (86.0000)  acc5: 99.2000 (99.2000)  time: 5.7735  data: 5.6779  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9654 (0.9871)  acc1: 80.8000 (80.9818)  acc5: 96.4000 (96.3636)  time: 0.7447  data: 0.6617  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2367 (1.1692)  acc1: 74.0000 (77.4476)  acc5: 92.4000 (93.9619)  time: 0.1989  data: 0.1234  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.3168 (1.1848)  acc1: 74.0000 (76.8640)  acc5: 92.0000 (93.8400)  time: 0.1929  data: 0.1226  max mem: 9147
Test: Total time: 0:00:10 (0.4043 s / it)
* Acc@1 76.818 Acc@5 93.528 loss 1.186
Accuracy of the model on the 50000 test images: 76.8%
Max accuracy: 77.02%
Epoch: [219]  [   0/1251]  eta: 1:06:21  lr: 0.000771  min_lr: 0.000771  loss: 2.8968 (2.8968)  weight_decay: 0.0500 (0.0500)  time: 3.1829  data: 2.0978  max mem: 9147
Epoch: [219]  [ 200/1251]  eta: 0:03:57  lr: 0.000769  min_lr: 0.000769  loss: 2.4670 (2.9782)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3506 (1.1986)  time: 0.2335  data: 0.0006  max mem: 9147
Epoch: [219]  [ 400/1251]  eta: 0:03:07  lr: 0.000766  min_lr: 0.000766  loss: 2.9790 (2.8830)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1055 (1.1873)  time: 0.2242  data: 0.0007  max mem: 9147
Epoch: [219]  [ 600/1251]  eta: 0:02:21  lr: 0.000763  min_lr: 0.000763  loss: 2.7481 (2.9049)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0814 (1.1394)  time: 0.1937  data: 0.0005  max mem: 9147
Epoch: [219]  [ 800/1251]  eta: 0:01:35  lr: 0.000760  min_lr: 0.000760  loss: 2.3249 (2.8871)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1199 (1.1563)  time: 0.1964  data: 0.0006  max mem: 9147
Epoch: [219]  [1000/1251]  eta: 0:00:52  lr: 0.000757  min_lr: 0.000757  loss: 2.3191 (2.8866)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0947 (1.1389)  time: 0.2243  data: 0.0007  max mem: 9147
Epoch: [219]  [1200/1251]  eta: 0:00:10  lr: 0.000755  min_lr: 0.000755  loss: 2.3532 (2.8757)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1766 (1.1334)  time: 0.2242  data: 0.0006  max mem: 9147
Epoch: [219]  [1250/1251]  eta: 0:00:00  lr: 0.000754  min_lr: 0.000754  loss: 2.3602 (2.8768)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1192 (1.1331)  time: 0.1484  data: 0.0015  max mem: 9147
Epoch: [219] Total time: 0:04:23 (0.2105 s / it)
Averaged stats: lr: 0.000754  min_lr: 0.000754  loss: 2.3602 (2.8942)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1192 (1.1331)
Test:  [ 0/25]  eta: 0:02:24  loss: 0.6631 (0.6631)  acc1: 86.4000 (86.4000)  acc5: 98.4000 (98.4000)  time: 5.7932  data: 5.7129  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8933 (0.8701)  acc1: 81.2000 (81.2000)  acc5: 96.8000 (96.5455)  time: 0.7738  data: 0.6983  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1122 (1.0824)  acc1: 75.2000 (76.9905)  acc5: 93.6000 (93.9810)  time: 0.2066  data: 0.1342  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2348 (1.0960)  acc1: 73.2000 (76.5600)  acc5: 92.4000 (93.8880)  time: 0.2073  data: 0.1364  max mem: 9147
Test: Total time: 0:00:10 (0.4134 s / it)
* Acc@1 76.884 Acc@5 93.682 loss 1.097
Accuracy of the model on the 50000 test images: 76.9%
Max accuracy: 77.02%
Epoch: [220]  [   0/1251]  eta: 1:05:02  lr: 0.000754  min_lr: 0.000754  loss: 2.1823 (2.1823)  weight_decay: 0.0500 (0.0500)  time: 3.1195  data: 2.8884  max mem: 9147
Epoch: [220]  [ 200/1251]  eta: 0:03:57  lr: 0.000751  min_lr: 0.000751  loss: 2.3122 (2.8605)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0201 (1.0949)  time: 0.2200  data: 0.0186  max mem: 9147
Epoch: [220]  [ 400/1251]  eta: 0:03:05  lr: 0.000748  min_lr: 0.000748  loss: 2.1931 (2.8569)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0120 (1.0751)  time: 0.2191  data: 0.0006  max mem: 9147
Epoch: [220]  [ 600/1251]  eta: 0:02:20  lr: 0.000745  min_lr: 0.000745  loss: 2.6091 (2.8611)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0100 (1.1018)  time: 0.2288  data: 0.0153  max mem: 9147
Epoch: [220]  [ 800/1251]  eta: 0:01:35  lr: 0.000743  min_lr: 0.000743  loss: 3.3440 (2.8616)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1066 (1.1146)  time: 0.1909  data: 0.0006  max mem: 9147
Epoch: [220]  [1000/1251]  eta: 0:00:52  lr: 0.000740  min_lr: 0.000740  loss: 2.5581 (2.8701)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0052 (1.1114)  time: 0.1944  data: 0.0006  max mem: 9147
Epoch: [220]  [1200/1251]  eta: 0:00:10  lr: 0.000737  min_lr: 0.000737  loss: 2.5210 (2.8580)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0381 (1.1116)  time: 0.1948  data: 0.0006  max mem: 9147
Epoch: [220]  [1250/1251]  eta: 0:00:00  lr: 0.000736  min_lr: 0.000736  loss: 2.5052 (2.8644)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0381 (1.1122)  time: 0.1393  data: 0.0012  max mem: 9147
Epoch: [220] Total time: 0:04:16 (0.2047 s / it)
Averaged stats: lr: 0.000736  min_lr: 0.000736  loss: 2.5052 (2.8843)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0381 (1.1122)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.6803 (0.6803)  acc1: 86.0000 (86.0000)  acc5: 99.2000 (99.2000)  time: 5.4665  data: 5.3574  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8878 (0.8817)  acc1: 82.4000 (81.9273)  acc5: 96.0000 (96.3636)  time: 0.7272  data: 0.6477  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1550 (1.0681)  acc1: 76.0000 (77.7333)  acc5: 92.8000 (93.8667)  time: 0.2084  data: 0.1325  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2015 (1.0799)  acc1: 74.0000 (77.1520)  acc5: 92.4000 (93.7600)  time: 0.2144  data: 0.1412  max mem: 9147
Test: Total time: 0:00:10 (0.4137 s / it)
* Acc@1 77.106 Acc@5 93.638 loss 1.081
Accuracy of the model on the 50000 test images: 77.1%
Max accuracy: 77.11%
Epoch: [221]  [   0/1251]  eta: 1:09:41  lr: 0.000736  min_lr: 0.000736  loss: 2.2676 (2.2676)  weight_decay: 0.0500 (0.0500)  time: 3.3428  data: 3.1587  max mem: 9147
Epoch: [221]  [ 200/1251]  eta: 0:03:43  lr: 0.000734  min_lr: 0.000734  loss: 2.4032 (2.8125)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2407 (1.1674)  time: 0.2165  data: 0.0008  max mem: 9147
Epoch: [221]  [ 400/1251]  eta: 0:02:57  lr: 0.000731  min_lr: 0.000731  loss: 2.4297 (2.8436)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0888 (1.1578)  time: 0.1953  data: 0.0009  max mem: 9147
Epoch: [221]  [ 600/1251]  eta: 0:02:16  lr: 0.000728  min_lr: 0.000728  loss: 2.2029 (2.8169)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0592 (1.1762)  time: 0.2000  data: 0.0007  max mem: 9147
Epoch: [221]  [ 800/1251]  eta: 0:01:35  lr: 0.000725  min_lr: 0.000725  loss: 2.7849 (2.8387)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1210 (1.1733)  time: 0.2250  data: 0.0007  max mem: 9147
Epoch: [221]  [1000/1251]  eta: 0:00:53  lr: 0.000722  min_lr: 0.000722  loss: 2.9408 (2.8347)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2535 (1.1637)  time: 0.2143  data: 0.0005  max mem: 9147
Epoch: [221]  [1200/1251]  eta: 0:00:10  lr: 0.000720  min_lr: 0.000720  loss: 2.2441 (2.8366)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0679 (1.1683)  time: 0.2126  data: 0.0008  max mem: 9147
Epoch: [221]  [1250/1251]  eta: 0:00:00  lr: 0.000719  min_lr: 0.000719  loss: 3.2225 (2.8433)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0679 (1.1666)  time: 0.1450  data: 0.0011  max mem: 9147
Epoch: [221] Total time: 0:04:25 (0.2124 s / it)
Averaged stats: lr: 0.000719  min_lr: 0.000719  loss: 3.2225 (2.8815)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0679 (1.1666)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.7230 (0.7230)  acc1: 88.0000 (88.0000)  acc5: 98.4000 (98.4000)  time: 5.6009  data: 5.5206  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9295 (0.9301)  acc1: 82.4000 (81.7455)  acc5: 96.4000 (96.4000)  time: 0.7614  data: 0.6749  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1554 (1.1324)  acc1: 75.2000 (77.4667)  acc5: 93.6000 (93.7905)  time: 0.2135  data: 0.1348  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2802 (1.1431)  acc1: 75.2000 (77.0560)  acc5: 92.8000 (93.7760)  time: 0.2088  data: 0.1347  max mem: 9147
Test: Total time: 0:00:10 (0.4095 s / it)
* Acc@1 77.098 Acc@5 93.698 loss 1.136
Accuracy of the model on the 50000 test images: 77.1%
Max accuracy: 77.11%
Epoch: [222]  [   0/1251]  eta: 1:05:42  lr: 0.000719  min_lr: 0.000719  loss: 2.2885 (2.2885)  weight_decay: 0.0500 (0.0500)  time: 3.1518  data: 1.5942  max mem: 9147
Epoch: [222]  [ 200/1251]  eta: 0:03:56  lr: 0.000716  min_lr: 0.000716  loss: 2.3417 (2.9594)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0491 (1.1327)  time: 0.2004  data: 0.0006  max mem: 9147
Epoch: [222]  [ 400/1251]  eta: 0:03:03  lr: 0.000714  min_lr: 0.000714  loss: 2.5401 (2.9457)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9910 (1.1208)  time: 0.1993  data: 0.0012  max mem: 9147
Epoch: [222]  [ 600/1251]  eta: 0:02:16  lr: 0.000711  min_lr: 0.000711  loss: 2.3196 (2.9319)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0478 (1.1140)  time: 0.2007  data: 0.0007  max mem: 9147
Epoch: [222]  [ 800/1251]  eta: 0:01:35  lr: 0.000708  min_lr: 0.000708  loss: 3.2225 (2.9096)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0333 (1.1412)  time: 0.2009  data: 0.0006  max mem: 9147
Epoch: [222]  [1000/1251]  eta: 0:00:53  lr: 0.000705  min_lr: 0.000705  loss: 2.7804 (2.8991)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0950 (1.1581)  time: 0.1997  data: 0.0006  max mem: 9147
Epoch: [222]  [1200/1251]  eta: 0:00:10  lr: 0.000703  min_lr: 0.000703  loss: 2.8966 (2.8901)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9868 (1.1556)  time: 0.2013  data: 0.0007  max mem: 9147
Epoch: [222]  [1250/1251]  eta: 0:00:00  lr: 0.000702  min_lr: 0.000702  loss: 3.4554 (2.8937)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0154 (1.1534)  time: 0.1397  data: 0.0012  max mem: 9147
Epoch: [222] Total time: 0:04:24 (0.2115 s / it)
Averaged stats: lr: 0.000702  min_lr: 0.000702  loss: 3.4554 (2.8973)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0154 (1.1534)
Test:  [ 0/25]  eta: 0:02:23  loss: 0.7196 (0.7196)  acc1: 86.4000 (86.4000)  acc5: 98.0000 (98.0000)  time: 5.7531  data: 5.6727  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9211 (0.9192)  acc1: 82.8000 (81.6000)  acc5: 96.4000 (96.1091)  time: 0.7569  data: 0.6656  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1620 (1.1211)  acc1: 74.4000 (77.4095)  acc5: 92.8000 (93.8286)  time: 0.2022  data: 0.1214  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2855 (1.1388)  acc1: 73.2000 (76.9760)  acc5: 92.0000 (93.7760)  time: 0.1981  data: 0.1213  max mem: 9147
Test: Total time: 0:00:10 (0.4063 s / it)
* Acc@1 77.148 Acc@5 93.702 loss 1.133
Accuracy of the model on the 50000 test images: 77.1%
Max accuracy: 77.15%
Epoch: [223]  [   0/1251]  eta: 1:06:04  lr: 0.000702  min_lr: 0.000702  loss: 3.9872 (3.9872)  weight_decay: 0.0500 (0.0500)  time: 3.1694  data: 2.9845  max mem: 9147
Epoch: [223]  [ 200/1251]  eta: 0:03:41  lr: 0.000699  min_lr: 0.000699  loss: 3.1135 (2.9618)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0169 (1.1477)  time: 0.2096  data: 0.0006  max mem: 9147
Epoch: [223]  [ 400/1251]  eta: 0:03:02  lr: 0.000696  min_lr: 0.000696  loss: 2.2133 (2.9120)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1703 (1.1753)  time: 0.2225  data: 0.0006  max mem: 9147
Epoch: [223]  [ 600/1251]  eta: 0:02:20  lr: 0.000694  min_lr: 0.000694  loss: 2.5650 (2.9047)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1110 (1.1763)  time: 0.2162  data: 0.0006  max mem: 9147
Epoch: [223]  [ 800/1251]  eta: 0:01:36  lr: 0.000691  min_lr: 0.000691  loss: 2.4208 (2.8956)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0221 (1.1695)  time: 0.2111  data: 0.0006  max mem: 9147
Epoch: [223]  [1000/1251]  eta: 0:00:53  lr: 0.000688  min_lr: 0.000688  loss: 2.7499 (2.8967)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0835 (inf)  time: 0.2007  data: 0.0006  max mem: 9147
Epoch: [223]  [1200/1251]  eta: 0:00:10  lr: 0.000686  min_lr: 0.000686  loss: 2.3490 (2.8828)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0167 (inf)  time: 0.2150  data: 0.0008  max mem: 9147
Epoch: [223]  [1250/1251]  eta: 0:00:00  lr: 0.000685  min_lr: 0.000685  loss: 2.9593 (2.8914)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0481 (inf)  time: 0.1498  data: 0.0014  max mem: 9147
Epoch: [223] Total time: 0:04:29 (0.2154 s / it)
Averaged stats: lr: 0.000685  min_lr: 0.000685  loss: 2.9593 (2.8839)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0481 (inf)
Test:  [ 0/25]  eta: 0:02:11  loss: 0.7889 (0.7889)  acc1: 87.2000 (87.2000)  acc5: 98.4000 (98.4000)  time: 5.2677  data: 5.1837  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9665 (0.9690)  acc1: 82.0000 (81.8182)  acc5: 96.8000 (96.0364)  time: 0.7326  data: 0.6496  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1882 (1.1448)  acc1: 75.6000 (77.6191)  acc5: 92.4000 (93.7333)  time: 0.2285  data: 0.1509  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2605 (1.1535)  acc1: 75.2000 (77.2320)  acc5: 92.4000 (93.8080)  time: 0.2124  data: 0.1387  max mem: 9147
Test: Total time: 0:00:10 (0.4076 s / it)
* Acc@1 77.300 Acc@5 93.766 loss 1.153
Accuracy of the model on the 50000 test images: 77.3%
Max accuracy: 77.30%
Epoch: [224]  [   0/1251]  eta: 1:09:15  lr: 0.000685  min_lr: 0.000685  loss: 2.7407 (2.7407)  weight_decay: 0.0500 (0.0500)  time: 3.3221  data: 3.1507  max mem: 9147
Epoch: [224]  [ 200/1251]  eta: 0:03:33  lr: 0.000682  min_lr: 0.000682  loss: 2.2927 (2.7919)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1167 (1.1329)  time: 0.1951  data: 0.0007  max mem: 9147
Epoch: [224]  [ 400/1251]  eta: 0:02:48  lr: 0.000680  min_lr: 0.000680  loss: 2.2767 (2.8225)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0943 (1.1718)  time: 0.1958  data: 0.0005  max mem: 9147
Epoch: [224]  [ 600/1251]  eta: 0:02:08  lr: 0.000677  min_lr: 0.000677  loss: 2.5191 (2.8477)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1122 (1.1560)  time: 0.2004  data: 0.0007  max mem: 9147
Epoch: [224]  [ 800/1251]  eta: 0:01:31  lr: 0.000674  min_lr: 0.000674  loss: 2.4798 (2.8623)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9681 (1.1659)  time: 0.2192  data: 0.0011  max mem: 9147
Epoch: [224]  [1000/1251]  eta: 0:00:51  lr: 0.000671  min_lr: 0.000671  loss: 2.5626 (2.8576)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1442 (1.1628)  time: 0.2204  data: 0.0007  max mem: 9147
Epoch: [224]  [1200/1251]  eta: 0:00:10  lr: 0.000669  min_lr: 0.000669  loss: 2.2340 (2.8468)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1149 (1.1535)  time: 0.1912  data: 0.0005  max mem: 9147
Epoch: [224]  [1250/1251]  eta: 0:00:00  lr: 0.000668  min_lr: 0.000668  loss: 2.4758 (2.8505)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0010 (1.1532)  time: 0.1383  data: 0.0009  max mem: 9147
Epoch: [224] Total time: 0:04:14 (0.2033 s / it)
Averaged stats: lr: 0.000668  min_lr: 0.000668  loss: 2.4758 (2.8966)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0010 (1.1532)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.7436 (0.7436)  acc1: 86.4000 (86.4000)  acc5: 99.2000 (99.2000)  time: 5.6501  data: 5.5696  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9358 (0.9301)  acc1: 82.8000 (81.2727)  acc5: 96.4000 (96.4000)  time: 0.6968  data: 0.6204  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1566 (1.1305)  acc1: 75.2000 (77.3714)  acc5: 93.2000 (93.7524)  time: 0.1921  data: 0.1180  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2853 (1.1437)  acc1: 74.4000 (77.0080)  acc5: 92.0000 (93.7760)  time: 0.2035  data: 0.1307  max mem: 9147
Test: Total time: 0:00:10 (0.4046 s / it)
* Acc@1 77.134 Acc@5 93.726 loss 1.134
Accuracy of the model on the 50000 test images: 77.1%
Max accuracy: 77.30%
Epoch: [225]  [   0/1251]  eta: 1:02:12  lr: 0.000668  min_lr: 0.000668  loss: 2.1009 (2.1009)  weight_decay: 0.0500 (0.0500)  time: 2.9834  data: 2.7242  max mem: 9147
Epoch: [225]  [ 200/1251]  eta: 0:03:39  lr: 0.000665  min_lr: 0.000665  loss: 3.1549 (2.9121)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1103 (1.1122)  time: 0.1802  data: 0.0005  max mem: 9147
Epoch: [225]  [ 400/1251]  eta: 0:02:49  lr: 0.000663  min_lr: 0.000663  loss: 3.6260 (2.9167)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0368 (1.1093)  time: 0.1914  data: 0.0006  max mem: 9147
Epoch: [225]  [ 600/1251]  eta: 0:02:09  lr: 0.000660  min_lr: 0.000660  loss: 2.3609 (2.8849)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1911 (1.1218)  time: 0.2048  data: 0.0006  max mem: 9147
Epoch: [225]  [ 800/1251]  eta: 0:01:29  lr: 0.000657  min_lr: 0.000657  loss: 2.4180 (2.8902)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0746 (1.1112)  time: 0.1985  data: 0.0005  max mem: 9147
Epoch: [225]  [1000/1251]  eta: 0:00:49  lr: 0.000655  min_lr: 0.000655  loss: 2.3641 (2.9115)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1640 (1.1219)  time: 0.1893  data: 0.0011  max mem: 9147
Epoch: [225]  [1200/1251]  eta: 0:00:10  lr: 0.000652  min_lr: 0.000652  loss: 2.8855 (2.9130)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0781 (1.1280)  time: 0.2090  data: 0.0006  max mem: 9147
Epoch: [225]  [1250/1251]  eta: 0:00:00  lr: 0.000652  min_lr: 0.000652  loss: 2.3376 (2.9112)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1756 (1.1301)  time: 0.1696  data: 0.0016  max mem: 9147
Epoch: [225] Total time: 0:04:07 (0.1981 s / it)
Averaged stats: lr: 0.000652  min_lr: 0.000652  loss: 2.3376 (2.8767)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1756 (1.1301)
Test:  [ 0/25]  eta: 0:02:14  loss: 0.7185 (0.7185)  acc1: 86.0000 (86.0000)  acc5: 98.0000 (98.0000)  time: 5.3741  data: 5.2938  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8722 (0.8743)  acc1: 82.0000 (81.4182)  acc5: 96.4000 (96.2182)  time: 0.7047  data: 0.6100  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0951 (1.0548)  acc1: 76.0000 (77.6381)  acc5: 93.6000 (94.1333)  time: 0.1975  data: 0.1117  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1659 (1.0695)  acc1: 76.0000 (77.2160)  acc5: 92.8000 (94.0160)  time: 0.2252  data: 0.1453  max mem: 9147
Test: Total time: 0:00:10 (0.4141 s / it)
* Acc@1 77.316 Acc@5 93.880 loss 1.066
Accuracy of the model on the 50000 test images: 77.3%
Max accuracy: 77.32%
Epoch: [226]  [   0/1251]  eta: 1:00:08  lr: 0.000651  min_lr: 0.000651  loss: 2.3540 (2.3540)  weight_decay: 0.0500 (0.0500)  time: 2.8846  data: 2.6941  max mem: 9147
Epoch: [226]  [ 200/1251]  eta: 0:03:48  lr: 0.000649  min_lr: 0.000649  loss: 2.6811 (2.9162)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0244 (1.0680)  time: 0.1941  data: 0.0005  max mem: 9147
Epoch: [226]  [ 400/1251]  eta: 0:02:53  lr: 0.000646  min_lr: 0.000646  loss: 2.3893 (2.8611)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2168 (1.1268)  time: 0.1996  data: 0.0006  max mem: 9147
Epoch: [226]  [ 600/1251]  eta: 0:02:11  lr: 0.000644  min_lr: 0.000644  loss: 2.2218 (2.8812)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1593 (inf)  time: 0.2202  data: 0.0008  max mem: 9147
Epoch: [226]  [ 800/1251]  eta: 0:01:32  lr: 0.000641  min_lr: 0.000641  loss: 2.2335 (2.8746)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2826 (inf)  time: 0.2058  data: 0.0007  max mem: 9147
Epoch: [226]  [1000/1251]  eta: 0:00:51  lr: 0.000638  min_lr: 0.000638  loss: 2.3039 (2.8730)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0478 (inf)  time: 0.2228  data: 0.0007  max mem: 9147
Epoch: [226]  [1200/1251]  eta: 0:00:10  lr: 0.000636  min_lr: 0.000636  loss: 2.3937 (2.8591)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0750 (inf)  time: 0.2439  data: 0.0006  max mem: 9147
Epoch: [226]  [1250/1251]  eta: 0:00:00  lr: 0.000635  min_lr: 0.000635  loss: 2.4916 (2.8593)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1075 (inf)  time: 0.1462  data: 0.0014  max mem: 9147
Epoch: [226] Total time: 0:04:19 (0.2074 s / it)
Averaged stats: lr: 0.000635  min_lr: 0.000635  loss: 2.4916 (2.8774)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1075 (inf)
Test:  [ 0/25]  eta: 0:02:25  loss: 0.7028 (0.7028)  acc1: 87.2000 (87.2000)  acc5: 98.8000 (98.8000)  time: 5.8394  data: 5.7591  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9165 (0.9028)  acc1: 82.0000 (81.5636)  acc5: 96.4000 (96.2909)  time: 0.7747  data: 0.6894  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1440 (1.0984)  acc1: 74.4000 (77.3143)  acc5: 93.2000 (94.0381)  time: 0.2053  data: 0.1271  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2462 (1.1084)  acc1: 73.6000 (77.0560)  acc5: 92.8000 (93.9680)  time: 0.2000  data: 0.1270  max mem: 9147
Test: Total time: 0:00:10 (0.4121 s / it)
* Acc@1 77.468 Acc@5 93.834 loss 1.101
Accuracy of the model on the 50000 test images: 77.5%
Max accuracy: 77.47%
Epoch: [227]  [   0/1251]  eta: 1:09:56  lr: 0.000635  min_lr: 0.000635  loss: 2.2177 (2.2177)  weight_decay: 0.0500 (0.0500)  time: 3.3547  data: 3.1588  max mem: 9147
Epoch: [227]  [ 200/1251]  eta: 0:03:35  lr: 0.000632  min_lr: 0.000632  loss: 3.0329 (2.8917)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2122 (1.1656)  time: 0.1857  data: 0.0016  max mem: 9147
Epoch: [227]  [ 400/1251]  eta: 0:02:48  lr: 0.000630  min_lr: 0.000630  loss: 2.7279 (2.9032)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1787 (1.1718)  time: 0.2058  data: 0.0008  max mem: 9147
Epoch: [227]  [ 600/1251]  eta: 0:02:11  lr: 0.000627  min_lr: 0.000627  loss: 2.3554 (2.8899)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1398 (1.1853)  time: 0.1810  data: 0.0005  max mem: 9147
Epoch: [227]  [ 800/1251]  eta: 0:01:32  lr: 0.000625  min_lr: 0.000625  loss: 2.3806 (2.8790)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1057 (1.1804)  time: 0.2294  data: 0.0100  max mem: 9147
Epoch: [227]  [1000/1251]  eta: 0:00:51  lr: 0.000622  min_lr: 0.000622  loss: 2.5568 (2.8587)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0441 (1.1769)  time: 0.1950  data: 0.0006  max mem: 9147
Epoch: [227]  [1200/1251]  eta: 0:00:10  lr: 0.000619  min_lr: 0.000619  loss: 2.2930 (2.8520)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0921 (1.1644)  time: 0.1931  data: 0.0006  max mem: 9147
Epoch: [227]  [1250/1251]  eta: 0:00:00  lr: 0.000619  min_lr: 0.000619  loss: 2.7849 (2.8563)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2430 (1.1688)  time: 0.1399  data: 0.0010  max mem: 9147
Epoch: [227] Total time: 0:04:14 (0.2037 s / it)
Averaged stats: lr: 0.000619  min_lr: 0.000619  loss: 2.7849 (2.8786)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2430 (1.1688)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.7617 (0.7617)  acc1: 88.0000 (88.0000)  acc5: 98.4000 (98.4000)  time: 5.5643  data: 5.4776  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9308 (0.9424)  acc1: 81.6000 (82.0000)  acc5: 96.4000 (96.2546)  time: 0.7009  data: 0.6170  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1642 (1.1253)  acc1: 76.8000 (78.1143)  acc5: 93.2000 (94.0571)  time: 0.1878  data: 0.1114  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2103 (1.1448)  acc1: 76.8000 (77.4400)  acc5: 92.8000 (93.9360)  time: 0.1845  data: 0.1123  max mem: 9147
Test: Total time: 0:00:09 (0.3938 s / it)
* Acc@1 77.304 Acc@5 93.800 loss 1.142
Accuracy of the model on the 50000 test images: 77.3%
Max accuracy: 77.47%
Epoch: [228]  [   0/1251]  eta: 1:01:58  lr: 0.000619  min_lr: 0.000619  loss: 2.1424 (2.1424)  weight_decay: 0.0500 (0.0500)  time: 2.9723  data: 2.7159  max mem: 9147
Epoch: [228]  [ 200/1251]  eta: 0:03:55  lr: 0.000616  min_lr: 0.000616  loss: 2.4698 (2.7398)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1512 (1.2564)  time: 0.2045  data: 0.0007  max mem: 9147
Epoch: [228]  [ 400/1251]  eta: 0:03:05  lr: 0.000614  min_lr: 0.000614  loss: 2.2590 (2.8097)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0336 (1.2231)  time: 0.2149  data: 0.0007  max mem: 9147
Epoch: [228]  [ 600/1251]  eta: 0:02:21  lr: 0.000611  min_lr: 0.000611  loss: 3.2623 (2.8750)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1350 (1.2502)  time: 0.1953  data: 0.0006  max mem: 9147
Epoch: [228]  [ 800/1251]  eta: 0:01:37  lr: 0.000608  min_lr: 0.000608  loss: 2.2527 (2.8807)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1953 (1.2259)  time: 0.2108  data: 0.0006  max mem: 9147
Epoch: [228]  [1000/1251]  eta: 0:00:54  lr: 0.000606  min_lr: 0.000606  loss: 2.5241 (2.8853)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0544 (1.2258)  time: 0.2007  data: 0.0007  max mem: 9147
Epoch: [228]  [1200/1251]  eta: 0:00:11  lr: 0.000603  min_lr: 0.000603  loss: 2.8441 (2.8934)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1018 (1.2092)  time: 0.2206  data: 0.0019  max mem: 9147
Epoch: [228]  [1250/1251]  eta: 0:00:00  lr: 0.000603  min_lr: 0.000603  loss: 2.5217 (2.8968)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1018 (1.2075)  time: 0.1468  data: 0.0014  max mem: 9147
Epoch: [228] Total time: 0:04:30 (0.2165 s / it)
Averaged stats: lr: 0.000603  min_lr: 0.000603  loss: 2.5217 (2.8675)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1018 (1.2075)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.7287 (0.7287)  acc1: 87.6000 (87.6000)  acc5: 98.0000 (98.0000)  time: 5.5273  data: 5.4388  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8776 (0.8948)  acc1: 81.6000 (81.2000)  acc5: 96.4000 (96.4000)  time: 0.7108  data: 0.6166  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1568 (1.0791)  acc1: 74.4000 (77.5619)  acc5: 93.6000 (94.0191)  time: 0.2044  data: 0.1221  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1874 (1.0905)  acc1: 74.4000 (77.2320)  acc5: 92.8000 (93.9520)  time: 0.2036  data: 0.1271  max mem: 9147
Test: Total time: 0:00:10 (0.4030 s / it)
* Acc@1 77.264 Acc@5 93.858 loss 1.087
Accuracy of the model on the 50000 test images: 77.3%
Max accuracy: 77.47%
Epoch: [229]  [   0/1251]  eta: 1:04:22  lr: 0.000603  min_lr: 0.000603  loss: 2.1119 (2.1119)  weight_decay: 0.0500 (0.0500)  time: 3.0877  data: 2.2118  max mem: 9147
Epoch: [229]  [ 200/1251]  eta: 0:03:48  lr: 0.000600  min_lr: 0.000600  loss: 2.2446 (2.8847)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2050 (1.1627)  time: 0.1952  data: 0.0012  max mem: 9147
Epoch: [229]  [ 400/1251]  eta: 0:02:53  lr: 0.000597  min_lr: 0.000597  loss: 2.5300 (2.9046)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0562 (1.1706)  time: 0.1992  data: 0.0005  max mem: 9147
Epoch: [229]  [ 600/1251]  eta: 0:02:14  lr: 0.000595  min_lr: 0.000595  loss: 2.4814 (2.9048)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1429 (1.1638)  time: 0.2011  data: 0.0006  max mem: 9147
Epoch: [229]  [ 800/1251]  eta: 0:01:33  lr: 0.000592  min_lr: 0.000592  loss: 3.2732 (2.9018)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0371 (1.1598)  time: 0.1776  data: 0.0005  max mem: 9147
Epoch: [229]  [1000/1251]  eta: 0:00:51  lr: 0.000590  min_lr: 0.000590  loss: 2.1490 (2.8974)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0681 (1.1476)  time: 0.2434  data: 0.0008  max mem: 9147
Epoch: [229]  [1200/1251]  eta: 0:00:10  lr: 0.000587  min_lr: 0.000587  loss: 2.2059 (2.8908)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0282 (1.1460)  time: 0.2080  data: 0.0006  max mem: 9147
Epoch: [229]  [1250/1251]  eta: 0:00:00  lr: 0.000587  min_lr: 0.000587  loss: 2.3599 (2.8904)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1063 (1.1482)  time: 0.1507  data: 0.0014  max mem: 9147
Epoch: [229] Total time: 0:04:19 (0.2073 s / it)
Averaged stats: lr: 0.000587  min_lr: 0.000587  loss: 2.3599 (2.8759)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1063 (1.1482)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.6870 (0.6870)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.5974  data: 5.5170  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9054 (0.9111)  acc1: 83.2000 (82.4000)  acc5: 96.0000 (96.0364)  time: 0.7118  data: 0.6365  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0809 (1.0793)  acc1: 76.0000 (78.2667)  acc5: 93.6000 (93.9619)  time: 0.1947  data: 0.1227  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2170 (1.0864)  acc1: 76.0000 (77.8400)  acc5: 92.4000 (93.9040)  time: 0.1943  data: 0.1240  max mem: 9147
Test: Total time: 0:00:09 (0.3982 s / it)
* Acc@1 77.482 Acc@5 93.850 loss 1.091
Accuracy of the model on the 50000 test images: 77.5%
Max accuracy: 77.48%
Epoch: [230]  [   0/1251]  eta: 1:02:49  lr: 0.000587  min_lr: 0.000587  loss: 2.5261 (2.5261)  weight_decay: 0.0500 (0.0500)  time: 3.0133  data: 2.7704  max mem: 9147
Epoch: [230]  [ 200/1251]  eta: 0:03:41  lr: 0.000584  min_lr: 0.000584  loss: 2.2897 (2.9130)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4853 (1.2996)  time: 0.1997  data: 0.0006  max mem: 9147
Epoch: [230]  [ 400/1251]  eta: 0:02:50  lr: 0.000582  min_lr: 0.000582  loss: 2.7455 (2.8709)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1445 (1.2505)  time: 0.1898  data: 0.0011  max mem: 9147
Epoch: [230]  [ 600/1251]  eta: 0:02:12  lr: 0.000579  min_lr: 0.000579  loss: 2.3062 (2.9019)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0546 (1.2069)  time: 0.2163  data: 0.0006  max mem: 9147
Epoch: [230]  [ 800/1251]  eta: 0:01:33  lr: 0.000577  min_lr: 0.000577  loss: 2.4783 (2.9046)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3014 (1.2019)  time: 0.2149  data: 0.0006  max mem: 9147
Epoch: [230]  [1000/1251]  eta: 0:00:51  lr: 0.000574  min_lr: 0.000574  loss: 2.3070 (2.8888)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2978 (1.2148)  time: 0.1948  data: 0.0012  max mem: 9147
Epoch: [230]  [1200/1251]  eta: 0:00:10  lr: 0.000571  min_lr: 0.000571  loss: 2.4925 (2.8927)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1052 (1.2053)  time: 0.2001  data: 0.0007  max mem: 9147
Epoch: [230]  [1250/1251]  eta: 0:00:00  lr: 0.000571  min_lr: 0.000571  loss: 2.9644 (2.8988)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1377 (1.2012)  time: 0.1568  data: 0.0015  max mem: 9147
Epoch: [230] Total time: 0:04:18 (0.2064 s / it)
Averaged stats: lr: 0.000571  min_lr: 0.000571  loss: 2.9644 (2.8726)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1377 (1.2012)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.7906 (0.7906)  acc1: 87.6000 (87.6000)  acc5: 98.0000 (98.0000)  time: 5.5671  data: 5.4358  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.9065 (0.9610)  acc1: 84.4000 (82.0364)  acc5: 96.8000 (96.2545)  time: 0.6970  data: 0.5990  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.2049 (1.1471)  acc1: 74.4000 (77.7333)  acc5: 93.6000 (93.8476)  time: 0.1980  data: 0.1161  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2641 (1.1572)  acc1: 74.4000 (77.5040)  acc5: 92.8000 (93.9360)  time: 0.1928  data: 0.1168  max mem: 9147
Test: Total time: 0:00:09 (0.3986 s / it)
* Acc@1 77.314 Acc@5 93.846 loss 1.160
Accuracy of the model on the 50000 test images: 77.3%
Max accuracy: 77.48%
Epoch: [231]  [   0/1251]  eta: 0:54:26  lr: 0.000571  min_lr: 0.000571  loss: 3.7357 (3.7357)  weight_decay: 0.0500 (0.0500)  time: 2.6112  data: 2.3474  max mem: 9147
Epoch: [231]  [ 200/1251]  eta: 0:03:59  lr: 0.000568  min_lr: 0.000568  loss: 2.9867 (2.8077)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1320 (1.2899)  time: 0.2147  data: 0.0007  max mem: 9147
Epoch: [231]  [ 400/1251]  eta: 0:03:08  lr: 0.000566  min_lr: 0.000566  loss: 2.3864 (2.8182)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2977 (1.2483)  time: 0.2244  data: 0.0007  max mem: 9147
Epoch: [231]  [ 600/1251]  eta: 0:02:20  lr: 0.000563  min_lr: 0.000563  loss: 2.6189 (2.8617)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0459 (1.2258)  time: 0.2056  data: 0.0007  max mem: 9147
Epoch: [231]  [ 800/1251]  eta: 0:01:37  lr: 0.000561  min_lr: 0.000561  loss: 2.5094 (2.8498)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1463 (1.2132)  time: 0.2057  data: 0.0006  max mem: 9147
Epoch: [231]  [1000/1251]  eta: 0:00:53  lr: 0.000558  min_lr: 0.000558  loss: 2.5467 (2.8458)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3531 (1.2365)  time: 0.2152  data: 0.0007  max mem: 9147
Epoch: [231]  [1200/1251]  eta: 0:00:10  lr: 0.000556  min_lr: 0.000556  loss: 2.3000 (2.8472)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1235 (1.2235)  time: 0.2097  data: 0.0069  max mem: 9147
Epoch: [231]  [1250/1251]  eta: 0:00:00  lr: 0.000555  min_lr: 0.000555  loss: 2.2765 (2.8510)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0890 (1.2164)  time: 0.1471  data: 0.0014  max mem: 9147
Epoch: [231] Total time: 0:04:28 (0.2143 s / it)
Averaged stats: lr: 0.000555  min_lr: 0.000555  loss: 2.2765 (2.8626)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0890 (1.2164)
Test:  [ 0/25]  eta: 0:02:25  loss: 0.7061 (0.7061)  acc1: 87.2000 (87.2000)  acc5: 98.4000 (98.4000)  time: 5.8133  data: 5.7331  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8974 (0.8854)  acc1: 82.4000 (81.8545)  acc5: 96.8000 (96.5091)  time: 0.6949  data: 0.6215  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1023 (1.0624)  acc1: 75.2000 (77.9619)  acc5: 92.8000 (94.2095)  time: 0.1679  data: 0.0970  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2044 (1.0795)  acc1: 75.2000 (77.6160)  acc5: 92.8000 (94.0800)  time: 0.1857  data: 0.1159  max mem: 9147
Test: Total time: 0:00:09 (0.3962 s / it)
* Acc@1 77.662 Acc@5 93.900 loss 1.080
Accuracy of the model on the 50000 test images: 77.7%
Max accuracy: 77.66%
Epoch: [232]  [   0/1251]  eta: 0:43:50  lr: 0.000555  min_lr: 0.000555  loss: 2.4810 (2.4810)  weight_decay: 0.0500 (0.0500)  time: 2.1027  data: 1.8240  max mem: 9147
Epoch: [232]  [ 200/1251]  eta: 0:03:52  lr: 0.000553  min_lr: 0.000553  loss: 3.4017 (2.8955)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0811 (1.1704)  time: 0.2111  data: 0.0007  max mem: 9147
Epoch: [232]  [ 400/1251]  eta: 0:03:00  lr: 0.000550  min_lr: 0.000550  loss: 2.2347 (2.8554)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2140 (1.1840)  time: 0.1889  data: 0.0005  max mem: 9147
Epoch: [232]  [ 600/1251]  eta: 0:02:16  lr: 0.000548  min_lr: 0.000548  loss: 2.3569 (2.8482)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1001 (1.1808)  time: 0.2248  data: 0.0071  max mem: 9147
Epoch: [232]  [ 800/1251]  eta: 0:01:34  lr: 0.000545  min_lr: 0.000545  loss: 2.2190 (2.8428)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1725 (1.1908)  time: 0.2195  data: 0.0006  max mem: 9147
Epoch: [232]  [1000/1251]  eta: 0:00:52  lr: 0.000543  min_lr: 0.000543  loss: 2.3940 (2.8500)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0848 (1.1783)  time: 0.2325  data: 0.0006  max mem: 9147
Epoch: [232]  [1200/1251]  eta: 0:00:10  lr: 0.000540  min_lr: 0.000540  loss: 2.1528 (2.8514)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1035 (1.1682)  time: 0.2155  data: 0.0008  max mem: 9147
Epoch: [232]  [1250/1251]  eta: 0:00:00  lr: 0.000540  min_lr: 0.000540  loss: 2.3594 (2.8538)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1209 (1.1684)  time: 0.1545  data: 0.0013  max mem: 9147
Epoch: [232] Total time: 0:04:20 (0.2086 s / it)
Averaged stats: lr: 0.000540  min_lr: 0.000540  loss: 2.3594 (2.8646)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1209 (1.1684)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.6766 (0.6766)  acc1: 87.2000 (87.2000)  acc5: 98.4000 (98.4000)  time: 5.4433  data: 5.3558  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 0.8736 (0.8570)  acc1: 85.2000 (82.6182)  acc5: 96.4000 (96.3636)  time: 0.6480  data: 0.5607  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0769 (1.0422)  acc1: 76.8000 (78.2095)  acc5: 93.6000 (94.1143)  time: 0.1830  data: 0.1036  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1686 (1.0570)  acc1: 76.4000 (77.6960)  acc5: 92.8000 (94.0160)  time: 0.2060  data: 0.1325  max mem: 9147
Test: Total time: 0:00:10 (0.4019 s / it)
* Acc@1 77.762 Acc@5 93.906 loss 1.061
Accuracy of the model on the 50000 test images: 77.8%
Max accuracy: 77.76%
Epoch: [233]  [   0/1251]  eta: 1:00:33  lr: 0.000540  min_lr: 0.000540  loss: 4.6913 (4.6913)  weight_decay: 0.0500 (0.0500)  time: 2.9048  data: 2.7127  max mem: 9147
Epoch: [233]  [ 200/1251]  eta: 0:03:37  lr: 0.000537  min_lr: 0.000537  loss: 2.7740 (2.8496)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1742 (1.2735)  time: 0.2089  data: 0.0008  max mem: 9147
Epoch: [233]  [ 400/1251]  eta: 0:03:00  lr: 0.000535  min_lr: 0.000535  loss: 2.4124 (2.8976)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1599 (1.2382)  time: 0.1998  data: 0.0006  max mem: 9147
Epoch: [233]  [ 600/1251]  eta: 0:02:16  lr: 0.000533  min_lr: 0.000533  loss: 2.2746 (2.8840)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2178 (1.2333)  time: 0.2008  data: 0.0006  max mem: 9147
Epoch: [233]  [ 800/1251]  eta: 0:01:34  lr: 0.000530  min_lr: 0.000530  loss: 2.3232 (2.8838)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0651 (1.2154)  time: 0.2155  data: 0.0014  max mem: 9147
Epoch: [233]  [1000/1251]  eta: 0:00:52  lr: 0.000528  min_lr: 0.000528  loss: 3.4575 (2.8766)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1860 (1.2094)  time: 0.2247  data: 0.0007  max mem: 9147
Epoch: [233]  [1200/1251]  eta: 0:00:10  lr: 0.000525  min_lr: 0.000525  loss: 2.4469 (2.8746)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3273 (1.2207)  time: 0.1941  data: 0.0005  max mem: 9147
Epoch: [233]  [1250/1251]  eta: 0:00:00  lr: 0.000525  min_lr: 0.000525  loss: 2.3335 (2.8738)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3046 (1.2224)  time: 0.1386  data: 0.0016  max mem: 9147
Epoch: [233] Total time: 0:04:21 (0.2093 s / it)
Averaged stats: lr: 0.000525  min_lr: 0.000525  loss: 2.3335 (2.8846)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3046 (1.2224)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.6801 (0.6801)  acc1: 87.2000 (87.2000)  acc5: 98.0000 (98.0000)  time: 5.4774  data: 5.3703  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8557 (0.8624)  acc1: 83.2000 (81.3818)  acc5: 96.8000 (96.4364)  time: 0.7269  data: 0.6447  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0967 (1.0559)  acc1: 75.2000 (77.4857)  acc5: 93.2000 (94.1333)  time: 0.2053  data: 0.1294  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1785 (1.0681)  acc1: 74.4000 (77.1680)  acc5: 92.8000 (94.1120)  time: 0.2140  data: 0.1392  max mem: 9147
Test: Total time: 0:00:10 (0.4053 s / it)
* Acc@1 77.606 Acc@5 93.884 loss 1.064
Accuracy of the model on the 50000 test images: 77.6%
Max accuracy: 77.76%
Epoch: [234]  [   0/1251]  eta: 1:02:24  lr: 0.000525  min_lr: 0.000525  loss: 2.2629 (2.2629)  weight_decay: 0.0500 (0.0500)  time: 2.9932  data: 1.6144  max mem: 9147
Epoch: [234]  [ 200/1251]  eta: 0:03:58  lr: 0.000522  min_lr: 0.000522  loss: 2.4124 (2.8826)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0984 (1.0984)  time: 0.2148  data: 0.0007  max mem: 9147
Epoch: [234]  [ 400/1251]  eta: 0:03:02  lr: 0.000520  min_lr: 0.000520  loss: 2.4405 (2.9359)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4113 (1.2110)  time: 0.2049  data: 0.0006  max mem: 9147
Epoch: [234]  [ 600/1251]  eta: 0:02:17  lr: 0.000517  min_lr: 0.000517  loss: 2.2915 (2.9270)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0625 (1.1886)  time: 0.1949  data: 0.0006  max mem: 9147
Epoch: [234]  [ 800/1251]  eta: 0:01:32  lr: 0.000515  min_lr: 0.000515  loss: 3.1352 (2.9050)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1714 (1.1857)  time: 0.1914  data: 0.0005  max mem: 9147
Epoch: [234]  [1000/1251]  eta: 0:00:50  lr: 0.000513  min_lr: 0.000513  loss: 3.1428 (2.9180)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0859 (1.1845)  time: 0.1949  data: 0.0024  max mem: 9147
Epoch: [234]  [1200/1251]  eta: 0:00:10  lr: 0.000510  min_lr: 0.000510  loss: 2.3491 (2.9114)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3854 (1.1953)  time: 0.2249  data: 0.0006  max mem: 9147
Epoch: [234]  [1250/1251]  eta: 0:00:00  lr: 0.000510  min_lr: 0.000510  loss: 2.1637 (2.9035)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3950 (1.1995)  time: 0.1391  data: 0.0009  max mem: 9147
Epoch: [234] Total time: 0:04:14 (0.2038 s / it)
Averaged stats: lr: 0.000510  min_lr: 0.000510  loss: 2.1637 (2.8577)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3950 (1.1995)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.6488 (0.6488)  acc1: 87.2000 (87.2000)  acc5: 98.4000 (98.4000)  time: 5.4538  data: 5.3655  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8560 (0.8323)  acc1: 82.8000 (81.8545)  acc5: 96.4000 (96.5091)  time: 0.6791  data: 0.6046  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0566 (1.0205)  acc1: 76.4000 (78.1143)  acc5: 93.2000 (94.3619)  time: 0.1794  data: 0.1062  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1879 (1.0383)  acc1: 76.4000 (77.5360)  acc5: 92.8000 (94.3040)  time: 0.2142  data: 0.1427  max mem: 9147
Test: Total time: 0:00:10 (0.4063 s / it)
* Acc@1 77.592 Acc@5 94.058 loss 1.037
Accuracy of the model on the 50000 test images: 77.6%
Max accuracy: 77.76%
Epoch: [235]  [   0/1251]  eta: 1:06:20  lr: 0.000510  min_lr: 0.000510  loss: 2.6075 (2.6075)  weight_decay: 0.0500 (0.0500)  time: 3.1819  data: 1.7673  max mem: 9147
Epoch: [235]  [ 200/1251]  eta: 0:03:50  lr: 0.000507  min_lr: 0.000507  loss: 2.3065 (2.9123)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0960 (1.1754)  time: 0.2245  data: 0.0007  max mem: 9147
Epoch: [235]  [ 400/1251]  eta: 0:03:05  lr: 0.000505  min_lr: 0.000505  loss: 3.0473 (2.9097)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1921 (1.2217)  time: 0.2242  data: 0.0006  max mem: 9147
Epoch: [235]  [ 600/1251]  eta: 0:02:18  lr: 0.000502  min_lr: 0.000502  loss: 2.3992 (2.8882)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0884 (1.2058)  time: 0.1900  data: 0.0016  max mem: 9147
Epoch: [235]  [ 800/1251]  eta: 0:01:35  lr: 0.000500  min_lr: 0.000500  loss: 2.4737 (2.8973)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2241 (1.2111)  time: 0.2195  data: 0.0006  max mem: 9147
Epoch: [235]  [1000/1251]  eta: 0:00:53  lr: 0.000498  min_lr: 0.000498  loss: 2.9390 (2.8980)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1582 (1.2055)  time: 0.2050  data: 0.0006  max mem: 9147
Epoch: [235]  [1200/1251]  eta: 0:00:10  lr: 0.000495  min_lr: 0.000495  loss: 3.2102 (2.8934)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0425 (1.2035)  time: 0.1857  data: 0.0006  max mem: 9147
Epoch: [235]  [1250/1251]  eta: 0:00:00  lr: 0.000495  min_lr: 0.000495  loss: 2.2922 (2.8909)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0425 (1.1974)  time: 0.1531  data: 0.0012  max mem: 9147
Epoch: [235] Total time: 0:04:23 (0.2108 s / it)
Averaged stats: lr: 0.000495  min_lr: 0.000495  loss: 2.2922 (2.8382)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0425 (1.1974)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.6719 (0.6719)  acc1: 85.6000 (85.6000)  acc5: 99.2000 (99.2000)  time: 5.6421  data: 5.5430  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8301 (0.8495)  acc1: 82.8000 (81.7455)  acc5: 96.8000 (96.3636)  time: 0.7011  data: 0.6262  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0923 (1.0307)  acc1: 76.4000 (78.0000)  acc5: 92.8000 (94.1524)  time: 0.1825  data: 0.1109  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1791 (1.0456)  acc1: 75.6000 (77.3920)  acc5: 92.8000 (94.1440)  time: 0.2094  data: 0.1391  max mem: 9147
Test: Total time: 0:00:10 (0.4085 s / it)
* Acc@1 77.726 Acc@5 93.970 loss 1.047
Accuracy of the model on the 50000 test images: 77.7%
Max accuracy: 77.76%
Epoch: [236]  [   0/1251]  eta: 0:59:37  lr: 0.000495  min_lr: 0.000495  loss: 3.7830 (3.7830)  weight_decay: 0.0500 (0.0500)  time: 2.8599  data: 2.2030  max mem: 9147
Epoch: [236]  [ 200/1251]  eta: 0:03:45  lr: 0.000492  min_lr: 0.000492  loss: 2.3027 (2.7693)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1327 (1.1701)  time: 0.2005  data: 0.0006  max mem: 9147
Epoch: [236]  [ 400/1251]  eta: 0:03:01  lr: 0.000490  min_lr: 0.000490  loss: 2.2059 (2.7872)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2219 (1.1549)  time: 0.2061  data: 0.0007  max mem: 9147
Epoch: [236]  [ 600/1251]  eta: 0:02:17  lr: 0.000488  min_lr: 0.000488  loss: 2.4756 (2.8134)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1862 (1.1748)  time: 0.1896  data: 0.0006  max mem: 9147
Epoch: [236]  [ 800/1251]  eta: 0:01:33  lr: 0.000485  min_lr: 0.000485  loss: 2.3980 (2.8251)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1758 (1.1936)  time: 0.2011  data: 0.0007  max mem: 9147
Epoch: [236]  [1000/1251]  eta: 0:00:52  lr: 0.000483  min_lr: 0.000483  loss: 2.3723 (2.8150)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2349 (1.2203)  time: 0.1998  data: 0.0006  max mem: 9147
Epoch: [236]  [1200/1251]  eta: 0:00:10  lr: 0.000481  min_lr: 0.000481  loss: 2.3179 (2.8263)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2212 (1.2181)  time: 0.1902  data: 0.0006  max mem: 9147
Epoch: [236]  [1250/1251]  eta: 0:00:00  lr: 0.000480  min_lr: 0.000480  loss: 2.3711 (2.8275)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2302 (1.2171)  time: 0.1390  data: 0.0009  max mem: 9147
Epoch: [236] Total time: 0:04:15 (0.2046 s / it)
Averaged stats: lr: 0.000480  min_lr: 0.000480  loss: 2.3711 (2.8504)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2302 (1.2171)
Test:  [ 0/25]  eta: 0:01:21  loss: 0.7469 (0.7469)  acc1: 87.6000 (87.6000)  acc5: 98.4000 (98.4000)  time: 3.2662  data: 3.1858  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 0.8937 (0.8841)  acc1: 82.8000 (82.2909)  acc5: 96.4000 (96.4727)  time: 0.6164  data: 0.5394  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0931 (1.0751)  acc1: 77.2000 (78.2286)  acc5: 93.2000 (94.1714)  time: 0.2791  data: 0.2031  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1962 (1.0882)  acc1: 74.8000 (77.7600)  acc5: 93.2000 (94.2080)  time: 0.2106  data: 0.1372  max mem: 9147
Test: Total time: 0:00:09 (0.3961 s / it)
* Acc@1 77.586 Acc@5 93.926 loss 1.094
Accuracy of the model on the 50000 test images: 77.6%
Max accuracy: 77.76%
Epoch: [237]  [   0/1251]  eta: 1:04:25  lr: 0.000480  min_lr: 0.000480  loss: 2.3220 (2.3220)  weight_decay: 0.0500 (0.0500)  time: 3.0903  data: 2.6042  max mem: 9147
Epoch: [237]  [ 200/1251]  eta: 0:03:49  lr: 0.000478  min_lr: 0.000478  loss: 3.4468 (2.7952)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2216 (1.2095)  time: 0.1990  data: 0.0005  max mem: 9147
Epoch: [237]  [ 400/1251]  eta: 0:02:54  lr: 0.000475  min_lr: 0.000475  loss: 2.2408 (2.8411)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0663 (1.2118)  time: 0.1912  data: 0.0006  max mem: 9147
Epoch: [237]  [ 600/1251]  eta: 0:02:10  lr: 0.000473  min_lr: 0.000473  loss: 2.6487 (2.8219)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3157 (1.2495)  time: 0.1812  data: 0.0005  max mem: 9147
Epoch: [237]  [ 800/1251]  eta: 0:01:29  lr: 0.000471  min_lr: 0.000471  loss: 2.2254 (2.8308)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2725 (1.2805)  time: 0.1946  data: 0.0005  max mem: 9147
Epoch: [237]  [1000/1251]  eta: 0:00:49  lr: 0.000468  min_lr: 0.000468  loss: 2.4130 (2.8415)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2234 (1.2730)  time: 0.1953  data: 0.0007  max mem: 9147
Epoch: [237]  [1200/1251]  eta: 0:00:10  lr: 0.000466  min_lr: 0.000466  loss: 2.4129 (2.8404)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1213 (1.2576)  time: 0.2147  data: 0.0007  max mem: 9147
Epoch: [237]  [1250/1251]  eta: 0:00:00  lr: 0.000466  min_lr: 0.000466  loss: 2.8733 (2.8420)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1296 (1.2531)  time: 0.1432  data: 0.0008  max mem: 9147
Epoch: [237] Total time: 0:04:11 (0.2011 s / it)
Averaged stats: lr: 0.000466  min_lr: 0.000466  loss: 2.8733 (2.8564)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1296 (1.2531)
Test:  [ 0/25]  eta: 0:02:25  loss: 0.7157 (0.7157)  acc1: 86.4000 (86.4000)  acc5: 98.8000 (98.8000)  time: 5.8046  data: 5.7080  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9010 (0.8965)  acc1: 82.8000 (81.5273)  acc5: 96.4000 (96.3636)  time: 0.7727  data: 0.6935  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1738 (1.0961)  acc1: 75.2000 (77.9048)  acc5: 92.8000 (93.9048)  time: 0.2169  data: 0.1417  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2593 (1.1101)  acc1: 75.2000 (77.5840)  acc5: 92.4000 (93.9040)  time: 0.2149  data: 0.1417  max mem: 9147
Test: Total time: 0:00:10 (0.4227 s / it)
* Acc@1 77.726 Acc@5 93.916 loss 1.109
Accuracy of the model on the 50000 test images: 77.7%
Max accuracy: 77.76%
Epoch: [238]  [   0/1251]  eta: 0:58:10  lr: 0.000466  min_lr: 0.000466  loss: 2.7285 (2.7285)  weight_decay: 0.0500 (0.0500)  time: 2.7901  data: 1.6303  max mem: 9147
Epoch: [238]  [ 200/1251]  eta: 0:03:54  lr: 0.000463  min_lr: 0.000463  loss: 2.6859 (2.8394)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0151 (1.1171)  time: 0.2006  data: 0.0007  max mem: 9147
Epoch: [238]  [ 400/1251]  eta: 0:02:56  lr: 0.000461  min_lr: 0.000461  loss: 2.4728 (2.8262)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1471 (1.1747)  time: 0.1898  data: 0.0005  max mem: 9147
Epoch: [238]  [ 600/1251]  eta: 0:02:10  lr: 0.000459  min_lr: 0.000459  loss: 2.2992 (2.8333)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0482 (1.1605)  time: 0.1985  data: 0.0012  max mem: 9147
Epoch: [238]  [ 800/1251]  eta: 0:01:29  lr: 0.000456  min_lr: 0.000456  loss: 2.2807 (2.8134)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1972 (1.1875)  time: 0.1917  data: 0.0007  max mem: 9147
Epoch: [238]  [1000/1251]  eta: 0:00:50  lr: 0.000454  min_lr: 0.000454  loss: 2.4116 (2.8161)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0880 (1.1801)  time: 0.2391  data: 0.0009  max mem: 9147
Epoch: [238]  [1200/1251]  eta: 0:00:10  lr: 0.000452  min_lr: 0.000452  loss: 2.3639 (2.8194)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2000 (1.1988)  time: 0.2051  data: 0.0007  max mem: 9147
Epoch: [238]  [1250/1251]  eta: 0:00:00  lr: 0.000451  min_lr: 0.000451  loss: 2.4900 (2.8215)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1305 (1.1984)  time: 0.1483  data: 0.0015  max mem: 9147
Epoch: [238] Total time: 0:04:12 (0.2019 s / it)
Averaged stats: lr: 0.000451  min_lr: 0.000451  loss: 2.4900 (2.8482)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1305 (1.1984)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.7144 (0.7144)  acc1: 88.0000 (88.0000)  acc5: 98.4000 (98.4000)  time: 5.6553  data: 5.5607  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9066 (0.9109)  acc1: 83.2000 (82.2545)  acc5: 97.2000 (96.3273)  time: 0.7680  data: 0.6798  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1571 (1.0860)  acc1: 75.2000 (78.0191)  acc5: 93.2000 (94.1333)  time: 0.2058  data: 0.1260  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2198 (1.0961)  acc1: 74.8000 (77.8720)  acc5: 92.8000 (94.0320)  time: 0.1997  data: 0.1260  max mem: 9147
Test: Total time: 0:00:10 (0.4057 s / it)
* Acc@1 77.780 Acc@5 94.050 loss 1.095
Accuracy of the model on the 50000 test images: 77.8%
Max accuracy: 77.78%
Epoch: [239]  [   0/1251]  eta: 1:01:27  lr: 0.000451  min_lr: 0.000451  loss: 2.1169 (2.1169)  weight_decay: 0.0500 (0.0500)  time: 2.9472  data: 2.6865  max mem: 9147
Epoch: [239]  [ 200/1251]  eta: 0:03:52  lr: 0.000449  min_lr: 0.000449  loss: 2.4526 (2.7663)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1056 (1.1713)  time: 0.2077  data: 0.0007  max mem: 9147
Epoch: [239]  [ 400/1251]  eta: 0:03:07  lr: 0.000447  min_lr: 0.000447  loss: 2.4837 (2.7924)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2541 (1.2203)  time: 0.2230  data: 0.0006  max mem: 9147
Epoch: [239]  [ 600/1251]  eta: 0:02:18  lr: 0.000445  min_lr: 0.000445  loss: 2.8951 (2.8259)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1393 (1.2037)  time: 0.2085  data: 0.0008  max mem: 9147
Epoch: [239]  [ 800/1251]  eta: 0:01:35  lr: 0.000442  min_lr: 0.000442  loss: 2.2597 (2.8317)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1746 (1.2094)  time: 0.2189  data: 0.0008  max mem: 9147
Epoch: [239]  [1000/1251]  eta: 0:00:53  lr: 0.000440  min_lr: 0.000440  loss: 2.3492 (2.8292)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2163 (1.2087)  time: 0.2005  data: 0.0006  max mem: 9147
Epoch: [239]  [1200/1251]  eta: 0:00:10  lr: 0.000438  min_lr: 0.000438  loss: 2.9043 (2.8291)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0962 (1.2140)  time: 0.2050  data: 0.0007  max mem: 9147
Epoch: [239]  [1250/1251]  eta: 0:00:00  lr: 0.000437  min_lr: 0.000437  loss: 3.5634 (2.8345)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1202 (1.2123)  time: 0.1512  data: 0.0011  max mem: 9147
Epoch: [239] Total time: 0:04:22 (0.2097 s / it)
Averaged stats: lr: 0.000437  min_lr: 0.000437  loss: 3.5634 (2.8453)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1202 (1.2123)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.7350 (0.7350)  acc1: 87.6000 (87.6000)  acc5: 98.8000 (98.8000)  time: 5.5275  data: 5.4289  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.9097 (0.9208)  acc1: 83.6000 (81.7455)  acc5: 96.4000 (96.5091)  time: 0.7596  data: 0.6817  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1167 (1.0954)  acc1: 75.2000 (77.9810)  acc5: 94.0000 (94.1905)  time: 0.2178  data: 0.1437  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2441 (1.1051)  acc1: 75.2000 (77.6480)  acc5: 92.4000 (94.1120)  time: 0.2194  data: 0.1468  max mem: 9147
Test: Total time: 0:00:10 (0.4125 s / it)
* Acc@1 77.696 Acc@5 93.988 loss 1.104
Accuracy of the model on the 50000 test images: 77.7%
Max accuracy: 77.78%
Epoch: [240]  [   0/1251]  eta: 1:04:09  lr: 0.000437  min_lr: 0.000437  loss: 2.4290 (2.4290)  weight_decay: 0.0500 (0.0500)  time: 3.0771  data: 1.7013  max mem: 9147
Epoch: [240]  [ 200/1251]  eta: 0:03:53  lr: 0.000435  min_lr: 0.000435  loss: 2.2523 (2.8517)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1846 (1.2818)  time: 0.2002  data: 0.0006  max mem: 9147
Epoch: [240]  [ 400/1251]  eta: 0:03:04  lr: 0.000433  min_lr: 0.000433  loss: 2.8088 (2.8252)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2022 (inf)  time: 0.2344  data: 0.0006  max mem: 9147
Epoch: [240]  [ 600/1251]  eta: 0:02:21  lr: 0.000431  min_lr: 0.000431  loss: 2.3148 (2.8249)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1689 (inf)  time: 0.2204  data: 0.0007  max mem: 9147
Epoch: [240]  [ 800/1251]  eta: 0:01:37  lr: 0.000428  min_lr: 0.000428  loss: 2.3084 (2.8236)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2580 (inf)  time: 0.2060  data: 0.0007  max mem: 9147
Epoch: [240]  [1000/1251]  eta: 0:00:54  lr: 0.000426  min_lr: 0.000426  loss: 2.8661 (2.8330)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0818 (inf)  time: 0.2195  data: 0.0007  max mem: 9147
Epoch: [240]  [1200/1251]  eta: 0:00:10  lr: 0.000424  min_lr: 0.000424  loss: 2.9185 (2.8275)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2114 (inf)  time: 0.2003  data: 0.0006  max mem: 9147
Epoch: [240]  [1250/1251]  eta: 0:00:00  lr: 0.000423  min_lr: 0.000423  loss: 2.8550 (2.8265)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1554 (inf)  time: 0.1430  data: 0.0020  max mem: 9147
Epoch: [240] Total time: 0:04:27 (0.2140 s / it)
Averaged stats: lr: 0.000423  min_lr: 0.000423  loss: 2.8550 (2.8361)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1554 (inf)
Test:  [ 0/25]  eta: 0:01:29  loss: 0.6711 (0.6711)  acc1: 87.6000 (87.6000)  acc5: 98.4000 (98.4000)  time: 3.5922  data: 3.5057  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 0.8808 (0.8684)  acc1: 85.2000 (82.5091)  acc5: 96.0000 (96.4364)  time: 0.5936  data: 0.5183  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1320 (1.0498)  acc1: 76.0000 (78.6095)  acc5: 94.0000 (94.4952)  time: 0.2514  data: 0.1775  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1469 (1.0611)  acc1: 75.2000 (78.2400)  acc5: 94.0000 (94.3680)  time: 0.1996  data: 0.1277  max mem: 9147
Test: Total time: 0:00:09 (0.3907 s / it)
* Acc@1 77.998 Acc@5 94.116 loss 1.062
Accuracy of the model on the 50000 test images: 78.0%
Max accuracy: 78.00%
Epoch: [241]  [   0/1251]  eta: 1:00:46  lr: 0.000423  min_lr: 0.000423  loss: 2.0497 (2.0497)  weight_decay: 0.0500 (0.0500)  time: 2.9148  data: 2.7112  max mem: 9147
Epoch: [241]  [ 200/1251]  eta: 0:03:56  lr: 0.000421  min_lr: 0.000421  loss: 2.6521 (2.9542)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1943 (1.3107)  time: 0.2165  data: 0.0006  max mem: 9147
Epoch: [241]  [ 400/1251]  eta: 0:03:08  lr: 0.000419  min_lr: 0.000419  loss: 2.7226 (2.9186)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2645 (1.2864)  time: 0.1955  data: 0.0006  max mem: 9147
Epoch: [241]  [ 600/1251]  eta: 0:02:22  lr: 0.000417  min_lr: 0.000417  loss: 2.2181 (2.8596)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0566 (1.2527)  time: 0.2142  data: 0.0022  max mem: 9147
Epoch: [241]  [ 800/1251]  eta: 0:01:38  lr: 0.000415  min_lr: 0.000415  loss: 2.2124 (2.8604)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2329 (1.2334)  time: 0.1997  data: 0.0006  max mem: 9147
Epoch: [241]  [1000/1251]  eta: 0:00:54  lr: 0.000412  min_lr: 0.000412  loss: 2.3134 (2.8542)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3962 (1.2464)  time: 0.2243  data: 0.0006  max mem: 9147
Epoch: [241]  [1200/1251]  eta: 0:00:11  lr: 0.000410  min_lr: 0.000410  loss: 2.3520 (2.8369)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2359 (1.2484)  time: 0.1939  data: 0.0006  max mem: 9147
Epoch: [241]  [1250/1251]  eta: 0:00:00  lr: 0.000410  min_lr: 0.000410  loss: 2.3173 (2.8357)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2785 (1.2525)  time: 0.1405  data: 0.0014  max mem: 9147
Epoch: [241] Total time: 0:04:31 (0.2167 s / it)
Averaged stats: lr: 0.000410  min_lr: 0.000410  loss: 2.3173 (2.8456)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2785 (1.2525)
Test:  [ 0/25]  eta: 0:01:46  loss: 0.6554 (0.6554)  acc1: 86.4000 (86.4000)  acc5: 98.0000 (98.0000)  time: 4.2560  data: 4.1732  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 0.8310 (0.8354)  acc1: 83.6000 (82.3273)  acc5: 96.4000 (96.3273)  time: 0.6577  data: 0.5810  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0614 (1.0212)  acc1: 76.0000 (78.3810)  acc5: 93.6000 (94.3429)  time: 0.2461  data: 0.1720  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1685 (1.0315)  acc1: 75.2000 (77.9840)  acc5: 93.2000 (94.2880)  time: 0.2073  data: 0.1361  max mem: 9147
Test: Total time: 0:00:09 (0.3935 s / it)
* Acc@1 78.064 Acc@5 94.108 loss 1.035
Accuracy of the model on the 50000 test images: 78.1%
Max accuracy: 78.06%
Epoch: [242]  [   0/1251]  eta: 1:06:01  lr: 0.000410  min_lr: 0.000410  loss: 3.5991 (3.5991)  weight_decay: 0.0500 (0.0500)  time: 3.1667  data: 2.9709  max mem: 9147
Epoch: [242]  [ 200/1251]  eta: 0:03:38  lr: 0.000407  min_lr: 0.000407  loss: 2.8760 (2.8465)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1289 (1.1915)  time: 0.1934  data: 0.0006  max mem: 9147
Epoch: [242]  [ 400/1251]  eta: 0:02:59  lr: 0.000405  min_lr: 0.000405  loss: 2.6412 (2.8105)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1962 (1.2222)  time: 0.2140  data: 0.0006  max mem: 9147
Epoch: [242]  [ 600/1251]  eta: 0:02:13  lr: 0.000403  min_lr: 0.000403  loss: 2.7892 (2.8207)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0659 (1.2373)  time: 0.1883  data: 0.0005  max mem: 9147
Epoch: [242]  [ 800/1251]  eta: 0:01:32  lr: 0.000401  min_lr: 0.000401  loss: 2.1726 (2.7925)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2004 (1.2441)  time: 0.2296  data: 0.0008  max mem: 9147
Epoch: [242]  [1000/1251]  eta: 0:00:51  lr: 0.000399  min_lr: 0.000399  loss: 3.1222 (2.8036)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0415 (1.2261)  time: 0.1892  data: 0.0010  max mem: 9147
Epoch: [242]  [1200/1251]  eta: 0:00:10  lr: 0.000397  min_lr: 0.000397  loss: 2.7627 (2.8066)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2829 (1.2317)  time: 0.1898  data: 0.0005  max mem: 9147
Epoch: [242]  [1250/1251]  eta: 0:00:00  lr: 0.000396  min_lr: 0.000396  loss: 2.5179 (2.8062)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2213 (1.2331)  time: 0.1376  data: 0.0008  max mem: 9147
Epoch: [242] Total time: 0:04:13 (0.2024 s / it)
Averaged stats: lr: 0.000396  min_lr: 0.000396  loss: 2.5179 (2.8238)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2213 (1.2331)
Test:  [ 0/25]  eta: 0:02:14  loss: 0.6911 (0.6911)  acc1: 86.8000 (86.8000)  acc5: 98.0000 (98.0000)  time: 5.3607  data: 5.2802  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8838 (0.8790)  acc1: 82.8000 (82.4364)  acc5: 96.8000 (96.5818)  time: 0.7274  data: 0.6329  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1034 (1.0637)  acc1: 76.8000 (78.3810)  acc5: 93.6000 (94.2857)  time: 0.2056  data: 0.1213  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1849 (1.0745)  acc1: 76.4000 (78.0000)  acc5: 93.2000 (94.2880)  time: 0.2123  data: 0.1335  max mem: 9147
Test: Total time: 0:00:10 (0.4032 s / it)
* Acc@1 77.910 Acc@5 94.132 loss 1.078
Accuracy of the model on the 50000 test images: 77.9%
Max accuracy: 78.06%
Epoch: [243]  [   0/1251]  eta: 1:02:36  lr: 0.000396  min_lr: 0.000396  loss: 3.4167 (3.4167)  weight_decay: 0.0500 (0.0500)  time: 3.0028  data: 2.4066  max mem: 9147
Epoch: [243]  [ 200/1251]  eta: 0:03:55  lr: 0.000394  min_lr: 0.000394  loss: 2.6812 (2.7773)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2026 (1.2347)  time: 0.2062  data: 0.0007  max mem: 9147
Epoch: [243]  [ 400/1251]  eta: 0:03:04  lr: 0.000392  min_lr: 0.000392  loss: 2.4289 (2.8099)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2409 (1.2601)  time: 0.2007  data: 0.0006  max mem: 9147
Epoch: [243]  [ 600/1251]  eta: 0:02:14  lr: 0.000390  min_lr: 0.000390  loss: 2.3489 (2.7951)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2110 (1.2457)  time: 0.1954  data: 0.0006  max mem: 9147
Epoch: [243]  [ 800/1251]  eta: 0:01:34  lr: 0.000388  min_lr: 0.000388  loss: 2.3868 (2.8104)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1624 (1.2415)  time: 0.2056  data: 0.0007  max mem: 9147
Epoch: [243]  [1000/1251]  eta: 0:00:51  lr: 0.000385  min_lr: 0.000385  loss: 2.7954 (2.7993)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1464 (1.2307)  time: 0.1903  data: 0.0005  max mem: 9147
Epoch: [243]  [1200/1251]  eta: 0:00:10  lr: 0.000383  min_lr: 0.000383  loss: 2.2004 (2.8087)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2884 (1.2327)  time: 0.1842  data: 0.0006  max mem: 9147
Epoch: [243]  [1250/1251]  eta: 0:00:00  lr: 0.000383  min_lr: 0.000383  loss: 2.1961 (2.8076)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2821 (1.2374)  time: 0.1403  data: 0.0012  max mem: 9147
Epoch: [243] Total time: 0:04:14 (0.2032 s / it)
Averaged stats: lr: 0.000383  min_lr: 0.000383  loss: 2.1961 (2.8251)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2821 (1.2374)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.6797 (0.6797)  acc1: 86.8000 (86.8000)  acc5: 98.8000 (98.8000)  time: 5.4767  data: 5.3962  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8191 (0.8426)  acc1: 84.0000 (82.1455)  acc5: 96.8000 (96.4727)  time: 0.7563  data: 0.6620  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0559 (1.0239)  acc1: 76.8000 (78.3810)  acc5: 93.6000 (94.2857)  time: 0.2230  data: 0.1386  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1660 (1.0384)  acc1: 76.4000 (77.9200)  acc5: 93.6000 (94.2560)  time: 0.2178  data: 0.1385  max mem: 9147
Test: Total time: 0:00:10 (0.4118 s / it)
* Acc@1 78.128 Acc@5 94.102 loss 1.039
Accuracy of the model on the 50000 test images: 78.1%
Max accuracy: 78.13%
Epoch: [244]  [   0/1251]  eta: 1:03:47  lr: 0.000383  min_lr: 0.000383  loss: 3.8894 (3.8894)  weight_decay: 0.0500 (0.0500)  time: 3.0593  data: 2.8755  max mem: 9147
Epoch: [244]  [ 200/1251]  eta: 0:03:50  lr: 0.000381  min_lr: 0.000381  loss: 2.1448 (2.8154)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1104 (1.2306)  time: 0.2000  data: 0.0010  max mem: 9147
Epoch: [244]  [ 400/1251]  eta: 0:03:03  lr: 0.000379  min_lr: 0.000379  loss: 3.5432 (2.8313)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3952 (1.2874)  time: 0.2205  data: 0.0007  max mem: 9147
Epoch: [244]  [ 600/1251]  eta: 0:02:17  lr: 0.000377  min_lr: 0.000377  loss: 2.1809 (2.8219)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1639 (1.2635)  time: 0.2008  data: 0.0006  max mem: 9147
Epoch: [244]  [ 800/1251]  eta: 0:01:35  lr: 0.000374  min_lr: 0.000374  loss: 2.2191 (2.8264)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2260 (1.2714)  time: 0.2181  data: 0.0007  max mem: 9147
Epoch: [244]  [1000/1251]  eta: 0:00:53  lr: 0.000372  min_lr: 0.000372  loss: 2.2571 (2.8363)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1631 (1.2571)  time: 0.2186  data: 0.0021  max mem: 9147
Epoch: [244]  [1200/1251]  eta: 0:00:10  lr: 0.000370  min_lr: 0.000370  loss: 3.1455 (2.8454)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2339 (1.2633)  time: 0.2178  data: 0.0006  max mem: 9147
Epoch: [244]  [1250/1251]  eta: 0:00:00  lr: 0.000370  min_lr: 0.000370  loss: 2.4655 (2.8428)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2722 (1.2657)  time: 0.1389  data: 0.0009  max mem: 9147
Epoch: [244] Total time: 0:04:24 (0.2118 s / it)
Averaged stats: lr: 0.000370  min_lr: 0.000370  loss: 2.4655 (2.8296)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2722 (1.2657)
Test:  [ 0/25]  eta: 0:02:23  loss: 0.6899 (0.6899)  acc1: 87.2000 (87.2000)  acc5: 97.6000 (97.6000)  time: 5.7360  data: 5.6556  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8536 (0.8631)  acc1: 83.6000 (82.2909)  acc5: 96.4000 (96.2182)  time: 0.7536  data: 0.6786  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0828 (1.0370)  acc1: 75.2000 (78.3429)  acc5: 93.2000 (94.0952)  time: 0.2028  data: 0.1274  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1673 (1.0502)  acc1: 75.2000 (77.9360)  acc5: 92.8000 (94.0800)  time: 0.2018  data: 0.1274  max mem: 9147
Test: Total time: 0:00:10 (0.4071 s / it)
* Acc@1 77.996 Acc@5 94.056 loss 1.051
Accuracy of the model on the 50000 test images: 78.0%
Max accuracy: 78.13%
Epoch: [245]  [   0/1251]  eta: 1:06:00  lr: 0.000370  min_lr: 0.000370  loss: 2.2174 (2.2174)  weight_decay: 0.0500 (0.0500)  time: 3.1660  data: 1.6187  max mem: 9147
Epoch: [245]  [ 200/1251]  eta: 0:03:54  lr: 0.000368  min_lr: 0.000368  loss: 2.9067 (2.8020)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0737 (1.2049)  time: 0.2195  data: 0.0007  max mem: 9147
Epoch: [245]  [ 400/1251]  eta: 0:03:03  lr: 0.000366  min_lr: 0.000366  loss: 2.5701 (2.7985)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2477 (1.3034)  time: 0.2146  data: 0.0008  max mem: 9147
Epoch: [245]  [ 600/1251]  eta: 0:02:19  lr: 0.000364  min_lr: 0.000364  loss: 3.2314 (2.7995)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1596 (1.2944)  time: 0.1910  data: 0.0020  max mem: 9147
Epoch: [245]  [ 800/1251]  eta: 0:01:37  lr: 0.000362  min_lr: 0.000362  loss: 2.2363 (2.8054)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3675 (1.2828)  time: 0.2295  data: 0.0107  max mem: 9147
Epoch: [245]  [1000/1251]  eta: 0:00:53  lr: 0.000359  min_lr: 0.000359  loss: 2.4366 (2.8125)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1856 (1.2677)  time: 0.2097  data: 0.0007  max mem: 9147
Epoch: [245]  [1200/1251]  eta: 0:00:10  lr: 0.000357  min_lr: 0.000357  loss: 2.3162 (2.8142)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2096 (1.2542)  time: 0.1949  data: 0.0005  max mem: 9147
Epoch: [245]  [1250/1251]  eta: 0:00:00  lr: 0.000357  min_lr: 0.000357  loss: 2.3014 (2.8139)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2507 (1.2571)  time: 0.1384  data: 0.0010  max mem: 9147
Epoch: [245] Total time: 0:04:24 (0.2115 s / it)
Averaged stats: lr: 0.000357  min_lr: 0.000357  loss: 2.3014 (2.8197)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2507 (1.2571)
Test:  [ 0/25]  eta: 0:01:53  loss: 0.6709 (0.6709)  acc1: 86.4000 (86.4000)  acc5: 98.8000 (98.8000)  time: 4.5556  data: 4.4746  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8370 (0.8452)  acc1: 82.0000 (82.3273)  acc5: 96.0000 (96.1455)  time: 0.7194  data: 0.6334  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0746 (1.0212)  acc1: 77.2000 (78.6286)  acc5: 94.0000 (94.3429)  time: 0.2548  data: 0.1767  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1411 (1.0294)  acc1: 76.4000 (78.2240)  acc5: 93.2000 (94.2560)  time: 0.2302  data: 0.1571  max mem: 9147
Test: Total time: 0:00:10 (0.4018 s / it)
* Acc@1 78.118 Acc@5 94.178 loss 1.033
Accuracy of the model on the 50000 test images: 78.1%
Max accuracy: 78.13%
Epoch: [246]  [   0/1251]  eta: 1:03:00  lr: 0.000357  min_lr: 0.000357  loss: 2.2080 (2.2080)  weight_decay: 0.0500 (0.0500)  time: 3.0221  data: 1.5818  max mem: 9147
Epoch: [246]  [ 200/1251]  eta: 0:03:45  lr: 0.000355  min_lr: 0.000355  loss: 2.2509 (2.8597)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2663 (1.2542)  time: 0.1764  data: 0.0005  max mem: 9147
Epoch: [246]  [ 400/1251]  eta: 0:02:58  lr: 0.000353  min_lr: 0.000353  loss: 2.1104 (2.8290)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1656 (1.2696)  time: 0.2209  data: 0.0007  max mem: 9147
Epoch: [246]  [ 600/1251]  eta: 0:02:16  lr: 0.000351  min_lr: 0.000351  loss: 2.1612 (2.7957)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2278 (1.2682)  time: 0.2206  data: 0.0007  max mem: 9147
Epoch: [246]  [ 800/1251]  eta: 0:01:35  lr: 0.000349  min_lr: 0.000349  loss: 2.2202 (2.8085)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3125 (1.2949)  time: 0.1857  data: 0.0013  max mem: 9147
Epoch: [246]  [1000/1251]  eta: 0:00:52  lr: 0.000347  min_lr: 0.000347  loss: 3.3560 (2.8125)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1462 (1.2785)  time: 0.2303  data: 0.0007  max mem: 9147
Epoch: [246]  [1200/1251]  eta: 0:00:10  lr: 0.000345  min_lr: 0.000345  loss: 3.0847 (2.8313)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1633 (1.2760)  time: 0.1900  data: 0.0005  max mem: 9147
Epoch: [246]  [1250/1251]  eta: 0:00:00  lr: 0.000344  min_lr: 0.000344  loss: 2.2450 (2.8273)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1127 (1.2697)  time: 0.1380  data: 0.0011  max mem: 9147
Epoch: [246] Total time: 0:04:20 (0.2085 s / it)
Averaged stats: lr: 0.000344  min_lr: 0.000344  loss: 2.2450 (2.8021)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1127 (1.2697)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.6297 (0.6297)  acc1: 86.8000 (86.8000)  acc5: 98.8000 (98.8000)  time: 5.5368  data: 5.4565  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8042 (0.8099)  acc1: 84.4000 (82.1091)  acc5: 96.4000 (96.4364)  time: 0.6668  data: 0.5913  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0067 (0.9930)  acc1: 76.8000 (78.3810)  acc5: 94.0000 (94.4000)  time: 0.1819  data: 0.1089  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1335 (1.0037)  acc1: 76.8000 (78.0800)  acc5: 93.6000 (94.4000)  time: 0.2044  data: 0.1333  max mem: 9147
Test: Total time: 0:00:10 (0.4033 s / it)
* Acc@1 78.150 Acc@5 94.210 loss 1.008
Accuracy of the model on the 50000 test images: 78.2%
Max accuracy: 78.15%
Epoch: [247]  [   0/1251]  eta: 1:04:02  lr: 0.000344  min_lr: 0.000344  loss: 2.0900 (2.0900)  weight_decay: 0.0500 (0.0500)  time: 3.0719  data: 2.8302  max mem: 9147
Epoch: [247]  [ 200/1251]  eta: 0:03:40  lr: 0.000342  min_lr: 0.000342  loss: 2.4030 (2.7727)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2169 (1.2246)  time: 0.2000  data: 0.0006  max mem: 9147
Epoch: [247]  [ 400/1251]  eta: 0:02:51  lr: 0.000340  min_lr: 0.000340  loss: 2.3379 (2.7925)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2216 (1.2614)  time: 0.2001  data: 0.0013  max mem: 9147
Epoch: [247]  [ 600/1251]  eta: 0:02:12  lr: 0.000338  min_lr: 0.000338  loss: 2.3897 (2.8027)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.1957  data: 0.0010  max mem: 9147
Epoch: [247]  [ 800/1251]  eta: 0:01:31  lr: 0.000336  min_lr: 0.000336  loss: 2.2334 (2.8082)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1777 (nan)  time: 0.2208  data: 0.0139  max mem: 9147
Epoch: [247]  [1000/1251]  eta: 0:00:51  lr: 0.000334  min_lr: 0.000334  loss: 3.0294 (2.7968)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2366 (nan)  time: 0.2154  data: 0.0006  max mem: 9147
Epoch: [247]  [1200/1251]  eta: 0:00:10  lr: 0.000332  min_lr: 0.000332  loss: 2.2112 (2.7862)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2304 (nan)  time: 0.2302  data: 0.0010  max mem: 9147
Epoch: [247]  [1250/1251]  eta: 0:00:00  lr: 0.000332  min_lr: 0.000332  loss: 2.6230 (2.7955)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3373 (nan)  time: 0.1377  data: 0.0008  max mem: 9147
Epoch: [247] Total time: 0:04:19 (0.2075 s / it)
Averaged stats: lr: 0.000332  min_lr: 0.000332  loss: 2.6230 (2.8080)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3373 (nan)
Test:  [ 0/25]  eta: 0:02:25  loss: 0.6630 (0.6630)  acc1: 86.4000 (86.4000)  acc5: 98.8000 (98.8000)  time: 5.8043  data: 5.6939  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8595 (0.8539)  acc1: 83.2000 (82.2545)  acc5: 96.4000 (96.2909)  time: 0.7651  data: 0.6886  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0626 (1.0385)  acc1: 75.6000 (78.6095)  acc5: 92.8000 (94.1905)  time: 0.2153  data: 0.1433  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1662 (1.0503)  acc1: 75.2000 (78.2560)  acc5: 92.8000 (94.1760)  time: 0.2143  data: 0.1433  max mem: 9147
Test: Total time: 0:00:10 (0.4184 s / it)
* Acc@1 78.118 Acc@5 94.142 loss 1.049
Accuracy of the model on the 50000 test images: 78.1%
Max accuracy: 78.15%
Epoch: [248]  [   0/1251]  eta: 1:08:07  lr: 0.000332  min_lr: 0.000332  loss: 3.8183 (3.8183)  weight_decay: 0.0500 (0.0500)  time: 3.2673  data: 2.5145  max mem: 9147
Epoch: [248]  [ 200/1251]  eta: 0:03:58  lr: 0.000330  min_lr: 0.000330  loss: 2.2741 (2.7707)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1728 (1.2609)  time: 0.2145  data: 0.0007  max mem: 9147
Epoch: [248]  [ 400/1251]  eta: 0:03:07  lr: 0.000328  min_lr: 0.000328  loss: 2.1400 (2.7786)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1484 (1.2579)  time: 0.2249  data: 0.0006  max mem: 9147
Epoch: [248]  [ 600/1251]  eta: 0:02:20  lr: 0.000326  min_lr: 0.000326  loss: 2.3859 (2.7713)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1302 (1.2497)  time: 0.2356  data: 0.0008  max mem: 9147
Epoch: [248]  [ 800/1251]  eta: 0:01:37  lr: 0.000324  min_lr: 0.000324  loss: 2.2380 (2.7700)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0960 (1.2227)  time: 0.2149  data: 0.0006  max mem: 9147
Epoch: [248]  [1000/1251]  eta: 0:00:54  lr: 0.000322  min_lr: 0.000322  loss: 2.2349 (2.7762)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2593 (1.2387)  time: 0.2195  data: 0.0006  max mem: 9147
Epoch: [248]  [1200/1251]  eta: 0:00:10  lr: 0.000320  min_lr: 0.000320  loss: 2.5954 (2.7927)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2529 (1.2368)  time: 0.1906  data: 0.0005  max mem: 9147
Epoch: [248]  [1250/1251]  eta: 0:00:00  lr: 0.000320  min_lr: 0.000320  loss: 2.3607 (2.7905)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2529 (1.2394)  time: 0.1376  data: 0.0006  max mem: 9147
Epoch: [248] Total time: 0:04:25 (0.2120 s / it)
Averaged stats: lr: 0.000320  min_lr: 0.000320  loss: 2.3607 (2.8075)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2529 (1.2394)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.6694 (0.6694)  acc1: 87.6000 (87.6000)  acc5: 98.4000 (98.4000)  time: 5.4770  data: 5.3965  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8543 (0.8736)  acc1: 82.4000 (82.0000)  acc5: 96.0000 (96.1455)  time: 0.7290  data: 0.6355  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1010 (1.0413)  acc1: 76.8000 (78.3810)  acc5: 93.6000 (94.1524)  time: 0.2062  data: 0.1226  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1599 (1.0537)  acc1: 76.8000 (78.0800)  acc5: 93.2000 (94.2080)  time: 0.2208  data: 0.1424  max mem: 9147
Test: Total time: 0:00:10 (0.4143 s / it)
* Acc@1 78.292 Acc@5 94.210 loss 1.050
Accuracy of the model on the 50000 test images: 78.3%
Max accuracy: 78.29%
Epoch: [249]  [   0/1251]  eta: 0:55:00  lr: 0.000320  min_lr: 0.000320  loss: 2.2543 (2.2543)  weight_decay: 0.0500 (0.0500)  time: 2.6379  data: 2.4283  max mem: 9147
Epoch: [249]  [ 200/1251]  eta: 0:03:58  lr: 0.000318  min_lr: 0.000318  loss: 2.3649 (2.7741)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1700 (1.3238)  time: 0.2334  data: 0.0008  max mem: 9147
Epoch: [249]  [ 400/1251]  eta: 0:03:08  lr: 0.000316  min_lr: 0.000316  loss: 2.0770 (2.7610)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2398 (1.2774)  time: 0.2155  data: 0.0006  max mem: 9147
Epoch: [249]  [ 600/1251]  eta: 0:02:22  lr: 0.000314  min_lr: 0.000314  loss: 2.4268 (2.8062)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2193 (1.2653)  time: 0.1996  data: 0.0006  max mem: 9147
Epoch: [249]  [ 800/1251]  eta: 0:01:35  lr: 0.000312  min_lr: 0.000312  loss: 2.3271 (2.8112)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2149 (1.2676)  time: 0.1857  data: 0.0005  max mem: 9147
Epoch: [249]  [1000/1251]  eta: 0:00:51  lr: 0.000310  min_lr: 0.000310  loss: 2.1730 (2.7924)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1436 (1.2698)  time: 0.1855  data: 0.0006  max mem: 9147
Epoch: [249]  [1200/1251]  eta: 0:00:10  lr: 0.000308  min_lr: 0.000308  loss: 2.2824 (2.8032)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1795 (1.2593)  time: 0.2006  data: 0.0006  max mem: 9147
Epoch: [249]  [1250/1251]  eta: 0:00:00  lr: 0.000308  min_lr: 0.000308  loss: 2.1368 (2.7977)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1825 (1.2597)  time: 0.1458  data: 0.0012  max mem: 9147
Epoch: [249] Total time: 0:04:16 (0.2053 s / it)
Averaged stats: lr: 0.000308  min_lr: 0.000308  loss: 2.1368 (2.8086)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1825 (1.2597)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.6515 (0.6515)  acc1: 86.4000 (86.4000)  acc5: 98.8000 (98.8000)  time: 5.6215  data: 5.4944  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.7967 (0.8288)  acc1: 84.8000 (82.4364)  acc5: 96.4000 (96.5455)  time: 0.7943  data: 0.6969  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0583 (1.0082)  acc1: 76.4000 (78.5714)  acc5: 93.6000 (94.3429)  time: 0.2194  data: 0.1374  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1477 (1.0215)  acc1: 75.6000 (78.0000)  acc5: 93.2000 (94.3040)  time: 0.2133  data: 0.1373  max mem: 9147
Test: Total time: 0:00:10 (0.4146 s / it)
* Acc@1 78.142 Acc@5 94.106 loss 1.023
Accuracy of the model on the 50000 test images: 78.1%
Max accuracy: 78.29%
Epoch: [250]  [   0/1251]  eta: 1:00:24  lr: 0.000307  min_lr: 0.000307  loss: 2.0780 (2.0780)  weight_decay: 0.0500 (0.0500)  time: 2.8973  data: 1.5875  max mem: 9147
Epoch: [250]  [ 200/1251]  eta: 0:03:47  lr: 0.000306  min_lr: 0.000306  loss: 2.3589 (2.8786)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2809 (1.2307)  time: 0.1903  data: 0.0011  max mem: 9147
Epoch: [250]  [ 400/1251]  eta: 0:02:58  lr: 0.000304  min_lr: 0.000304  loss: 2.5601 (2.8208)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0686 (1.2730)  time: 0.2196  data: 0.0006  max mem: 9147
Epoch: [250]  [ 600/1251]  eta: 0:02:16  lr: 0.000302  min_lr: 0.000302  loss: 2.2151 (2.8205)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1428 (1.2631)  time: 0.1957  data: 0.0005  max mem: 9147
Epoch: [250]  [ 800/1251]  eta: 0:01:35  lr: 0.000300  min_lr: 0.000300  loss: 2.3129 (2.8074)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1507 (1.2536)  time: 0.2163  data: 0.0007  max mem: 9147
Epoch: [250]  [1000/1251]  eta: 0:00:52  lr: 0.000298  min_lr: 0.000298  loss: 2.3196 (2.8035)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2213 (1.2672)  time: 0.1994  data: 0.0007  max mem: 9147
Epoch: [250]  [1200/1251]  eta: 0:00:10  lr: 0.000296  min_lr: 0.000296  loss: 2.3592 (2.8054)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1761 (1.2696)  time: 0.2065  data: 0.0006  max mem: 9147
Epoch: [250]  [1250/1251]  eta: 0:00:00  lr: 0.000296  min_lr: 0.000296  loss: 2.9356 (2.8038)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1713 (1.2651)  time: 0.1410  data: 0.0010  max mem: 9147
Epoch: [250] Total time: 0:04:25 (0.2125 s / it)
Averaged stats: lr: 0.000296  min_lr: 0.000296  loss: 2.9356 (2.8041)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1713 (1.2651)
Test:  [ 0/25]  eta: 0:02:14  loss: 0.7114 (0.7114)  acc1: 87.2000 (87.2000)  acc5: 98.8000 (98.8000)  time: 5.3695  data: 5.2754  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8817 (0.8832)  acc1: 84.4000 (82.5091)  acc5: 96.4000 (96.3273)  time: 0.6705  data: 0.5820  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1278 (1.0636)  acc1: 76.0000 (78.7619)  acc5: 94.4000 (94.5905)  time: 0.1858  data: 0.1071  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2100 (1.0771)  acc1: 76.0000 (78.2720)  acc5: 93.6000 (94.4960)  time: 0.2058  data: 0.1332  max mem: 9147
Test: Total time: 0:00:09 (0.3990 s / it)
* Acc@1 78.238 Acc@5 94.190 loss 1.082
Accuracy of the model on the 50000 test images: 78.2%
Max accuracy: 78.29%
Epoch: [251]  [   0/1251]  eta: 1:01:41  lr: 0.000296  min_lr: 0.000296  loss: 2.2734 (2.2734)  weight_decay: 0.0500 (0.0500)  time: 2.9590  data: 2.4876  max mem: 9147
Epoch: [251]  [ 200/1251]  eta: 0:03:44  lr: 0.000294  min_lr: 0.000294  loss: 2.5097 (2.8436)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1211 (inf)  time: 0.1956  data: 0.0006  max mem: 9147
Epoch: [251]  [ 400/1251]  eta: 0:03:00  lr: 0.000292  min_lr: 0.000292  loss: 2.1960 (2.8160)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2704 (inf)  time: 0.1923  data: 0.0005  max mem: 9147
Epoch: [251]  [ 600/1251]  eta: 0:02:13  lr: 0.000290  min_lr: 0.000290  loss: 2.6192 (2.8276)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1568 (inf)  time: 0.1961  data: 0.0006  max mem: 9147
Epoch: [251]  [ 800/1251]  eta: 0:01:31  lr: 0.000288  min_lr: 0.000288  loss: 2.2038 (2.8189)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2545 (inf)  time: 0.1943  data: 0.0006  max mem: 9147
Epoch: [251]  [1000/1251]  eta: 0:00:50  lr: 0.000286  min_lr: 0.000286  loss: 2.7874 (2.8150)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2777 (inf)  time: 0.2144  data: 0.0095  max mem: 9147
Epoch: [251]  [1200/1251]  eta: 0:00:10  lr: 0.000284  min_lr: 0.000284  loss: 2.1947 (2.8155)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2273 (inf)  time: 0.2240  data: 0.0007  max mem: 9147
Epoch: [251]  [1250/1251]  eta: 0:00:00  lr: 0.000284  min_lr: 0.000284  loss: 2.1389 (2.8067)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1626 (inf)  time: 0.1433  data: 0.0014  max mem: 9147
Epoch: [251] Total time: 0:04:15 (0.2046 s / it)
Averaged stats: lr: 0.000284  min_lr: 0.000284  loss: 2.1389 (2.8007)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1626 (inf)
Test:  [ 0/25]  eta: 0:01:17  loss: 0.6281 (0.6281)  acc1: 88.8000 (88.8000)  acc5: 98.4000 (98.4000)  time: 3.0969  data: 3.0109  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 0.8170 (0.8129)  acc1: 83.2000 (82.4364)  acc5: 96.4000 (96.4000)  time: 0.5943  data: 0.5064  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0331 (0.9863)  acc1: 76.4000 (78.7238)  acc5: 93.6000 (94.4762)  time: 0.2770  data: 0.1973  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1288 (0.9943)  acc1: 76.0000 (78.3520)  acc5: 93.6000 (94.4640)  time: 0.2293  data: 0.1520  max mem: 9147
Test: Total time: 0:00:09 (0.3911 s / it)
* Acc@1 78.360 Acc@5 94.344 loss 0.999
Accuracy of the model on the 50000 test images: 78.4%
Max accuracy: 78.36%
Epoch: [252]  [   0/1251]  eta: 1:10:04  lr: 0.000284  min_lr: 0.000284  loss: 2.3610 (2.3610)  weight_decay: 0.0500 (0.0500)  time: 3.3608  data: 3.1422  max mem: 9147
Epoch: [252]  [ 200/1251]  eta: 0:03:33  lr: 0.000282  min_lr: 0.000282  loss: 2.1582 (2.7980)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2866 (1.3181)  time: 0.1853  data: 0.0013  max mem: 9147
Epoch: [252]  [ 400/1251]  eta: 0:02:53  lr: 0.000280  min_lr: 0.000280  loss: 2.1597 (2.7219)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1433 (1.3080)  time: 0.2216  data: 0.0008  max mem: 9147
Epoch: [252]  [ 600/1251]  eta: 0:02:13  lr: 0.000279  min_lr: 0.000279  loss: 2.2391 (2.7009)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0810 (1.3028)  time: 0.1953  data: 0.0006  max mem: 9147
Epoch: [252]  [ 800/1251]  eta: 0:01:32  lr: 0.000277  min_lr: 0.000277  loss: 2.3590 (2.7477)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1916 (1.2861)  time: 0.1991  data: 0.0007  max mem: 9147
Epoch: [252]  [1000/1251]  eta: 0:00:51  lr: 0.000275  min_lr: 0.000275  loss: 2.3031 (2.7681)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2368 (1.2813)  time: 0.2052  data: 0.0006  max mem: 9147
Epoch: [252]  [1200/1251]  eta: 0:00:10  lr: 0.000273  min_lr: 0.000273  loss: 2.3279 (2.7637)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2867 (1.2791)  time: 0.1935  data: 0.0005  max mem: 9147
Epoch: [252]  [1250/1251]  eta: 0:00:00  lr: 0.000273  min_lr: 0.000273  loss: 2.0918 (2.7606)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2983 (1.2831)  time: 0.1394  data: 0.0010  max mem: 9147
Epoch: [252] Total time: 0:04:19 (0.2071 s / it)
Averaged stats: lr: 0.000273  min_lr: 0.000273  loss: 2.0918 (2.8071)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2983 (1.2831)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.6597 (0.6597)  acc1: 87.2000 (87.2000)  acc5: 98.4000 (98.4000)  time: 5.5860  data: 5.4940  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8474 (0.8420)  acc1: 82.0000 (82.4000)  acc5: 96.8000 (96.5818)  time: 0.7729  data: 0.6968  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0727 (1.0089)  acc1: 77.2000 (78.7238)  acc5: 94.4000 (94.6476)  time: 0.2141  data: 0.1418  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1148 (1.0198)  acc1: 77.2000 (78.3520)  acc5: 93.2000 (94.5920)  time: 0.2187  data: 0.1476  max mem: 9147
Test: Total time: 0:00:10 (0.4138 s / it)
* Acc@1 78.354 Acc@5 94.268 loss 1.021
Accuracy of the model on the 50000 test images: 78.4%
Max accuracy: 78.36%
Epoch: [253]  [   0/1251]  eta: 1:07:30  lr: 0.000273  min_lr: 0.000273  loss: 2.5771 (2.5771)  weight_decay: 0.0500 (0.0500)  time: 3.2381  data: 2.6008  max mem: 9147
Epoch: [253]  [ 200/1251]  eta: 0:03:39  lr: 0.000271  min_lr: 0.000271  loss: 3.6888 (2.9379)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2167 (1.2800)  time: 0.1884  data: 0.0011  max mem: 9147
Epoch: [253]  [ 400/1251]  eta: 0:02:53  lr: 0.000269  min_lr: 0.000269  loss: 3.1153 (2.8745)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2676 (1.3006)  time: 0.2254  data: 0.0005  max mem: 9147
Epoch: [253]  [ 600/1251]  eta: 0:02:14  lr: 0.000267  min_lr: 0.000267  loss: 2.3348 (2.8236)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1935 (1.2570)  time: 0.2193  data: 0.0236  max mem: 9147
Epoch: [253]  [ 800/1251]  eta: 0:01:34  lr: 0.000265  min_lr: 0.000265  loss: 2.1491 (2.8135)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2749 (1.2701)  time: 0.2111  data: 0.0007  max mem: 9147
Epoch: [253]  [1000/1251]  eta: 0:00:52  lr: 0.000264  min_lr: 0.000264  loss: 2.2126 (2.7913)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2525 (1.2777)  time: 0.1950  data: 0.0006  max mem: 9147
Epoch: [253]  [1200/1251]  eta: 0:00:10  lr: 0.000262  min_lr: 0.000262  loss: 2.3451 (2.7861)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2429 (1.2924)  time: 0.2050  data: 0.0006  max mem: 9147
Epoch: [253]  [1250/1251]  eta: 0:00:00  lr: 0.000261  min_lr: 0.000261  loss: 3.0195 (2.7863)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1657 (1.2897)  time: 0.1415  data: 0.0008  max mem: 9147
Epoch: [253] Total time: 0:04:19 (0.2074 s / it)
Averaged stats: lr: 0.000261  min_lr: 0.000261  loss: 3.0195 (2.7868)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1657 (1.2897)
Test:  [ 0/25]  eta: 0:02:17  loss: 0.6725 (0.6725)  acc1: 88.4000 (88.4000)  acc5: 98.8000 (98.8000)  time: 5.5164  data: 5.4226  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8810 (0.8676)  acc1: 83.6000 (82.0727)  acc5: 96.4000 (96.5818)  time: 0.6928  data: 0.5966  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0740 (1.0423)  acc1: 76.4000 (78.6286)  acc5: 94.0000 (94.6095)  time: 0.1984  data: 0.1136  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1623 (1.0554)  acc1: 76.4000 (78.3040)  acc5: 93.6000 (94.5600)  time: 0.2096  data: 0.1305  max mem: 9147
Test: Total time: 0:00:10 (0.4077 s / it)
* Acc@1 78.186 Acc@5 94.290 loss 1.054
Accuracy of the model on the 50000 test images: 78.2%
Max accuracy: 78.36%
Epoch: [254]  [   0/1251]  eta: 1:01:16  lr: 0.000261  min_lr: 0.000261  loss: 2.1309 (2.1309)  weight_decay: 0.0500 (0.0500)  time: 2.9389  data: 1.7464  max mem: 9147
Epoch: [254]  [ 200/1251]  eta: 0:03:49  lr: 0.000260  min_lr: 0.000260  loss: 2.5103 (2.7987)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2576 (1.3672)  time: 0.2201  data: 0.0008  max mem: 9147
Epoch: [254]  [ 400/1251]  eta: 0:03:04  lr: 0.000258  min_lr: 0.000258  loss: 3.1966 (2.8064)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2341 (1.3402)  time: 0.2044  data: 0.0007  max mem: 9147
Epoch: [254]  [ 600/1251]  eta: 0:02:20  lr: 0.000256  min_lr: 0.000256  loss: 2.8383 (2.8253)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2808 (1.3421)  time: 0.2247  data: 0.0273  max mem: 9147
Epoch: [254]  [ 800/1251]  eta: 0:01:36  lr: 0.000254  min_lr: 0.000254  loss: 2.4665 (2.7850)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2188 (1.3293)  time: 0.2157  data: 0.0008  max mem: 9147
Epoch: [254]  [1000/1251]  eta: 0:00:53  lr: 0.000253  min_lr: 0.000253  loss: 2.1898 (2.7679)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2520 (1.3333)  time: 0.2154  data: 0.0075  max mem: 9147
Epoch: [254]  [1200/1251]  eta: 0:00:10  lr: 0.000251  min_lr: 0.000251  loss: 2.7494 (2.7753)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1588 (1.3245)  time: 0.2251  data: 0.0011  max mem: 9147
Epoch: [254]  [1250/1251]  eta: 0:00:00  lr: 0.000251  min_lr: 0.000251  loss: 2.3781 (2.7706)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0908 (1.3184)  time: 0.1528  data: 0.0007  max mem: 9147
Epoch: [254] Total time: 0:04:28 (0.2143 s / it)
Averaged stats: lr: 0.000251  min_lr: 0.000251  loss: 2.3781 (2.7775)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0908 (1.3184)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.6684 (0.6684)  acc1: 87.6000 (87.6000)  acc5: 98.8000 (98.8000)  time: 5.5454  data: 5.4649  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8646 (0.8495)  acc1: 82.8000 (82.4000)  acc5: 96.4000 (96.5455)  time: 0.6947  data: 0.6003  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0346 (1.0173)  acc1: 76.4000 (78.7048)  acc5: 94.0000 (94.4952)  time: 0.2001  data: 0.1175  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1516 (1.0276)  acc1: 76.0000 (78.2400)  acc5: 93.2000 (94.4640)  time: 0.1945  data: 0.1175  max mem: 9147
Test: Total time: 0:00:09 (0.3987 s / it)
* Acc@1 78.428 Acc@5 94.332 loss 1.031
Accuracy of the model on the 50000 test images: 78.4%
Max accuracy: 78.43%
Epoch: [255]  [   0/1251]  eta: 0:58:59  lr: 0.000250  min_lr: 0.000250  loss: 2.0571 (2.0571)  weight_decay: 0.0500 (0.0500)  time: 2.8297  data: 2.3726  max mem: 9147
Epoch: [255]  [ 200/1251]  eta: 0:03:56  lr: 0.000249  min_lr: 0.000249  loss: 2.5347 (2.7442)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2964 (1.3038)  time: 0.2193  data: 0.0106  max mem: 9147
Epoch: [255]  [ 400/1251]  eta: 0:03:03  lr: 0.000247  min_lr: 0.000247  loss: 2.2289 (2.7573)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3993 (1.4328)  time: 0.2052  data: 0.0007  max mem: 9147
Epoch: [255]  [ 600/1251]  eta: 0:02:20  lr: 0.000245  min_lr: 0.000245  loss: 2.3637 (2.7749)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3013 (1.4113)  time: 0.2102  data: 0.0006  max mem: 9147
Epoch: [255]  [ 800/1251]  eta: 0:01:36  lr: 0.000244  min_lr: 0.000244  loss: 2.3872 (2.7812)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3957 (1.3866)  time: 0.2131  data: 0.0021  max mem: 9147
Epoch: [255]  [1000/1251]  eta: 0:00:53  lr: 0.000242  min_lr: 0.000242  loss: 2.2657 (2.7689)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1994 (1.3555)  time: 0.2039  data: 0.0005  max mem: 9147
Epoch: [255]  [1200/1251]  eta: 0:00:10  lr: 0.000240  min_lr: 0.000240  loss: 2.1889 (2.7701)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1476 (1.3343)  time: 0.2111  data: 0.0010  max mem: 9147
Epoch: [255]  [1250/1251]  eta: 0:00:00  lr: 0.000240  min_lr: 0.000240  loss: 2.2230 (2.7709)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2318 (1.3318)  time: 0.1511  data: 0.0011  max mem: 9147
Epoch: [255] Total time: 0:04:26 (0.2133 s / it)
Averaged stats: lr: 0.000240  min_lr: 0.000240  loss: 2.2230 (2.7862)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2318 (1.3318)
Test:  [ 0/25]  eta: 0:02:09  loss: 0.5902 (0.5902)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.1769  data: 5.0771  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8077 (0.8040)  acc1: 84.4000 (82.8000)  acc5: 96.8000 (96.5818)  time: 0.7113  data: 0.6336  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0239 (0.9866)  acc1: 78.4000 (79.0095)  acc5: 92.8000 (94.4381)  time: 0.2153  data: 0.1424  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.0988 (0.9998)  acc1: 76.4000 (78.4640)  acc5: 92.8000 (94.4320)  time: 0.2120  data: 0.1401  max mem: 9147
Test: Total time: 0:00:09 (0.3973 s / it)
* Acc@1 78.434 Acc@5 94.242 loss 1.002
Accuracy of the model on the 50000 test images: 78.4%
Max accuracy: 78.43%
Epoch: [256]  [   0/1251]  eta: 0:58:23  lr: 0.000240  min_lr: 0.000240  loss: 2.2431 (2.2431)  weight_decay: 0.0500 (0.0500)  time: 2.8008  data: 2.5947  max mem: 9147
Epoch: [256]  [ 200/1251]  eta: 0:03:58  lr: 0.000238  min_lr: 0.000238  loss: 2.2229 (2.6897)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2016 (1.2334)  time: 0.2248  data: 0.0265  max mem: 9147
Epoch: [256]  [ 400/1251]  eta: 0:03:06  lr: 0.000236  min_lr: 0.000236  loss: 2.2917 (2.7483)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1324 (1.2172)  time: 0.2396  data: 0.0007  max mem: 9147
Epoch: [256]  [ 600/1251]  eta: 0:02:16  lr: 0.000235  min_lr: 0.000235  loss: 2.1833 (2.7485)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3463 (1.2252)  time: 0.1950  data: 0.0018  max mem: 9147
Epoch: [256]  [ 800/1251]  eta: 0:01:33  lr: 0.000233  min_lr: 0.000233  loss: 2.3096 (2.7671)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3882 (1.2555)  time: 0.1931  data: 0.0006  max mem: 9147
Epoch: [256]  [1000/1251]  eta: 0:00:51  lr: 0.000231  min_lr: 0.000231  loss: 2.4567 (2.7747)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1723 (1.2848)  time: 0.2065  data: 0.0007  max mem: 9147
Epoch: [256]  [1200/1251]  eta: 0:00:10  lr: 0.000230  min_lr: 0.000230  loss: 2.2447 (2.7681)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1972 (1.2790)  time: 0.2056  data: 0.0008  max mem: 9147
Epoch: [256]  [1250/1251]  eta: 0:00:00  lr: 0.000229  min_lr: 0.000229  loss: 2.5205 (2.7682)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2245 (1.2767)  time: 0.1404  data: 0.0010  max mem: 9147
Epoch: [256] Total time: 0:04:17 (0.2062 s / it)
Averaged stats: lr: 0.000229  min_lr: 0.000229  loss: 2.5205 (2.7914)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2245 (1.2767)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.6524 (0.6524)  acc1: 87.6000 (87.6000)  acc5: 98.8000 (98.8000)  time: 5.6407  data: 5.5407  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8170 (0.8217)  acc1: 82.4000 (82.6909)  acc5: 96.8000 (96.7636)  time: 0.7364  data: 0.6487  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0281 (0.9957)  acc1: 76.0000 (78.8000)  acc5: 93.6000 (94.7810)  time: 0.2020  data: 0.1242  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1044 (1.0058)  acc1: 76.0000 (78.4320)  acc5: 93.2000 (94.7360)  time: 0.1977  data: 0.1254  max mem: 9147
Test: Total time: 0:00:10 (0.4054 s / it)
* Acc@1 78.472 Acc@5 94.330 loss 1.009
Accuracy of the model on the 50000 test images: 78.5%
Max accuracy: 78.47%
Epoch: [257]  [   0/1251]  eta: 1:00:12  lr: 0.000229  min_lr: 0.000229  loss: 2.3356 (2.3356)  weight_decay: 0.0500 (0.0500)  time: 2.8875  data: 2.5731  max mem: 9147
Epoch: [257]  [ 200/1251]  eta: 0:03:42  lr: 0.000228  min_lr: 0.000228  loss: 2.2289 (2.7780)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2680 (1.2690)  time: 0.2045  data: 0.0007  max mem: 9147
Epoch: [257]  [ 400/1251]  eta: 0:03:03  lr: 0.000226  min_lr: 0.000226  loss: 2.3420 (2.8251)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2758 (1.2815)  time: 0.2098  data: 0.0007  max mem: 9147
Epoch: [257]  [ 600/1251]  eta: 0:02:18  lr: 0.000224  min_lr: 0.000224  loss: 2.2572 (2.8423)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2814 (1.3022)  time: 0.2053  data: 0.0006  max mem: 9147
Epoch: [257]  [ 800/1251]  eta: 0:01:35  lr: 0.000223  min_lr: 0.000223  loss: 2.1715 (2.8384)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2694 (1.3041)  time: 0.2044  data: 0.0007  max mem: 9147
Epoch: [257]  [1000/1251]  eta: 0:00:52  lr: 0.000221  min_lr: 0.000221  loss: 2.5782 (2.8149)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2195 (1.2957)  time: 0.1992  data: 0.0006  max mem: 9147
Epoch: [257]  [1200/1251]  eta: 0:00:10  lr: 0.000219  min_lr: 0.000219  loss: 2.1822 (2.8044)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2235 (1.2877)  time: 0.2134  data: 0.0006  max mem: 9147
Epoch: [257]  [1250/1251]  eta: 0:00:00  lr: 0.000219  min_lr: 0.000219  loss: 2.1454 (2.8000)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2819 (1.2890)  time: 0.1418  data: 0.0011  max mem: 9147
Epoch: [257] Total time: 0:04:23 (0.2110 s / it)
Averaged stats: lr: 0.000219  min_lr: 0.000219  loss: 2.1454 (2.7786)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2819 (1.2890)
Test:  [ 0/25]  eta: 0:02:17  loss: 0.6328 (0.6328)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.4891  data: 5.3805  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8294 (0.8236)  acc1: 84.0000 (82.9455)  acc5: 96.4000 (96.3636)  time: 0.7043  data: 0.6270  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0355 (0.9951)  acc1: 76.8000 (78.9333)  acc5: 93.6000 (94.4381)  time: 0.1940  data: 0.1207  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1238 (1.0048)  acc1: 75.6000 (78.5600)  acc5: 93.2000 (94.3680)  time: 0.2082  data: 0.1361  max mem: 9147
Test: Total time: 0:00:10 (0.4023 s / it)
* Acc@1 78.504 Acc@5 94.322 loss 1.007
Accuracy of the model on the 50000 test images: 78.5%
Max accuracy: 78.50%
Epoch: [258]  [   0/1251]  eta: 1:01:52  lr: 0.000219  min_lr: 0.000219  loss: 2.0768 (2.0768)  weight_decay: 0.0500 (0.0500)  time: 2.9675  data: 2.7134  max mem: 9147
Epoch: [258]  [ 200/1251]  eta: 0:03:46  lr: 0.000217  min_lr: 0.000217  loss: 2.1519 (2.7157)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3682 (nan)  time: 0.1907  data: 0.0019  max mem: 9147
Epoch: [258]  [ 400/1251]  eta: 0:03:00  lr: 0.000216  min_lr: 0.000216  loss: 2.3399 (2.7433)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3312 (nan)  time: 0.2193  data: 0.0053  max mem: 9147
Epoch: [258]  [ 600/1251]  eta: 0:02:18  lr: 0.000214  min_lr: 0.000214  loss: 2.4113 (2.7581)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3003 (nan)  time: 0.2298  data: 0.0007  max mem: 9147
Epoch: [258]  [ 800/1251]  eta: 0:01:36  lr: 0.000212  min_lr: 0.000212  loss: 2.4175 (2.7769)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1348 (nan)  time: 0.2033  data: 0.0007  max mem: 9147
Epoch: [258]  [1000/1251]  eta: 0:00:52  lr: 0.000211  min_lr: 0.000211  loss: 2.6652 (2.7771)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1939 (nan)  time: 0.2012  data: 0.0007  max mem: 9147
Epoch: [258]  [1200/1251]  eta: 0:00:10  lr: 0.000209  min_lr: 0.000209  loss: 2.3602 (2.7846)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3051 (nan)  time: 0.1845  data: 0.0015  max mem: 9147
Epoch: [258]  [1250/1251]  eta: 0:00:00  lr: 0.000209  min_lr: 0.000209  loss: 2.3578 (2.7870)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2564 (nan)  time: 0.1390  data: 0.0010  max mem: 9147
Epoch: [258] Total time: 0:04:23 (0.2105 s / it)
Averaged stats: lr: 0.000209  min_lr: 0.000209  loss: 2.3578 (2.7728)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2564 (nan)
Test:  [ 0/25]  eta: 0:02:13  loss: 0.6607 (0.6607)  acc1: 87.2000 (87.2000)  acc5: 98.4000 (98.4000)  time: 5.3564  data: 5.2760  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8306 (0.8382)  acc1: 83.6000 (82.6182)  acc5: 96.4000 (96.4727)  time: 0.6871  data: 0.5947  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0714 (1.0217)  acc1: 76.8000 (78.7238)  acc5: 93.2000 (94.4381)  time: 0.2022  data: 0.1190  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1481 (1.0333)  acc1: 76.8000 (78.2400)  acc5: 93.2000 (94.4000)  time: 0.2175  data: 0.1388  max mem: 9147
Test: Total time: 0:00:10 (0.4089 s / it)
* Acc@1 78.544 Acc@5 94.340 loss 1.034
Accuracy of the model on the 50000 test images: 78.5%
Max accuracy: 78.54%
Epoch: [259]  [   0/1251]  eta: 1:01:45  lr: 0.000209  min_lr: 0.000209  loss: 3.7191 (3.7191)  weight_decay: 0.0500 (0.0500)  time: 2.9620  data: 2.7469  max mem: 9147
Epoch: [259]  [ 200/1251]  eta: 0:03:30  lr: 0.000207  min_lr: 0.000207  loss: 2.3796 (2.8056)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2262 (1.3513)  time: 0.1901  data: 0.0012  max mem: 9147
Epoch: [259]  [ 400/1251]  eta: 0:02:47  lr: 0.000206  min_lr: 0.000206  loss: 2.1293 (2.7971)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2893 (1.3382)  time: 0.2045  data: 0.0008  max mem: 9147
Epoch: [259]  [ 600/1251]  eta: 0:02:11  lr: 0.000204  min_lr: 0.000204  loss: 2.1085 (2.8138)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1630 (1.3029)  time: 0.2059  data: 0.0007  max mem: 9147
Epoch: [259]  [ 800/1251]  eta: 0:01:33  lr: 0.000203  min_lr: 0.000203  loss: 2.9581 (2.8138)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1763 (1.2865)  time: 0.2327  data: 0.0008  max mem: 9147
Epoch: [259]  [1000/1251]  eta: 0:00:52  lr: 0.000201  min_lr: 0.000201  loss: 2.5325 (2.8116)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1990 (1.2824)  time: 0.1998  data: 0.0006  max mem: 9147
Epoch: [259]  [1200/1251]  eta: 0:00:10  lr: 0.000199  min_lr: 0.000199  loss: 2.3343 (2.8133)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1029 (1.2803)  time: 0.1953  data: 0.0013  max mem: 9147
Epoch: [259]  [1250/1251]  eta: 0:00:00  lr: 0.000199  min_lr: 0.000199  loss: 2.7689 (2.8189)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1881 (1.2771)  time: 0.1372  data: 0.0006  max mem: 9147
Epoch: [259] Total time: 0:04:16 (0.2050 s / it)
Averaged stats: lr: 0.000199  min_lr: 0.000199  loss: 2.7689 (2.7767)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1881 (1.2771)
Test:  [ 0/25]  eta: 0:02:21  loss: 0.7310 (0.7310)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.6427  data: 5.5150  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 0.9080 (0.9046)  acc1: 83.2000 (82.3636)  acc5: 96.4000 (96.4364)  time: 0.6405  data: 0.5444  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1186 (1.0790)  acc1: 76.8000 (78.8191)  acc5: 93.6000 (94.4381)  time: 0.1659  data: 0.0848  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1985 (1.0872)  acc1: 76.8000 (78.4800)  acc5: 93.6000 (94.4000)  time: 0.1902  data: 0.1151  max mem: 9147
Test: Total time: 0:00:09 (0.3983 s / it)
* Acc@1 78.450 Acc@5 94.340 loss 1.087
Accuracy of the model on the 50000 test images: 78.5%
Max accuracy: 78.54%
Epoch: [260]  [   0/1251]  eta: 1:06:20  lr: 0.000199  min_lr: 0.000199  loss: 2.3398 (2.3398)  weight_decay: 0.0500 (0.0500)  time: 3.1819  data: 1.5952  max mem: 9147
Epoch: [260]  [ 200/1251]  eta: 0:03:56  lr: 0.000197  min_lr: 0.000197  loss: 2.6266 (2.8128)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2354 (1.3055)  time: 0.2008  data: 0.0007  max mem: 9147
Epoch: [260]  [ 400/1251]  eta: 0:03:04  lr: 0.000196  min_lr: 0.000196  loss: 2.4502 (2.8253)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3011 (1.2748)  time: 0.2005  data: 0.0006  max mem: 9147
Epoch: [260]  [ 600/1251]  eta: 0:02:19  lr: 0.000194  min_lr: 0.000194  loss: 2.2445 (2.8053)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2181 (1.3073)  time: 0.1948  data: 0.0006  max mem: 9147
Epoch: [260]  [ 800/1251]  eta: 0:01:36  lr: 0.000193  min_lr: 0.000193  loss: 2.5852 (2.7642)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3736 (1.3364)  time: 0.2014  data: 0.0006  max mem: 9147
Epoch: [260]  [1000/1251]  eta: 0:00:53  lr: 0.000191  min_lr: 0.000191  loss: 2.2200 (2.7719)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1381 (1.3152)  time: 0.2192  data: 0.0022  max mem: 9147
Epoch: [260]  [1200/1251]  eta: 0:00:10  lr: 0.000190  min_lr: 0.000190  loss: 2.6612 (2.7657)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3501 (1.3122)  time: 0.2055  data: 0.0007  max mem: 9147
Epoch: [260]  [1250/1251]  eta: 0:00:00  lr: 0.000189  min_lr: 0.000189  loss: 2.3405 (2.7607)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1839 (1.3114)  time: 0.1415  data: 0.0010  max mem: 9147
Epoch: [260] Total time: 0:04:27 (0.2140 s / it)
Averaged stats: lr: 0.000189  min_lr: 0.000189  loss: 2.3405 (2.7695)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1839 (1.3114)
Test:  [ 0/25]  eta: 0:01:33  loss: 0.6591 (0.6591)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 3.7413  data: 3.6605  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 0.8384 (0.8498)  acc1: 83.2000 (82.2545)  acc5: 96.8000 (96.7273)  time: 0.6043  data: 0.5288  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0640 (1.0259)  acc1: 76.4000 (78.6286)  acc5: 93.2000 (94.7048)  time: 0.2676  data: 0.1920  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1677 (1.0387)  acc1: 76.4000 (78.2240)  acc5: 93.2000 (94.5760)  time: 0.2148  data: 0.1415  max mem: 9147
Test: Total time: 0:00:10 (0.4083 s / it)
* Acc@1 78.406 Acc@5 94.350 loss 1.037
Accuracy of the model on the 50000 test images: 78.4%
Max accuracy: 78.54%
Epoch: [261]  [   0/1251]  eta: 1:05:30  lr: 0.000189  min_lr: 0.000189  loss: 1.9684 (1.9684)  weight_decay: 0.0500 (0.0500)  time: 3.1420  data: 1.6725  max mem: 9147
Epoch: [261]  [ 200/1251]  eta: 0:03:55  lr: 0.000188  min_lr: 0.000188  loss: 2.1791 (2.6759)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2619 (1.2870)  time: 0.2097  data: 0.0006  max mem: 9147
Epoch: [261]  [ 400/1251]  eta: 0:02:55  lr: 0.000186  min_lr: 0.000186  loss: 2.4816 (2.6845)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3002 (1.2777)  time: 0.1890  data: 0.0022  max mem: 9147
Epoch: [261]  [ 600/1251]  eta: 0:02:11  lr: 0.000185  min_lr: 0.000185  loss: 2.4034 (2.7149)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2124 (1.2939)  time: 0.1900  data: 0.0013  max mem: 9147
Epoch: [261]  [ 800/1251]  eta: 0:01:30  lr: 0.000183  min_lr: 0.000183  loss: 2.3320 (2.7421)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3504 (1.3061)  time: 0.2051  data: 0.0007  max mem: 9147
Epoch: [261]  [1000/1251]  eta: 0:00:50  lr: 0.000182  min_lr: 0.000182  loss: 2.7650 (2.7392)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2353 (1.3169)  time: 0.1948  data: 0.0006  max mem: 9147
Epoch: [261]  [1200/1251]  eta: 0:00:10  lr: 0.000180  min_lr: 0.000180  loss: 2.4874 (2.7583)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2253 (1.3148)  time: 0.2298  data: 0.0304  max mem: 9147
Epoch: [261]  [1250/1251]  eta: 0:00:00  lr: 0.000180  min_lr: 0.000180  loss: 2.1615 (2.7535)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2444 (1.3132)  time: 0.1443  data: 0.0013  max mem: 9147
Epoch: [261] Total time: 0:04:12 (0.2018 s / it)
Averaged stats: lr: 0.000180  min_lr: 0.000180  loss: 2.1615 (2.7613)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2444 (1.3132)
Test:  [ 0/25]  eta: 0:02:17  loss: 0.6031 (0.6031)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.5181  data: 5.4377  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.7951 (0.8031)  acc1: 84.0000 (82.7273)  acc5: 96.8000 (96.6909)  time: 0.6820  data: 0.6066  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0569 (0.9875)  acc1: 77.2000 (79.2000)  acc5: 93.2000 (94.6095)  time: 0.1909  data: 0.1181  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1344 (0.9983)  acc1: 77.2000 (78.8160)  acc5: 92.8000 (94.5440)  time: 0.2087  data: 0.1374  max mem: 9147
Test: Total time: 0:00:10 (0.4030 s / it)
* Acc@1 78.724 Acc@5 94.388 loss 0.995
Accuracy of the model on the 50000 test images: 78.7%
Max accuracy: 78.72%
Epoch: [262]  [   0/1251]  eta: 0:59:16  lr: 0.000180  min_lr: 0.000180  loss: 2.9733 (2.9733)  weight_decay: 0.0500 (0.0500)  time: 2.8431  data: 2.6497  max mem: 9147
Epoch: [262]  [ 200/1251]  eta: 0:03:57  lr: 0.000179  min_lr: 0.000179  loss: 2.2067 (2.7539)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1316 (1.1955)  time: 0.2140  data: 0.0006  max mem: 9147
Epoch: [262]  [ 400/1251]  eta: 0:03:07  lr: 0.000177  min_lr: 0.000177  loss: 2.4099 (2.7868)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2821 (1.2384)  time: 0.2246  data: 0.0007  max mem: 9147
Epoch: [262]  [ 600/1251]  eta: 0:02:20  lr: 0.000176  min_lr: 0.000176  loss: 2.2092 (2.7569)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2361 (1.2543)  time: 0.2299  data: 0.0006  max mem: 9147
Epoch: [262]  [ 800/1251]  eta: 0:01:36  lr: 0.000174  min_lr: 0.000174  loss: 2.0961 (2.7470)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3110 (1.2746)  time: 0.2014  data: 0.0007  max mem: 9147
Epoch: [262]  [1000/1251]  eta: 0:00:54  lr: 0.000173  min_lr: 0.000173  loss: 2.1989 (2.7505)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2506 (1.2683)  time: 0.2201  data: 0.0009  max mem: 9147
Epoch: [262]  [1200/1251]  eta: 0:00:10  lr: 0.000171  min_lr: 0.000171  loss: 2.9431 (2.7483)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2023 (1.2768)  time: 0.2131  data: 0.0007  max mem: 9147
Epoch: [262]  [1250/1251]  eta: 0:00:00  lr: 0.000171  min_lr: 0.000171  loss: 3.3071 (2.7523)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2222 (1.2800)  time: 0.1563  data: 0.0067  max mem: 9147
Epoch: [262] Total time: 0:04:27 (0.2138 s / it)
Averaged stats: lr: 0.000171  min_lr: 0.000171  loss: 3.3071 (2.7614)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2222 (1.2800)
Test:  [ 0/25]  eta: 0:01:44  loss: 0.7260 (0.7260)  acc1: 89.2000 (89.2000)  acc5: 98.8000 (98.8000)  time: 4.1623  data: 4.0819  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 0.8912 (0.9169)  acc1: 83.6000 (82.9091)  acc5: 96.8000 (96.5455)  time: 0.6325  data: 0.5551  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1289 (1.0960)  acc1: 76.8000 (79.0667)  acc5: 93.6000 (94.6286)  time: 0.2248  data: 0.1516  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2270 (1.1045)  acc1: 76.4000 (78.5760)  acc5: 93.2000 (94.5440)  time: 0.2201  data: 0.1496  max mem: 9147
Test: Total time: 0:00:09 (0.3977 s / it)
* Acc@1 78.512 Acc@5 94.274 loss 1.107
Accuracy of the model on the 50000 test images: 78.5%
Max accuracy: 78.72%
Epoch: [263]  [   0/1251]  eta: 1:08:27  lr: 0.000171  min_lr: 0.000171  loss: 3.2787 (3.2787)  weight_decay: 0.0500 (0.0500)  time: 3.2833  data: 2.2881  max mem: 9147
Epoch: [263]  [ 200/1251]  eta: 0:03:36  lr: 0.000169  min_lr: 0.000169  loss: 2.6751 (2.9103)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2367 (1.3160)  time: 0.1949  data: 0.0006  max mem: 9147
Epoch: [263]  [ 400/1251]  eta: 0:02:57  lr: 0.000168  min_lr: 0.000168  loss: 2.1434 (2.8589)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5596 (1.3626)  time: 0.2047  data: 0.0006  max mem: 9147
Epoch: [263]  [ 600/1251]  eta: 0:02:13  lr: 0.000167  min_lr: 0.000167  loss: 2.2020 (2.8505)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1608 (1.3114)  time: 0.1856  data: 0.0015  max mem: 9147
Epoch: [263]  [ 800/1251]  eta: 0:01:30  lr: 0.000165  min_lr: 0.000165  loss: 2.1343 (2.8534)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1953 (1.3147)  time: 0.1864  data: 0.0006  max mem: 9147
Epoch: [263]  [1000/1251]  eta: 0:00:50  lr: 0.000164  min_lr: 0.000164  loss: 3.1367 (2.8201)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2816 (1.3207)  time: 0.2103  data: 0.0006  max mem: 9147
Epoch: [263]  [1200/1251]  eta: 0:00:10  lr: 0.000162  min_lr: 0.000162  loss: 2.1904 (2.8311)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2898 (1.3162)  time: 0.1884  data: 0.0005  max mem: 9147
Epoch: [263]  [1250/1251]  eta: 0:00:00  lr: 0.000162  min_lr: 0.000162  loss: 2.4396 (2.8301)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2973 (1.3145)  time: 0.1393  data: 0.0008  max mem: 9147
Epoch: [263] Total time: 0:04:14 (0.2032 s / it)
Averaged stats: lr: 0.000162  min_lr: 0.000162  loss: 2.4396 (2.7596)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2973 (1.3145)
Test:  [ 0/25]  eta: 0:02:13  loss: 0.6264 (0.6264)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.3513  data: 5.2255  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8244 (0.8297)  acc1: 84.4000 (82.9818)  acc5: 96.8000 (96.8000)  time: 0.7428  data: 0.6457  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0570 (1.0106)  acc1: 77.2000 (79.1238)  acc5: 93.6000 (94.6857)  time: 0.2100  data: 0.1278  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1357 (1.0208)  acc1: 77.2000 (78.6400)  acc5: 93.6000 (94.6400)  time: 0.2078  data: 0.1315  max mem: 9147
Test: Total time: 0:00:10 (0.4006 s / it)
* Acc@1 78.656 Acc@5 94.432 loss 1.024
Accuracy of the model on the 50000 test images: 78.7%
Max accuracy: 78.72%
Epoch: [264]  [   0/1251]  eta: 1:08:33  lr: 0.000162  min_lr: 0.000162  loss: 1.9460 (1.9460)  weight_decay: 0.0500 (0.0500)  time: 3.2879  data: 1.5682  max mem: 9147
Epoch: [264]  [ 200/1251]  eta: 0:03:43  lr: 0.000160  min_lr: 0.000160  loss: 2.3742 (2.9298)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1960 (1.2437)  time: 0.2005  data: 0.0006  max mem: 9147
Epoch: [264]  [ 400/1251]  eta: 0:03:02  lr: 0.000159  min_lr: 0.000159  loss: 2.2081 (2.8499)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3049 (1.2603)  time: 0.2404  data: 0.0007  max mem: 9147
Epoch: [264]  [ 600/1251]  eta: 0:02:19  lr: 0.000158  min_lr: 0.000158  loss: 3.3958 (2.8331)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3071 (1.2761)  time: 0.1988  data: 0.0011  max mem: 9147
Epoch: [264]  [ 800/1251]  eta: 0:01:36  lr: 0.000156  min_lr: 0.000156  loss: 3.4038 (2.8370)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2539 (1.2860)  time: 0.2047  data: 0.0008  max mem: 9147
Epoch: [264]  [1000/1251]  eta: 0:00:53  lr: 0.000155  min_lr: 0.000155  loss: 2.3264 (2.8226)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3019 (1.2982)  time: 0.2184  data: 0.0007  max mem: 9147
Epoch: [264]  [1200/1251]  eta: 0:00:10  lr: 0.000154  min_lr: 0.000154  loss: 2.2156 (2.8157)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4363 (1.3048)  time: 0.2003  data: 0.0007  max mem: 9147
Epoch: [264]  [1250/1251]  eta: 0:00:00  lr: 0.000153  min_lr: 0.000153  loss: 2.1681 (2.8100)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3379 (1.3054)  time: 0.1458  data: 0.0014  max mem: 9147
Epoch: [264] Total time: 0:04:24 (0.2117 s / it)
Averaged stats: lr: 0.000153  min_lr: 0.000153  loss: 2.1681 (2.7600)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3379 (1.3054)
Test:  [ 0/25]  eta: 0:02:25  loss: 0.6106 (0.6106)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.8388  data: 5.7583  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8152 (0.8092)  acc1: 83.2000 (82.9091)  acc5: 96.8000 (96.8000)  time: 0.7434  data: 0.6680  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0249 (0.9890)  acc1: 77.2000 (79.2000)  acc5: 94.0000 (94.6857)  time: 0.1976  data: 0.1256  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1193 (1.0009)  acc1: 76.8000 (78.7520)  acc5: 92.8000 (94.6400)  time: 0.1962  data: 0.1255  max mem: 9147
Test: Total time: 0:00:10 (0.4062 s / it)
* Acc@1 78.778 Acc@5 94.476 loss 1.000
Accuracy of the model on the 50000 test images: 78.8%
Max accuracy: 78.78%
Epoch: [265]  [   0/1251]  eta: 0:59:20  lr: 0.000153  min_lr: 0.000153  loss: 2.0222 (2.0222)  weight_decay: 0.0500 (0.0500)  time: 2.8459  data: 2.6512  max mem: 9147
Epoch: [265]  [ 200/1251]  eta: 0:03:54  lr: 0.000152  min_lr: 0.000152  loss: 2.3569 (2.6934)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3058 (1.2690)  time: 0.2058  data: 0.0007  max mem: 9147
Epoch: [265]  [ 400/1251]  eta: 0:03:05  lr: 0.000150  min_lr: 0.000150  loss: 3.0807 (2.7083)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1904 (1.3076)  time: 0.1925  data: 0.0005  max mem: 9147
Epoch: [265]  [ 600/1251]  eta: 0:02:21  lr: 0.000149  min_lr: 0.000149  loss: 2.1960 (2.7199)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2068 (1.3101)  time: 0.2149  data: 0.0007  max mem: 9147
Epoch: [265]  [ 800/1251]  eta: 0:01:37  lr: 0.000148  min_lr: 0.000148  loss: 2.2011 (2.7381)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3471 (1.2998)  time: 0.2217  data: 0.0006  max mem: 9147
Epoch: [265]  [1000/1251]  eta: 0:00:53  lr: 0.000146  min_lr: 0.000146  loss: 2.2082 (2.7505)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2201 (1.2870)  time: 0.1943  data: 0.0013  max mem: 9147
Epoch: [265]  [1200/1251]  eta: 0:00:10  lr: 0.000145  min_lr: 0.000145  loss: 2.2615 (2.7632)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5875 (1.3142)  time: 0.2345  data: 0.0009  max mem: 9147
Epoch: [265]  [1250/1251]  eta: 0:00:00  lr: 0.000145  min_lr: 0.000145  loss: 2.3398 (2.7693)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2732 (1.3131)  time: 0.1419  data: 0.0011  max mem: 9147
Epoch: [265] Total time: 0:04:25 (0.2125 s / it)
Averaged stats: lr: 0.000145  min_lr: 0.000145  loss: 2.3398 (2.7711)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2732 (1.3131)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.6470 (0.6470)  acc1: 89.6000 (89.6000)  acc5: 99.2000 (99.2000)  time: 5.5476  data: 5.4671  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8398 (0.8486)  acc1: 84.0000 (83.1273)  acc5: 96.4000 (96.7636)  time: 0.7301  data: 0.6549  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0804 (1.0295)  acc1: 77.2000 (79.2571)  acc5: 93.2000 (94.6667)  time: 0.2017  data: 0.1263  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1574 (1.0417)  acc1: 76.4000 (78.8320)  acc5: 92.8000 (94.5920)  time: 0.2208  data: 0.1470  max mem: 9147
Test: Total time: 0:00:10 (0.4140 s / it)
* Acc@1 78.598 Acc@5 94.416 loss 1.043
Accuracy of the model on the 50000 test images: 78.6%
Max accuracy: 78.78%
Epoch: [266]  [   0/1251]  eta: 1:06:50  lr: 0.000145  min_lr: 0.000145  loss: 3.1552 (3.1552)  weight_decay: 0.0500 (0.0500)  time: 3.2061  data: 2.6874  max mem: 9147
Epoch: [266]  [ 200/1251]  eta: 0:03:43  lr: 0.000143  min_lr: 0.000143  loss: 2.2334 (2.7383)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2394 (1.3353)  time: 0.1949  data: 0.0012  max mem: 9147
Epoch: [266]  [ 400/1251]  eta: 0:02:59  lr: 0.000142  min_lr: 0.000142  loss: 2.3358 (2.7336)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2011 (1.2926)  time: 0.1950  data: 0.0014  max mem: 9147
Epoch: [266]  [ 600/1251]  eta: 0:02:16  lr: 0.000141  min_lr: 0.000141  loss: 2.1807 (2.7410)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3535 (1.3382)  time: 0.1936  data: 0.0005  max mem: 9147
Epoch: [266]  [ 800/1251]  eta: 0:01:34  lr: 0.000139  min_lr: 0.000139  loss: 2.2621 (2.7578)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3626 (1.3586)  time: 0.2255  data: 0.0007  max mem: 9147
Epoch: [266]  [1000/1251]  eta: 0:00:52  lr: 0.000138  min_lr: 0.000138  loss: 2.3351 (2.7662)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.1994  data: 0.0005  max mem: 9147
Epoch: [266]  [1200/1251]  eta: 0:00:10  lr: 0.000137  min_lr: 0.000137  loss: 3.4682 (2.7661)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0997 (nan)  time: 0.2248  data: 0.0008  max mem: 9147
Epoch: [266]  [1250/1251]  eta: 0:00:00  lr: 0.000137  min_lr: 0.000137  loss: 2.0959 (2.7597)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2149 (nan)  time: 0.1417  data: 0.0010  max mem: 9147
Epoch: [266] Total time: 0:04:22 (0.2096 s / it)
Averaged stats: lr: 0.000137  min_lr: 0.000137  loss: 2.0959 (2.7488)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2149 (nan)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.6091 (0.6091)  acc1: 87.6000 (87.6000)  acc5: 98.8000 (98.8000)  time: 5.6374  data: 5.5374  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8051 (0.7980)  acc1: 83.2000 (82.9091)  acc5: 96.8000 (96.6909)  time: 0.7112  data: 0.6185  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0121 (0.9769)  acc1: 77.2000 (78.9714)  acc5: 94.0000 (94.7429)  time: 0.1862  data: 0.1054  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1160 (0.9883)  acc1: 76.8000 (78.5280)  acc5: 93.6000 (94.6240)  time: 0.1953  data: 0.1206  max mem: 9147
Test: Total time: 0:00:10 (0.4034 s / it)
* Acc@1 78.732 Acc@5 94.508 loss 0.987
Accuracy of the model on the 50000 test images: 78.7%
Max accuracy: 78.78%
Epoch: [267]  [   0/1251]  eta: 1:07:42  lr: 0.000136  min_lr: 0.000136  loss: 2.9067 (2.9067)  weight_decay: 0.0500 (0.0500)  time: 3.2475  data: 1.5882  max mem: 9147
Epoch: [267]  [ 200/1251]  eta: 0:03:57  lr: 0.000135  min_lr: 0.000135  loss: 2.3615 (2.7782)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2614 (1.3821)  time: 0.2341  data: 0.0007  max mem: 9147
Epoch: [267]  [ 400/1251]  eta: 0:03:02  lr: 0.000134  min_lr: 0.000134  loss: 2.4862 (2.7682)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2619 (1.3472)  time: 0.2003  data: 0.0005  max mem: 9147
Epoch: [267]  [ 600/1251]  eta: 0:02:19  lr: 0.000133  min_lr: 0.000133  loss: 2.2868 (2.7631)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2716 (1.3470)  time: 0.2297  data: 0.0013  max mem: 9147
Epoch: [267]  [ 800/1251]  eta: 0:01:36  lr: 0.000131  min_lr: 0.000131  loss: 2.5770 (2.7540)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2902 (1.3402)  time: 0.2193  data: 0.0005  max mem: 9147
Epoch: [267]  [1000/1251]  eta: 0:00:53  lr: 0.000130  min_lr: 0.000130  loss: 2.1546 (2.7539)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1876 (1.3189)  time: 0.2002  data: 0.0007  max mem: 9147
Epoch: [267]  [1200/1251]  eta: 0:00:10  lr: 0.000129  min_lr: 0.000129  loss: 2.1804 (2.7580)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2886 (1.3230)  time: 0.2023  data: 0.0007  max mem: 9147
Epoch: [267]  [1250/1251]  eta: 0:00:00  lr: 0.000129  min_lr: 0.000129  loss: 2.4306 (2.7635)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2175 (1.3192)  time: 0.1438  data: 0.0015  max mem: 9147
Epoch: [267] Total time: 0:04:25 (0.2119 s / it)
Averaged stats: lr: 0.000129  min_lr: 0.000129  loss: 2.4306 (2.7573)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2175 (1.3192)
Test:  [ 0/25]  eta: 0:02:12  loss: 0.6566 (0.6566)  acc1: 88.8000 (88.8000)  acc5: 98.8000 (98.8000)  time: 5.2874  data: 5.2038  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8395 (0.8544)  acc1: 83.6000 (82.6909)  acc5: 96.8000 (96.8000)  time: 0.6977  data: 0.6109  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0662 (1.0314)  acc1: 76.8000 (79.0095)  acc5: 94.0000 (94.7429)  time: 0.2083  data: 0.1288  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1500 (1.0408)  acc1: 76.8000 (78.6720)  acc5: 94.0000 (94.6880)  time: 0.2075  data: 0.1323  max mem: 9147
Test: Total time: 0:00:09 (0.3963 s / it)
* Acc@1 78.610 Acc@5 94.570 loss 1.042
Accuracy of the model on the 50000 test images: 78.6%
Max accuracy: 78.78%
Epoch: [268]  [   0/1251]  eta: 1:00:38  lr: 0.000128  min_lr: 0.000128  loss: 1.8757 (1.8757)  weight_decay: 0.0500 (0.0500)  time: 2.9087  data: 1.5573  max mem: 9147
Epoch: [268]  [ 200/1251]  eta: 0:03:42  lr: 0.000127  min_lr: 0.000127  loss: 2.9091 (2.7547)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2275 (1.2739)  time: 0.2001  data: 0.0006  max mem: 9147
Epoch: [268]  [ 400/1251]  eta: 0:03:01  lr: 0.000126  min_lr: 0.000126  loss: 2.8048 (2.7703)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2600 (1.3005)  time: 0.2248  data: 0.0009  max mem: 9147
Epoch: [268]  [ 600/1251]  eta: 0:02:17  lr: 0.000125  min_lr: 0.000125  loss: 2.0779 (2.7208)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3140 (1.3180)  time: 0.2051  data: 0.0007  max mem: 9147
Epoch: [268]  [ 800/1251]  eta: 0:01:35  lr: 0.000123  min_lr: 0.000123  loss: 2.1116 (2.7085)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3043 (1.3327)  time: 0.2139  data: 0.0007  max mem: 9147
Epoch: [268]  [1000/1251]  eta: 0:00:53  lr: 0.000122  min_lr: 0.000122  loss: 2.1360 (2.7118)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1788 (1.3166)  time: 0.2346  data: 0.0007  max mem: 9147
Epoch: [268]  [1200/1251]  eta: 0:00:10  lr: 0.000121  min_lr: 0.000121  loss: 2.0807 (2.7250)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2801 (1.3126)  time: 0.2293  data: 0.0007  max mem: 9147
Epoch: [268]  [1250/1251]  eta: 0:00:00  lr: 0.000121  min_lr: 0.000121  loss: 2.0811 (2.7257)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2970 (1.3119)  time: 0.1388  data: 0.0013  max mem: 9147
Epoch: [268] Total time: 0:04:26 (0.2127 s / it)
Averaged stats: lr: 0.000121  min_lr: 0.000121  loss: 2.0811 (2.7387)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2970 (1.3119)
Test:  [ 0/25]  eta: 0:02:24  loss: 0.6272 (0.6272)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.7893  data: 5.7089  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8102 (0.8183)  acc1: 84.4000 (82.7273)  acc5: 96.8000 (96.6909)  time: 0.7694  data: 0.6820  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0250 (0.9954)  acc1: 78.0000 (79.1810)  acc5: 94.0000 (94.6667)  time: 0.2146  data: 0.1342  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1246 (1.0056)  acc1: 77.6000 (78.7520)  acc5: 93.2000 (94.5280)  time: 0.2098  data: 0.1341  max mem: 9147
Test: Total time: 0:00:10 (0.4179 s / it)
* Acc@1 78.800 Acc@5 94.432 loss 1.010
Accuracy of the model on the 50000 test images: 78.8%
Max accuracy: 78.80%
Epoch: [269]  [   0/1251]  eta: 0:57:21  lr: 0.000121  min_lr: 0.000121  loss: 2.0214 (2.0214)  weight_decay: 0.0500 (0.0500)  time: 2.7513  data: 2.5066  max mem: 9147
Epoch: [269]  [ 200/1251]  eta: 0:03:57  lr: 0.000120  min_lr: 0.000120  loss: 2.1218 (2.6654)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3507 (1.3529)  time: 0.2006  data: 0.0006  max mem: 9147
Epoch: [269]  [ 400/1251]  eta: 0:03:06  lr: 0.000118  min_lr: 0.000118  loss: 3.0127 (2.7073)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3170 (1.3646)  time: 0.2152  data: 0.0007  max mem: 9147
Epoch: [269]  [ 600/1251]  eta: 0:02:22  lr: 0.000117  min_lr: 0.000117  loss: 2.3138 (2.6986)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2010 (1.3597)  time: 0.2247  data: 0.0007  max mem: 9147
Epoch: [269]  [ 800/1251]  eta: 0:01:36  lr: 0.000116  min_lr: 0.000116  loss: 3.2424 (2.7141)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2205 (1.3424)  time: 0.2055  data: 0.0007  max mem: 9147
Epoch: [269]  [1000/1251]  eta: 0:00:53  lr: 0.000115  min_lr: 0.000115  loss: 2.3058 (2.7265)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2208 (1.3358)  time: 0.2267  data: 0.0007  max mem: 9147
Epoch: [269]  [1200/1251]  eta: 0:00:10  lr: 0.000113  min_lr: 0.000113  loss: 2.4947 (2.7369)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3218 (1.3304)  time: 0.2317  data: 0.0007  max mem: 9147
Epoch: [269]  [1250/1251]  eta: 0:00:00  lr: 0.000113  min_lr: 0.000113  loss: 3.0721 (2.7447)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3210 (1.3277)  time: 0.1398  data: 0.0008  max mem: 9147
Epoch: [269] Total time: 0:04:26 (0.2130 s / it)
Averaged stats: lr: 0.000113  min_lr: 0.000113  loss: 3.0721 (2.7401)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3210 (1.3277)
Test:  [ 0/25]  eta: 0:02:15  loss: 0.6823 (0.6823)  acc1: 89.6000 (89.6000)  acc5: 98.8000 (98.8000)  time: 5.4222  data: 5.3418  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8861 (0.8909)  acc1: 84.8000 (83.2000)  acc5: 96.8000 (96.6545)  time: 0.7324  data: 0.6379  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1132 (1.0767)  acc1: 77.2000 (79.3143)  acc5: 94.0000 (94.5333)  time: 0.2126  data: 0.1301  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2187 (1.0877)  acc1: 76.8000 (79.0240)  acc5: 92.8000 (94.4480)  time: 0.2068  data: 0.1300  max mem: 9147
Test: Total time: 0:00:10 (0.4017 s / it)
* Acc@1 78.632 Acc@5 94.412 loss 1.090
Accuracy of the model on the 50000 test images: 78.6%
Max accuracy: 78.80%
Epoch: [270]  [   0/1251]  eta: 1:08:57  lr: 0.000113  min_lr: 0.000113  loss: 1.9946 (1.9946)  weight_decay: 0.0500 (0.0500)  time: 3.3077  data: 2.9118  max mem: 9147
Epoch: [270]  [ 200/1251]  eta: 0:03:59  lr: 0.000112  min_lr: 0.000112  loss: 2.9211 (2.7368)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3986 (1.3479)  time: 0.2035  data: 0.0006  max mem: 9147
Epoch: [270]  [ 400/1251]  eta: 0:03:10  lr: 0.000111  min_lr: 0.000111  loss: 2.5861 (2.7806)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3579 (1.3434)  time: 0.2104  data: 0.0006  max mem: 9147
Epoch: [270]  [ 600/1251]  eta: 0:02:24  lr: 0.000110  min_lr: 0.000110  loss: 2.1991 (2.7452)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3279 (1.3709)  time: 0.2305  data: 0.0006  max mem: 9147
Epoch: [270]  [ 800/1251]  eta: 0:01:37  lr: 0.000109  min_lr: 0.000109  loss: 2.1616 (2.7455)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2755 (1.3539)  time: 0.2122  data: 0.0006  max mem: 9147
Epoch: [270]  [1000/1251]  eta: 0:00:53  lr: 0.000107  min_lr: 0.000107  loss: 2.2399 (2.7491)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1411 (1.3370)  time: 0.1950  data: 0.0006  max mem: 9147
Epoch: [270]  [1200/1251]  eta: 0:00:10  lr: 0.000106  min_lr: 0.000106  loss: 2.1040 (2.7412)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2662 (1.3262)  time: 0.1995  data: 0.0006  max mem: 9147
Epoch: [270]  [1250/1251]  eta: 0:00:00  lr: 0.000106  min_lr: 0.000106  loss: 2.1830 (2.7379)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2837 (1.3254)  time: 0.1384  data: 0.0010  max mem: 9147
Epoch: [270] Total time: 0:04:21 (0.2090 s / it)
Averaged stats: lr: 0.000106  min_lr: 0.000106  loss: 2.1830 (2.7512)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2837 (1.3254)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.5730 (0.5730)  acc1: 90.0000 (90.0000)  acc5: 98.8000 (98.8000)  time: 5.4756  data: 5.3949  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.7644 (0.7804)  acc1: 84.4000 (83.3818)  acc5: 96.8000 (96.6909)  time: 0.7203  data: 0.6452  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 0.9819 (0.9583)  acc1: 78.0000 (79.5619)  acc5: 94.4000 (94.7238)  time: 0.2113  data: 0.1394  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.0973 (0.9688)  acc1: 76.4000 (79.1040)  acc5: 93.6000 (94.6400)  time: 0.2173  data: 0.1471  max mem: 9147
Test: Total time: 0:00:10 (0.4087 s / it)
* Acc@1 79.060 Acc@5 94.510 loss 0.969
Accuracy of the model on the 50000 test images: 79.1%
Max accuracy: 79.06%
Epoch: [271]  [   0/1251]  eta: 0:57:58  lr: 0.000106  min_lr: 0.000106  loss: 2.2473 (2.2473)  weight_decay: 0.0500 (0.0500)  time: 2.7804  data: 2.5699  max mem: 9147
Epoch: [271]  [ 200/1251]  eta: 0:03:53  lr: 0.000105  min_lr: 0.000105  loss: 2.2334 (2.6887)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1946 (1.2807)  time: 0.2069  data: 0.0006  max mem: 9147
Epoch: [271]  [ 400/1251]  eta: 0:03:07  lr: 0.000104  min_lr: 0.000104  loss: 2.1160 (2.7016)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1665 (1.3071)  time: 0.2140  data: 0.0009  max mem: 9147
Epoch: [271]  [ 600/1251]  eta: 0:02:21  lr: 0.000102  min_lr: 0.000102  loss: 2.7908 (2.7696)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2736 (1.2945)  time: 0.2057  data: 0.0006  max mem: 9147
Epoch: [271]  [ 800/1251]  eta: 0:01:36  lr: 0.000101  min_lr: 0.000101  loss: 2.2027 (2.7503)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2342 (1.2938)  time: 0.1859  data: 0.0005  max mem: 9147
Epoch: [271]  [1000/1251]  eta: 0:00:53  lr: 0.000100  min_lr: 0.000100  loss: 2.0884 (2.7224)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2741 (1.3035)  time: 0.2155  data: 0.0007  max mem: 9147
Epoch: [271]  [1200/1251]  eta: 0:00:10  lr: 0.000099  min_lr: 0.000099  loss: 2.4010 (2.7197)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1705 (1.3079)  time: 0.1999  data: 0.0016  max mem: 9147
Epoch: [271]  [1250/1251]  eta: 0:00:00  lr: 0.000099  min_lr: 0.000099  loss: 2.0673 (2.7128)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1629 (1.3064)  time: 0.1389  data: 0.0005  max mem: 9147
Epoch: [271] Total time: 0:04:27 (0.2135 s / it)
Averaged stats: lr: 0.000099  min_lr: 0.000099  loss: 2.0673 (2.7319)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1629 (1.3064)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.6034 (0.6034)  acc1: 90.4000 (90.4000)  acc5: 99.2000 (99.2000)  time: 5.6340  data: 5.5536  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8186 (0.8105)  acc1: 82.8000 (83.2000)  acc5: 96.8000 (96.7636)  time: 0.6981  data: 0.6040  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0101 (0.9765)  acc1: 77.6000 (79.4286)  acc5: 92.8000 (94.5714)  time: 0.1929  data: 0.1099  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1166 (0.9878)  acc1: 77.6000 (78.9760)  acc5: 92.8000 (94.5280)  time: 0.2145  data: 0.1360  max mem: 9147
Test: Total time: 0:00:10 (0.4158 s / it)
* Acc@1 78.872 Acc@5 94.484 loss 0.990
Accuracy of the model on the 50000 test images: 78.9%
Max accuracy: 79.06%
Epoch: [272]  [   0/1251]  eta: 1:01:03  lr: 0.000099  min_lr: 0.000099  loss: 3.5530 (3.5530)  weight_decay: 0.0500 (0.0500)  time: 2.9287  data: 2.4983  max mem: 9147
Epoch: [272]  [ 200/1251]  eta: 0:03:53  lr: 0.000098  min_lr: 0.000098  loss: 2.1285 (2.7698)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1093 (1.2265)  time: 0.2104  data: 0.0008  max mem: 9147
Epoch: [272]  [ 400/1251]  eta: 0:03:01  lr: 0.000097  min_lr: 0.000097  loss: 2.2441 (2.6895)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2394 (1.2683)  time: 0.2225  data: 0.0006  max mem: 9147
Epoch: [272]  [ 600/1251]  eta: 0:02:19  lr: 0.000096  min_lr: 0.000096  loss: 3.2542 (2.6938)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3025 (1.2860)  time: 0.1961  data: 0.0006  max mem: 9147
Epoch: [272]  [ 800/1251]  eta: 0:01:35  lr: 0.000094  min_lr: 0.000094  loss: 2.8678 (2.6959)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2677 (1.3012)  time: 0.1953  data: 0.0006  max mem: 9147
Epoch: [272]  [1000/1251]  eta: 0:00:53  lr: 0.000093  min_lr: 0.000093  loss: 2.2848 (2.6963)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3101 (1.3029)  time: 0.2084  data: 0.0007  max mem: 9147
Epoch: [272]  [1200/1251]  eta: 0:00:10  lr: 0.000092  min_lr: 0.000092  loss: 2.0992 (2.7151)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2154 (1.2966)  time: 0.1999  data: 0.0005  max mem: 9147
Epoch: [272]  [1250/1251]  eta: 0:00:00  lr: 0.000092  min_lr: 0.000092  loss: 2.3634 (2.7154)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2775 (1.2962)  time: 0.1430  data: 0.0018  max mem: 9147
Epoch: [272] Total time: 0:04:23 (0.2110 s / it)
Averaged stats: lr: 0.000092  min_lr: 0.000092  loss: 2.3634 (2.7416)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2775 (1.2962)
Test:  [ 0/25]  eta: 0:02:23  loss: 0.6831 (0.6831)  acc1: 87.6000 (87.6000)  acc5: 98.8000 (98.8000)  time: 5.7265  data: 5.6416  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8695 (0.8625)  acc1: 83.2000 (82.9091)  acc5: 96.8000 (96.5818)  time: 0.7302  data: 0.6501  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1067 (1.0443)  acc1: 76.8000 (79.1238)  acc5: 94.0000 (94.6476)  time: 0.2098  data: 0.1353  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1796 (1.0540)  acc1: 76.8000 (78.7360)  acc5: 93.2000 (94.5440)  time: 0.2051  data: 0.1345  max mem: 9147
Test: Total time: 0:00:10 (0.4112 s / it)
* Acc@1 78.720 Acc@5 94.490 loss 1.054
Accuracy of the model on the 50000 test images: 78.7%
Max accuracy: 79.06%
Epoch: [273]  [   0/1251]  eta: 1:02:23  lr: 0.000092  min_lr: 0.000092  loss: 2.2788 (2.2788)  weight_decay: 0.0500 (0.0500)  time: 2.9924  data: 1.7196  max mem: 9147
Epoch: [273]  [ 200/1251]  eta: 0:03:52  lr: 0.000091  min_lr: 0.000091  loss: 2.3535 (2.8605)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3298 (1.3431)  time: 0.1950  data: 0.0006  max mem: 9147
Epoch: [273]  [ 400/1251]  eta: 0:02:57  lr: 0.000090  min_lr: 0.000090  loss: 3.0781 (2.7885)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2340 (1.3040)  time: 0.2069  data: 0.0006  max mem: 9147
Epoch: [273]  [ 600/1251]  eta: 0:02:18  lr: 0.000089  min_lr: 0.000089  loss: 2.2491 (2.7561)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1482 (1.3019)  time: 0.2230  data: 0.0166  max mem: 9147
Epoch: [273]  [ 800/1251]  eta: 0:01:34  lr: 0.000088  min_lr: 0.000088  loss: 2.4117 (2.7368)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4108 (1.3116)  time: 0.2033  data: 0.0007  max mem: 9147
Epoch: [273]  [1000/1251]  eta: 0:00:53  lr: 0.000087  min_lr: 0.000087  loss: 2.9446 (2.7425)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1436 (1.3027)  time: 0.2262  data: 0.0007  max mem: 9147
Epoch: [273]  [1200/1251]  eta: 0:00:10  lr: 0.000086  min_lr: 0.000086  loss: 2.6691 (2.7270)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3902 (1.3154)  time: 0.2207  data: 0.0007  max mem: 9147
Epoch: [273]  [1250/1251]  eta: 0:00:00  lr: 0.000085  min_lr: 0.000085  loss: 2.1535 (2.7279)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1667 (1.3107)  time: 0.1425  data: 0.0010  max mem: 9147
Epoch: [273] Total time: 0:04:26 (0.2127 s / it)
Averaged stats: lr: 0.000085  min_lr: 0.000085  loss: 2.1535 (2.7318)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1667 (1.3107)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.6089 (0.6089)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.4577  data: 5.3603  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8105 (0.8080)  acc1: 84.4000 (83.0909)  acc5: 96.8000 (96.7273)  time: 0.7099  data: 0.6315  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0273 (0.9813)  acc1: 77.2000 (79.1619)  acc5: 93.6000 (94.7429)  time: 0.1993  data: 0.1260  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1257 (0.9917)  acc1: 76.4000 (78.8640)  acc5: 93.6000 (94.6880)  time: 0.2073  data: 0.1356  max mem: 9147
Test: Total time: 0:00:10 (0.4002 s / it)
* Acc@1 78.826 Acc@5 94.476 loss 0.992
Accuracy of the model on the 50000 test images: 78.8%
Max accuracy: 79.06%
Epoch: [274]  [   0/1251]  eta: 1:00:41  lr: 0.000085  min_lr: 0.000085  loss: 2.8747 (2.8747)  weight_decay: 0.0500 (0.0500)  time: 2.9109  data: 2.3794  max mem: 9147
Epoch: [274]  [ 200/1251]  eta: 0:03:56  lr: 0.000084  min_lr: 0.000084  loss: 2.2525 (2.7373)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2128 (1.3455)  time: 0.1995  data: 0.0006  max mem: 9147
Epoch: [274]  [ 400/1251]  eta: 0:03:08  lr: 0.000083  min_lr: 0.000083  loss: 2.1466 (2.7358)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3064 (1.3302)  time: 0.2182  data: 0.0114  max mem: 9147
Epoch: [274]  [ 600/1251]  eta: 0:02:21  lr: 0.000082  min_lr: 0.000082  loss: 2.5041 (2.7525)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1713 (1.3198)  time: 0.2009  data: 0.0006  max mem: 9147
Epoch: [274]  [ 800/1251]  eta: 0:01:37  lr: 0.000081  min_lr: 0.000081  loss: 2.3612 (2.7798)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0676 (1.3115)  time: 0.1910  data: 0.0005  max mem: 9147
Epoch: [274]  [1000/1251]  eta: 0:00:53  lr: 0.000080  min_lr: 0.000080  loss: 2.3219 (2.7725)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2861 (1.3176)  time: 0.1990  data: 0.0006  max mem: 9147
Epoch: [274]  [1200/1251]  eta: 0:00:10  lr: 0.000079  min_lr: 0.000079  loss: 2.2482 (2.7694)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2692 (1.3097)  time: 0.1934  data: 0.0010  max mem: 9147
Epoch: [274]  [1250/1251]  eta: 0:00:00  lr: 0.000079  min_lr: 0.000079  loss: 2.2660 (2.7652)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1712 (1.3052)  time: 0.1397  data: 0.0014  max mem: 9147
Epoch: [274] Total time: 0:04:24 (0.2115 s / it)
Averaged stats: lr: 0.000079  min_lr: 0.000079  loss: 2.2660 (2.7499)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1712 (1.3052)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.6075 (0.6075)  acc1: 88.8000 (88.8000)  acc5: 99.2000 (99.2000)  time: 5.5419  data: 5.4614  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.7846 (0.8011)  acc1: 84.0000 (82.8727)  acc5: 96.8000 (96.7636)  time: 0.7606  data: 0.6840  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0168 (0.9773)  acc1: 77.6000 (79.0857)  acc5: 94.4000 (94.7429)  time: 0.2014  data: 0.1276  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1228 (0.9879)  acc1: 76.4000 (78.6880)  acc5: 93.2000 (94.6720)  time: 0.2091  data: 0.1363  max mem: 9147
Test: Total time: 0:00:10 (0.4038 s / it)
* Acc@1 78.788 Acc@5 94.542 loss 0.988
Accuracy of the model on the 50000 test images: 78.8%
Max accuracy: 79.06%
Epoch: [275]  [   0/1251]  eta: 1:03:52  lr: 0.000079  min_lr: 0.000079  loss: 2.1029 (2.1029)  weight_decay: 0.0500 (0.0500)  time: 3.0639  data: 2.4741  max mem: 9147
Epoch: [275]  [ 200/1251]  eta: 0:04:01  lr: 0.000078  min_lr: 0.000078  loss: 2.3357 (2.7529)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2910 (1.3564)  time: 0.2271  data: 0.0007  max mem: 9147
Epoch: [275]  [ 400/1251]  eta: 0:03:03  lr: 0.000077  min_lr: 0.000077  loss: 2.1730 (2.7293)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2058 (1.3056)  time: 0.1950  data: 0.0012  max mem: 9147
Epoch: [275]  [ 600/1251]  eta: 0:02:18  lr: 0.000076  min_lr: 0.000076  loss: 2.7174 (2.7661)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1648 (1.2985)  time: 0.2205  data: 0.0007  max mem: 9147
Epoch: [275]  [ 800/1251]  eta: 0:01:35  lr: 0.000075  min_lr: 0.000075  loss: 2.1878 (2.7669)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2946 (1.3090)  time: 0.2157  data: 0.0007  max mem: 9147
Epoch: [275]  [1000/1251]  eta: 0:00:53  lr: 0.000074  min_lr: 0.000074  loss: 2.3551 (2.7449)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1403 (1.2976)  time: 0.2138  data: 0.0006  max mem: 9147
Epoch: [275]  [1200/1251]  eta: 0:00:10  lr: 0.000073  min_lr: 0.000073  loss: 2.5528 (2.7460)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2581 (1.3083)  time: 0.2346  data: 0.0008  max mem: 9147
Epoch: [275]  [1250/1251]  eta: 0:00:00  lr: 0.000073  min_lr: 0.000073  loss: 2.9888 (2.7495)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2564 (1.3084)  time: 0.1432  data: 0.0015  max mem: 9147
Epoch: [275] Total time: 0:04:26 (0.2133 s / it)
Averaged stats: lr: 0.000073  min_lr: 0.000073  loss: 2.9888 (2.7538)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2564 (1.3084)
Test:  [ 0/25]  eta: 0:02:25  loss: 0.6997 (0.6997)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.8372  data: 5.7447  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8956 (0.8908)  acc1: 82.8000 (82.7273)  acc5: 96.4000 (96.6545)  time: 0.6952  data: 0.6019  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1104 (1.0721)  acc1: 77.6000 (78.9524)  acc5: 93.6000 (94.7619)  time: 0.1887  data: 0.1054  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2168 (1.0830)  acc1: 76.4000 (78.5120)  acc5: 93.6000 (94.6880)  time: 0.1883  data: 0.1111  max mem: 9147
Test: Total time: 0:00:10 (0.4033 s / it)
* Acc@1 78.646 Acc@5 94.450 loss 1.083
Accuracy of the model on the 50000 test images: 78.6%
Max accuracy: 79.06%
Epoch: [276]  [   0/1251]  eta: 1:05:27  lr: 0.000073  min_lr: 0.000073  loss: 2.0385 (2.0385)  weight_decay: 0.0500 (0.0500)  time: 3.1396  data: 2.5189  max mem: 9147
Epoch: [276]  [ 200/1251]  eta: 0:03:48  lr: 0.000072  min_lr: 0.000072  loss: 2.4207 (2.6428)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2437 (1.3437)  time: 0.2008  data: 0.0005  max mem: 9147
Epoch: [276]  [ 400/1251]  eta: 0:03:03  lr: 0.000071  min_lr: 0.000071  loss: 2.2456 (2.6758)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2436 (1.3597)  time: 0.2101  data: 0.0007  max mem: 9147
Epoch: [276]  [ 600/1251]  eta: 0:02:16  lr: 0.000070  min_lr: 0.000070  loss: 2.2203 (2.7045)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2412 (1.3522)  time: 0.1901  data: 0.0006  max mem: 9147
Epoch: [276]  [ 800/1251]  eta: 0:01:34  lr: 0.000069  min_lr: 0.000069  loss: 2.1636 (2.6874)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2297 (1.3386)  time: 0.2152  data: 0.0054  max mem: 9147
Epoch: [276]  [1000/1251]  eta: 0:00:52  lr: 0.000068  min_lr: 0.000068  loss: 3.0541 (2.7166)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2323 (1.3315)  time: 0.1905  data: 0.0013  max mem: 9147
Epoch: [276]  [1200/1251]  eta: 0:00:10  lr: 0.000067  min_lr: 0.000067  loss: 2.2613 (2.7092)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2302 (1.3276)  time: 0.2208  data: 0.0176  max mem: 9147
Epoch: [276]  [1250/1251]  eta: 0:00:00  lr: 0.000067  min_lr: 0.000067  loss: 2.2308 (2.7088)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2324 (1.3252)  time: 0.1401  data: 0.0013  max mem: 9147
Epoch: [276] Total time: 0:04:21 (0.2094 s / it)
Averaged stats: lr: 0.000067  min_lr: 0.000067  loss: 2.2308 (2.7228)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2324 (1.3252)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.6343 (0.6343)  acc1: 88.8000 (88.8000)  acc5: 98.8000 (98.8000)  time: 5.6087  data: 5.5105  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8425 (0.8339)  acc1: 84.0000 (83.2364)  acc5: 96.8000 (96.8000)  time: 0.7574  data: 0.6681  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0485 (1.0182)  acc1: 77.2000 (79.2571)  acc5: 94.0000 (94.7048)  time: 0.2048  data: 0.1255  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1803 (1.0288)  acc1: 77.2000 (78.9120)  acc5: 93.2000 (94.6720)  time: 0.1986  data: 0.1255  max mem: 9147
Test: Total time: 0:00:10 (0.4029 s / it)
* Acc@1 78.880 Acc@5 94.534 loss 1.028
Accuracy of the model on the 50000 test images: 78.9%
Max accuracy: 79.06%
Epoch: [277]  [   0/1251]  eta: 1:07:43  lr: 0.000067  min_lr: 0.000067  loss: 3.9132 (3.9132)  weight_decay: 0.0500 (0.0500)  time: 3.2485  data: 2.6526  max mem: 9147
Epoch: [277]  [ 200/1251]  eta: 0:04:00  lr: 0.000066  min_lr: 0.000066  loss: 2.8142 (2.7633)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2219 (1.2687)  time: 0.2216  data: 0.0184  max mem: 9147
Epoch: [277]  [ 400/1251]  eta: 0:03:06  lr: 0.000065  min_lr: 0.000065  loss: 2.3166 (2.7745)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1758 (1.2654)  time: 0.2137  data: 0.0007  max mem: 9147
Epoch: [277]  [ 600/1251]  eta: 0:02:21  lr: 0.000064  min_lr: 0.000064  loss: 2.1372 (2.7459)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2100 (1.2737)  time: 0.2095  data: 0.0006  max mem: 9147
Epoch: [277]  [ 800/1251]  eta: 0:01:35  lr: 0.000064  min_lr: 0.000064  loss: 2.3246 (2.7429)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2314 (1.2926)  time: 0.1999  data: 0.0006  max mem: 9147
Epoch: [277]  [1000/1251]  eta: 0:00:51  lr: 0.000063  min_lr: 0.000063  loss: 2.8821 (2.7558)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1827 (1.2905)  time: 0.2000  data: 0.0011  max mem: 9147
Epoch: [277]  [1200/1251]  eta: 0:00:10  lr: 0.000062  min_lr: 0.000062  loss: 2.1454 (2.7626)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3339 (1.2946)  time: 0.1984  data: 0.0005  max mem: 9147
Epoch: [277]  [1250/1251]  eta: 0:00:00  lr: 0.000062  min_lr: 0.000062  loss: 2.6523 (2.7601)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2384 (1.2944)  time: 0.1376  data: 0.0010  max mem: 9147
Epoch: [277] Total time: 0:04:17 (0.2062 s / it)
Averaged stats: lr: 0.000062  min_lr: 0.000062  loss: 2.6523 (2.7331)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2384 (1.2944)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.6894 (0.6894)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.5669  data: 5.4835  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8517 (0.8608)  acc1: 83.6000 (82.8364)  acc5: 97.2000 (96.7273)  time: 0.6772  data: 0.5947  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0700 (1.0405)  acc1: 77.2000 (79.0286)  acc5: 93.6000 (94.7619)  time: 0.1923  data: 0.1165  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1877 (1.0514)  acc1: 76.8000 (78.6560)  acc5: 93.2000 (94.6880)  time: 0.2041  data: 0.1335  max mem: 9147
Test: Total time: 0:00:10 (0.4050 s / it)
* Acc@1 78.720 Acc@5 94.502 loss 1.052
Accuracy of the model on the 50000 test images: 78.7%
Max accuracy: 79.06%
Epoch: [278]  [   0/1251]  eta: 0:55:22  lr: 0.000062  min_lr: 0.000062  loss: 1.8908 (1.8908)  weight_decay: 0.0500 (0.0500)  time: 2.6562  data: 2.4319  max mem: 9147
Epoch: [278]  [ 200/1251]  eta: 0:03:58  lr: 0.000061  min_lr: 0.000061  loss: 2.4649 (2.7683)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1861 (1.3397)  time: 0.2011  data: 0.0007  max mem: 9147
Epoch: [278]  [ 400/1251]  eta: 0:03:08  lr: 0.000060  min_lr: 0.000060  loss: 2.6179 (2.7465)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3635 (1.3422)  time: 0.2199  data: 0.0007  max mem: 9147
Epoch: [278]  [ 600/1251]  eta: 0:02:22  lr: 0.000059  min_lr: 0.000059  loss: 2.2257 (2.7717)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1311 (1.3142)  time: 0.1944  data: 0.0006  max mem: 9147
Epoch: [278]  [ 800/1251]  eta: 0:01:35  lr: 0.000058  min_lr: 0.000058  loss: 3.3260 (2.7793)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2774 (1.3106)  time: 0.1950  data: 0.0012  max mem: 9147
Epoch: [278]  [1000/1251]  eta: 0:00:53  lr: 0.000057  min_lr: 0.000057  loss: 2.1103 (2.7689)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2699 (1.3022)  time: 0.2007  data: 0.0007  max mem: 9147
Epoch: [278]  [1200/1251]  eta: 0:00:10  lr: 0.000056  min_lr: 0.000056  loss: 2.2338 (2.7552)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1572 (1.3007)  time: 0.2160  data: 0.0162  max mem: 9147
Epoch: [278]  [1250/1251]  eta: 0:00:00  lr: 0.000056  min_lr: 0.000056  loss: 2.2111 (2.7621)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1572 (1.2976)  time: 0.1516  data: 0.0045  max mem: 9147
Epoch: [278] Total time: 0:04:25 (0.2124 s / it)
Averaged stats: lr: 0.000056  min_lr: 0.000056  loss: 2.2111 (2.7311)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1572 (1.2976)
Test:  [ 0/25]  eta: 0:02:24  loss: 0.6423 (0.6423)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.7843  data: 5.7039  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8232 (0.8328)  acc1: 83.2000 (82.9818)  acc5: 96.8000 (96.7273)  time: 0.6985  data: 0.6234  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0361 (1.0116)  acc1: 77.2000 (79.0286)  acc5: 93.6000 (94.7048)  time: 0.1665  data: 0.0918  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1521 (1.0217)  acc1: 77.2000 (78.7360)  acc5: 93.2000 (94.6400)  time: 0.1842  data: 0.1104  max mem: 9147
Test: Total time: 0:00:09 (0.3984 s / it)
* Acc@1 78.796 Acc@5 94.466 loss 1.022
Accuracy of the model on the 50000 test images: 78.8%
Max accuracy: 79.06%
Epoch: [279]  [   0/1251]  eta: 1:03:46  lr: 0.000056  min_lr: 0.000056  loss: 3.2242 (3.2242)  weight_decay: 0.0500 (0.0500)  time: 3.0585  data: 2.7871  max mem: 9147
Epoch: [279]  [ 200/1251]  eta: 0:03:52  lr: 0.000055  min_lr: 0.000055  loss: 3.3320 (2.6736)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3061 (1.3397)  time: 0.2009  data: 0.0007  max mem: 9147
Epoch: [279]  [ 400/1251]  eta: 0:03:04  lr: 0.000055  min_lr: 0.000055  loss: 2.5438 (2.6934)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2675 (1.3219)  time: 0.2127  data: 0.0076  max mem: 9147
Epoch: [279]  [ 600/1251]  eta: 0:02:19  lr: 0.000054  min_lr: 0.000054  loss: 2.1126 (2.6837)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2837 (1.3115)  time: 0.1997  data: 0.0006  max mem: 9147
Epoch: [279]  [ 800/1251]  eta: 0:01:36  lr: 0.000053  min_lr: 0.000053  loss: 3.3349 (2.7005)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2158 (1.3024)  time: 0.2146  data: 0.0007  max mem: 9147
Epoch: [279]  [1000/1251]  eta: 0:00:54  lr: 0.000052  min_lr: 0.000052  loss: 2.9201 (2.7031)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2335 (1.3171)  time: 0.1922  data: 0.0006  max mem: 9147
Epoch: [279]  [1200/1251]  eta: 0:00:10  lr: 0.000051  min_lr: 0.000051  loss: 2.6396 (2.7163)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2804 (1.3194)  time: 0.2145  data: 0.0007  max mem: 9147
Epoch: [279]  [1250/1251]  eta: 0:00:00  lr: 0.000051  min_lr: 0.000051  loss: 2.2340 (2.7125)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3566 (1.3220)  time: 0.1419  data: 0.0011  max mem: 9147
Epoch: [279] Total time: 0:04:29 (0.2152 s / it)
Averaged stats: lr: 0.000051  min_lr: 0.000051  loss: 2.2340 (2.7414)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3566 (1.3220)
Test:  [ 0/25]  eta: 0:02:23  loss: 0.5955 (0.5955)  acc1: 87.6000 (87.6000)  acc5: 98.8000 (98.8000)  time: 5.7207  data: 5.6403  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.7982 (0.7903)  acc1: 82.8000 (82.8364)  acc5: 96.8000 (96.7273)  time: 0.7672  data: 0.6890  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 0.9900 (0.9626)  acc1: 77.6000 (79.3333)  acc5: 94.4000 (94.6667)  time: 0.2130  data: 0.1386  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.0950 (0.9732)  acc1: 77.6000 (78.8480)  acc5: 94.0000 (94.7040)  time: 0.2118  data: 0.1385  max mem: 9147
Test: Total time: 0:00:10 (0.4134 s / it)
* Acc@1 78.978 Acc@5 94.550 loss 0.976
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.06%
Epoch: [280]  [   0/1251]  eta: 1:02:55  lr: 0.000051  min_lr: 0.000051  loss: 2.1539 (2.1539)  weight_decay: 0.0500 (0.0500)  time: 3.0178  data: 2.1813  max mem: 9147
Epoch: [280]  [ 200/1251]  eta: 0:03:49  lr: 0.000050  min_lr: 0.000050  loss: 2.2908 (2.7783)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1895 (1.2734)  time: 0.2104  data: 0.0007  max mem: 9147
Epoch: [280]  [ 400/1251]  eta: 0:03:07  lr: 0.000050  min_lr: 0.000050  loss: 2.2627 (2.7245)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5463 (1.3015)  time: 0.2302  data: 0.0007  max mem: 9147
Epoch: [280]  [ 600/1251]  eta: 0:02:18  lr: 0.000049  min_lr: 0.000049  loss: 2.2321 (2.7480)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3593 (1.3204)  time: 0.1856  data: 0.0010  max mem: 9147
Epoch: [280]  [ 800/1251]  eta: 0:01:33  lr: 0.000048  min_lr: 0.000048  loss: 2.2744 (2.7597)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4246 (1.3239)  time: 0.1864  data: 0.0009  max mem: 9147
Epoch: [280]  [1000/1251]  eta: 0:00:51  lr: 0.000047  min_lr: 0.000047  loss: 2.3574 (2.7426)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2210 (inf)  time: 0.1896  data: 0.0006  max mem: 9147
Epoch: [280]  [1200/1251]  eta: 0:00:10  lr: 0.000046  min_lr: 0.000046  loss: 3.2052 (2.7349)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3547 (inf)  time: 0.2154  data: 0.0006  max mem: 9147
Epoch: [280]  [1250/1251]  eta: 0:00:00  lr: 0.000046  min_lr: 0.000046  loss: 2.1455 (2.7325)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3814 (inf)  time: 0.1397  data: 0.0011  max mem: 9147
Epoch: [280] Total time: 0:04:12 (0.2019 s / it)
Averaged stats: lr: 0.000046  min_lr: 0.000046  loss: 2.1455 (2.7479)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3814 (inf)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.6269 (0.6269)  acc1: 88.4000 (88.4000)  acc5: 98.8000 (98.8000)  time: 5.5443  data: 5.4640  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8130 (0.8156)  acc1: 83.6000 (83.0545)  acc5: 96.8000 (96.7273)  time: 0.7349  data: 0.6600  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0237 (0.9919)  acc1: 77.6000 (79.3143)  acc5: 94.0000 (94.7048)  time: 0.2067  data: 0.1341  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1317 (1.0031)  acc1: 77.6000 (78.8960)  acc5: 93.2000 (94.6240)  time: 0.2052  data: 0.1341  max mem: 9147
Test: Total time: 0:00:10 (0.4018 s / it)
* Acc@1 78.914 Acc@5 94.494 loss 1.004
Accuracy of the model on the 50000 test images: 78.9%
Max accuracy: 79.06%
Epoch: [281]  [   0/1251]  eta: 1:04:30  lr: 0.000046  min_lr: 0.000046  loss: 4.2089 (4.2089)  weight_decay: 0.0500 (0.0500)  time: 3.0942  data: 1.6951  max mem: 9147
Epoch: [281]  [ 200/1251]  eta: 0:03:44  lr: 0.000046  min_lr: 0.000046  loss: 2.2204 (2.6054)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2043 (1.3092)  time: 0.1843  data: 0.0006  max mem: 9147
Epoch: [281]  [ 400/1251]  eta: 0:02:50  lr: 0.000045  min_lr: 0.000045  loss: 2.2305 (2.6604)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1943 (1.2869)  time: 0.1871  data: 0.0005  max mem: 9147
Epoch: [281]  [ 600/1251]  eta: 0:02:07  lr: 0.000044  min_lr: 0.000044  loss: 2.1913 (2.6599)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2924 (1.3086)  time: 0.1899  data: 0.0012  max mem: 9147
Epoch: [281]  [ 800/1251]  eta: 0:01:27  lr: 0.000043  min_lr: 0.000043  loss: 2.0903 (2.6823)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2340 (1.3129)  time: 0.1997  data: 0.0006  max mem: 9147
Epoch: [281]  [1000/1251]  eta: 0:00:48  lr: 0.000043  min_lr: 0.000043  loss: 2.3367 (2.6795)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2607 (1.3188)  time: 0.1958  data: 0.0012  max mem: 9147
Epoch: [281]  [1200/1251]  eta: 0:00:09  lr: 0.000042  min_lr: 0.000042  loss: 2.4364 (2.7065)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3085 (1.3161)  time: 0.2097  data: 0.0008  max mem: 9147
Epoch: [281]  [1250/1251]  eta: 0:00:00  lr: 0.000042  min_lr: 0.000042  loss: 2.3273 (2.7054)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2258 (1.3126)  time: 0.1497  data: 0.0018  max mem: 9147
Epoch: [281] Total time: 0:04:03 (0.1945 s / it)
Averaged stats: lr: 0.000042  min_lr: 0.000042  loss: 2.3273 (2.7278)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2258 (1.3126)
Test:  [ 0/25]  eta: 0:02:19  loss: 0.6352 (0.6352)  acc1: 88.4000 (88.4000)  acc5: 98.8000 (98.8000)  time: 5.5702  data: 5.4899  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8189 (0.8289)  acc1: 84.0000 (83.2000)  acc5: 96.8000 (96.5818)  time: 0.7423  data: 0.6473  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0411 (1.0099)  acc1: 78.0000 (79.3333)  acc5: 94.0000 (94.5714)  time: 0.1973  data: 0.1145  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1563 (1.0226)  acc1: 77.6000 (78.9120)  acc5: 93.2000 (94.5120)  time: 0.2036  data: 0.1267  max mem: 9147
Test: Total time: 0:00:10 (0.4044 s / it)
* Acc@1 78.874 Acc@5 94.466 loss 1.022
Accuracy of the model on the 50000 test images: 78.9%
Max accuracy: 79.06%
Epoch: [282]  [   0/1251]  eta: 1:06:29  lr: 0.000042  min_lr: 0.000042  loss: 4.5308 (4.5308)  weight_decay: 0.0500 (0.0500)  time: 3.1890  data: 1.5734  max mem: 9147
Epoch: [282]  [ 200/1251]  eta: 0:03:56  lr: 0.000041  min_lr: 0.000041  loss: 2.4152 (2.6538)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3182 (1.3260)  time: 0.2003  data: 0.0007  max mem: 9147
Epoch: [282]  [ 400/1251]  eta: 0:03:07  lr: 0.000040  min_lr: 0.000040  loss: 2.0700 (2.6447)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2172 (1.3056)  time: 0.1999  data: 0.0006  max mem: 9147
Epoch: [282]  [ 600/1251]  eta: 0:02:20  lr: 0.000040  min_lr: 0.000040  loss: 2.6888 (2.6615)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1394 (1.3107)  time: 0.2100  data: 0.0006  max mem: 9147
Epoch: [282]  [ 800/1251]  eta: 0:01:36  lr: 0.000039  min_lr: 0.000039  loss: 3.0506 (2.6835)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2845 (1.3099)  time: 0.1999  data: 0.0005  max mem: 9147
Epoch: [282]  [1000/1251]  eta: 0:00:53  lr: 0.000038  min_lr: 0.000038  loss: 2.1014 (2.6855)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1760 (1.3046)  time: 0.2063  data: 0.0007  max mem: 9147
Epoch: [282]  [1200/1251]  eta: 0:00:10  lr: 0.000037  min_lr: 0.000037  loss: 2.2744 (2.7044)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2440 (1.3030)  time: 0.2314  data: 0.0020  max mem: 9147
Epoch: [282]  [1250/1251]  eta: 0:00:00  lr: 0.000037  min_lr: 0.000037  loss: 2.1400 (2.7042)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2727 (1.3021)  time: 0.1430  data: 0.0014  max mem: 9147
Epoch: [282] Total time: 0:04:27 (0.2136 s / it)
Averaged stats: lr: 0.000037  min_lr: 0.000037  loss: 2.1400 (2.7279)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2727 (1.3021)
Test:  [ 0/25]  eta: 0:02:14  loss: 0.6199 (0.6199)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.3642  data: 5.2805  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8031 (0.8051)  acc1: 83.2000 (83.0182)  acc5: 96.8000 (96.7273)  time: 0.7172  data: 0.6302  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0218 (0.9824)  acc1: 77.6000 (79.3143)  acc5: 94.0000 (94.7238)  time: 0.2011  data: 0.1228  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1258 (0.9939)  acc1: 77.2000 (78.9120)  acc5: 92.8000 (94.7200)  time: 0.2010  data: 0.1279  max mem: 9147
Test: Total time: 0:00:09 (0.3955 s / it)
* Acc@1 79.000 Acc@5 94.538 loss 0.995
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.06%
Epoch: [283]  [   0/1251]  eta: 1:04:37  lr: 0.000037  min_lr: 0.000037  loss: 3.6791 (3.6791)  weight_decay: 0.0500 (0.0500)  time: 3.0992  data: 1.6606  max mem: 9147
Epoch: [283]  [ 200/1251]  eta: 0:03:51  lr: 0.000037  min_lr: 0.000037  loss: 2.1133 (2.6812)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2447 (1.2780)  time: 0.1915  data: 0.0016  max mem: 9147
Epoch: [283]  [ 400/1251]  eta: 0:03:02  lr: 0.000036  min_lr: 0.000036  loss: 2.4345 (2.7016)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2458 (1.2817)  time: 0.2202  data: 0.0007  max mem: 9147
Epoch: [283]  [ 600/1251]  eta: 0:02:20  lr: 0.000035  min_lr: 0.000035  loss: 3.5089 (2.7195)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1294 (1.2683)  time: 0.2199  data: 0.0007  max mem: 9147
Epoch: [283]  [ 800/1251]  eta: 0:01:36  lr: 0.000035  min_lr: 0.000035  loss: 2.2736 (2.7172)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1864 (1.2777)  time: 0.2109  data: 0.0006  max mem: 9147
Epoch: [283]  [1000/1251]  eta: 0:00:53  lr: 0.000034  min_lr: 0.000034  loss: 2.2271 (2.7278)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2013 (1.2857)  time: 0.2146  data: 0.0006  max mem: 9147
Epoch: [283]  [1200/1251]  eta: 0:00:10  lr: 0.000033  min_lr: 0.000033  loss: 2.0304 (2.7265)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2516 (1.2951)  time: 0.2003  data: 0.0005  max mem: 9147
Epoch: [283]  [1250/1251]  eta: 0:00:00  lr: 0.000033  min_lr: 0.000033  loss: 2.1068 (2.7228)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2422 (1.2953)  time: 0.1405  data: 0.0008  max mem: 9147
Epoch: [283] Total time: 0:04:21 (0.2091 s / it)
Averaged stats: lr: 0.000033  min_lr: 0.000033  loss: 2.1068 (2.7304)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2422 (1.2953)
Test:  [ 0/25]  eta: 0:02:15  loss: 0.5905 (0.5905)  acc1: 88.8000 (88.8000)  acc5: 98.8000 (98.8000)  time: 5.4329  data: 5.3525  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.7823 (0.7861)  acc1: 83.6000 (82.8727)  acc5: 96.8000 (96.7636)  time: 0.7656  data: 0.6817  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0003 (0.9568)  acc1: 78.0000 (79.4286)  acc5: 93.2000 (94.5905)  time: 0.2199  data: 0.1414  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.0976 (0.9695)  acc1: 78.0000 (78.9760)  acc5: 93.2000 (94.5440)  time: 0.2185  data: 0.1413  max mem: 9147
Test: Total time: 0:00:10 (0.4083 s / it)
* Acc@1 79.034 Acc@5 94.538 loss 0.970
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.06%
Epoch: [284]  [   0/1251]  eta: 1:06:14  lr: 0.000033  min_lr: 0.000033  loss: 2.7641 (2.7641)  weight_decay: 0.0500 (0.0500)  time: 3.1770  data: 1.6918  max mem: 9147
Epoch: [284]  [ 200/1251]  eta: 0:03:42  lr: 0.000032  min_lr: 0.000032  loss: 2.1607 (2.7057)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1569 (1.2805)  time: 0.1950  data: 0.0005  max mem: 9147
Epoch: [284]  [ 400/1251]  eta: 0:02:50  lr: 0.000032  min_lr: 0.000032  loss: 2.2968 (2.7411)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2826 (1.3019)  time: 0.1915  data: 0.0006  max mem: 9147
Epoch: [284]  [ 600/1251]  eta: 0:02:08  lr: 0.000031  min_lr: 0.000031  loss: 2.9566 (2.7586)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2556 (1.3003)  time: 0.1768  data: 0.0005  max mem: 9147
Epoch: [284]  [ 800/1251]  eta: 0:01:28  lr: 0.000031  min_lr: 0.000031  loss: 2.1193 (2.7455)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2207 (1.2926)  time: 0.1868  data: 0.0005  max mem: 9147
Epoch: [284]  [1000/1251]  eta: 0:00:49  lr: 0.000030  min_lr: 0.000030  loss: 2.2102 (2.7510)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1765 (1.2816)  time: 0.2101  data: 0.0006  max mem: 9147
Epoch: [284]  [1200/1251]  eta: 0:00:10  lr: 0.000029  min_lr: 0.000029  loss: 2.1084 (2.7492)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2584 (1.2809)  time: 0.2139  data: 0.0198  max mem: 9147
Epoch: [284]  [1250/1251]  eta: 0:00:00  lr: 0.000029  min_lr: 0.000029  loss: 2.1904 (2.7462)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2637 (1.2827)  time: 0.1457  data: 0.0015  max mem: 9147
Epoch: [284] Total time: 0:04:10 (0.2003 s / it)
Averaged stats: lr: 0.000029  min_lr: 0.000029  loss: 2.1904 (2.7254)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2637 (1.2827)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.6572 (0.6572)  acc1: 87.6000 (87.6000)  acc5: 98.8000 (98.8000)  time: 5.6275  data: 5.5470  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8497 (0.8504)  acc1: 83.6000 (83.0909)  acc5: 96.8000 (96.6546)  time: 0.7128  data: 0.6179  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0633 (1.0266)  acc1: 77.2000 (79.1619)  acc5: 93.6000 (94.5524)  time: 0.1942  data: 0.1115  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1606 (1.0380)  acc1: 77.2000 (78.8320)  acc5: 93.2000 (94.4960)  time: 0.1942  data: 0.1161  max mem: 9147
Test: Total time: 0:00:10 (0.4023 s / it)
* Acc@1 78.884 Acc@5 94.510 loss 1.038
Accuracy of the model on the 50000 test images: 78.9%
Max accuracy: 79.06%
Epoch: [285]  [   0/1251]  eta: 1:08:24  lr: 0.000029  min_lr: 0.000029  loss: 1.9178 (1.9178)  weight_decay: 0.0500 (0.0500)  time: 3.2807  data: 2.6792  max mem: 9147
Epoch: [285]  [ 200/1251]  eta: 0:03:53  lr: 0.000029  min_lr: 0.000029  loss: 3.5557 (2.7847)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0861 (1.2462)  time: 0.1909  data: 0.0005  max mem: 9147
Epoch: [285]  [ 400/1251]  eta: 0:02:54  lr: 0.000028  min_lr: 0.000028  loss: 2.2195 (2.7235)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2703 (1.2664)  time: 0.1876  data: 0.0005  max mem: 9147
Epoch: [285]  [ 600/1251]  eta: 0:02:10  lr: 0.000027  min_lr: 0.000027  loss: 2.2016 (2.6916)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2437 (1.2715)  time: 0.1929  data: 0.0005  max mem: 9147
Epoch: [285]  [ 800/1251]  eta: 0:01:29  lr: 0.000027  min_lr: 0.000027  loss: 2.1891 (2.6915)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2661 (1.2719)  time: 0.1953  data: 0.0011  max mem: 9147
Epoch: [285]  [1000/1251]  eta: 0:00:49  lr: 0.000026  min_lr: 0.000026  loss: 2.1690 (2.6943)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1957 (1.2733)  time: 0.2005  data: 0.0007  max mem: 9147
Epoch: [285]  [1200/1251]  eta: 0:00:10  lr: 0.000026  min_lr: 0.000026  loss: 2.4246 (2.7024)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1756 (1.2813)  time: 0.2101  data: 0.0007  max mem: 9147
Epoch: [285]  [1250/1251]  eta: 0:00:00  lr: 0.000026  min_lr: 0.000026  loss: 2.4808 (2.7092)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1910 (1.2848)  time: 0.1388  data: 0.0010  max mem: 9147
Epoch: [285] Total time: 0:04:11 (0.2012 s / it)
Averaged stats: lr: 0.000026  min_lr: 0.000026  loss: 2.4808 (2.7190)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1910 (1.2848)
Test:  [ 0/25]  eta: 0:02:15  loss: 0.6475 (0.6475)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.4290  data: 5.3486  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8165 (0.8319)  acc1: 84.0000 (83.0909)  acc5: 96.8000 (96.6909)  time: 0.7334  data: 0.6405  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0371 (1.0095)  acc1: 77.2000 (79.2762)  acc5: 94.0000 (94.6667)  time: 0.2179  data: 0.1349  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1462 (1.0204)  acc1: 77.2000 (78.8320)  acc5: 92.8000 (94.6880)  time: 0.2207  data: 0.1425  max mem: 9147
Test: Total time: 0:00:10 (0.4121 s / it)
* Acc@1 78.964 Acc@5 94.510 loss 1.019
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.06%
Epoch: [286]  [   0/1251]  eta: 1:02:37  lr: 0.000026  min_lr: 0.000026  loss: 2.0687 (2.0687)  weight_decay: 0.0500 (0.0500)  time: 3.0040  data: 2.2215  max mem: 9147
Epoch: [286]  [ 200/1251]  eta: 0:04:01  lr: 0.000025  min_lr: 0.000025  loss: 2.2150 (2.7700)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2030 (1.2509)  time: 0.2344  data: 0.0227  max mem: 9147
Epoch: [286]  [ 400/1251]  eta: 0:03:08  lr: 0.000025  min_lr: 0.000025  loss: 2.2018 (2.7232)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2624 (1.2546)  time: 0.2003  data: 0.0006  max mem: 9147
Epoch: [286]  [ 600/1251]  eta: 0:02:22  lr: 0.000024  min_lr: 0.000024  loss: 2.2007 (2.6955)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2305 (1.2594)  time: 0.2060  data: 0.0007  max mem: 9147
Epoch: [286]  [ 800/1251]  eta: 0:01:38  lr: 0.000023  min_lr: 0.000023  loss: 2.4497 (2.6865)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2220 (1.2543)  time: 0.2168  data: 0.0007  max mem: 9147
Epoch: [286]  [1000/1251]  eta: 0:00:54  lr: 0.000023  min_lr: 0.000023  loss: 2.1030 (2.6985)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2878 (1.2630)  time: 0.2200  data: 0.0006  max mem: 9147
Epoch: [286]  [1200/1251]  eta: 0:00:11  lr: 0.000022  min_lr: 0.000022  loss: 2.2016 (2.7133)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1580 (1.2648)  time: 0.2191  data: 0.0007  max mem: 9147
Epoch: [286]  [1250/1251]  eta: 0:00:00  lr: 0.000022  min_lr: 0.000022  loss: 2.3291 (2.7160)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2813 (1.2739)  time: 0.1472  data: 0.0009  max mem: 9147
Epoch: [286] Total time: 0:04:30 (0.2165 s / it)
Averaged stats: lr: 0.000022  min_lr: 0.000022  loss: 2.3291 (2.7243)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2813 (1.2739)
Test:  [ 0/25]  eta: 0:02:14  loss: 0.6577 (0.6577)  acc1: 88.4000 (88.4000)  acc5: 98.8000 (98.8000)  time: 5.3966  data: 5.2627  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8359 (0.8404)  acc1: 83.6000 (83.2727)  acc5: 96.8000 (96.6546)  time: 0.7599  data: 0.6609  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0494 (1.0184)  acc1: 78.0000 (79.3714)  acc5: 93.6000 (94.5524)  time: 0.2159  data: 0.1330  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1565 (1.0299)  acc1: 76.8000 (78.9920)  acc5: 92.8000 (94.5120)  time: 0.2159  data: 0.1389  max mem: 9147
Test: Total time: 0:00:10 (0.4078 s / it)
* Acc@1 78.986 Acc@5 94.540 loss 1.030
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.06%
Epoch: [287]  [   0/1251]  eta: 1:03:29  lr: 0.000022  min_lr: 0.000022  loss: 1.9506 (1.9506)  weight_decay: 0.0500 (0.0500)  time: 3.0452  data: 2.5691  max mem: 9147
Epoch: [287]  [ 200/1251]  eta: 0:03:45  lr: 0.000022  min_lr: 0.000022  loss: 2.2779 (2.7108)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1614 (1.3119)  time: 0.1879  data: 0.0010  max mem: 9147
Epoch: [287]  [ 400/1251]  eta: 0:02:52  lr: 0.000021  min_lr: 0.000021  loss: 2.2392 (2.7127)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2151 (inf)  time: 0.1948  data: 0.0005  max mem: 9147
Epoch: [287]  [ 600/1251]  eta: 0:02:10  lr: 0.000021  min_lr: 0.000021  loss: 2.9159 (2.7303)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3519 (inf)  time: 0.1998  data: 0.0005  max mem: 9147
Epoch: [287]  [ 800/1251]  eta: 0:01:31  lr: 0.000020  min_lr: 0.000020  loss: 2.2917 (2.7332)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2645 (inf)  time: 0.2152  data: 0.0008  max mem: 9147
Epoch: [287]  [1000/1251]  eta: 0:00:51  lr: 0.000020  min_lr: 0.000020  loss: 2.1935 (2.7310)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1797 (inf)  time: 0.1919  data: 0.0005  max mem: 9147
Epoch: [287]  [1200/1251]  eta: 0:00:10  lr: 0.000019  min_lr: 0.000019  loss: 2.1386 (2.7101)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1920 (inf)  time: 0.2438  data: 0.0006  max mem: 9147
Epoch: [287]  [1250/1251]  eta: 0:00:00  lr: 0.000019  min_lr: 0.000019  loss: 2.3287 (2.7127)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1535 (inf)  time: 0.1383  data: 0.0012  max mem: 9147
Epoch: [287] Total time: 0:04:18 (0.2066 s / it)
Averaged stats: lr: 0.000019  min_lr: 0.000019  loss: 2.3287 (2.7343)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1535 (inf)
Test:  [ 0/25]  eta: 0:02:25  loss: 0.6434 (0.6434)  acc1: 87.6000 (87.6000)  acc5: 98.8000 (98.8000)  time: 5.8209  data: 5.7405  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8291 (0.8354)  acc1: 83.6000 (83.0182)  acc5: 96.8000 (96.6909)  time: 0.7360  data: 0.6572  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0404 (1.0131)  acc1: 77.2000 (79.2191)  acc5: 94.0000 (94.6857)  time: 0.2076  data: 0.1337  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1524 (1.0241)  acc1: 77.2000 (78.7840)  acc5: 92.8000 (94.6880)  time: 0.2039  data: 0.1336  max mem: 9147
Test: Total time: 0:00:10 (0.4131 s / it)
* Acc@1 79.014 Acc@5 94.542 loss 1.024
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.06%
Epoch: [288]  [   0/1251]  eta: 1:02:13  lr: 0.000019  min_lr: 0.000019  loss: 3.1099 (3.1099)  weight_decay: 0.0500 (0.0500)  time: 2.9847  data: 1.5883  max mem: 9147
Epoch: [288]  [ 200/1251]  eta: 0:03:41  lr: 0.000019  min_lr: 0.000019  loss: 2.0803 (2.7933)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1088 (1.2794)  time: 0.1954  data: 0.0005  max mem: 9147
Epoch: [288]  [ 400/1251]  eta: 0:02:55  lr: 0.000018  min_lr: 0.000018  loss: 2.4670 (2.7933)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2026 (1.2636)  time: 0.2157  data: 0.0008  max mem: 9147
Epoch: [288]  [ 600/1251]  eta: 0:02:15  lr: 0.000018  min_lr: 0.000018  loss: 2.7851 (2.7471)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1626 (1.2596)  time: 0.2008  data: 0.0006  max mem: 9147
Epoch: [288]  [ 800/1251]  eta: 0:01:35  lr: 0.000017  min_lr: 0.000017  loss: 2.1928 (2.7622)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1866 (1.2601)  time: 0.2257  data: 0.0008  max mem: 9147
Epoch: [288]  [1000/1251]  eta: 0:00:53  lr: 0.000017  min_lr: 0.000017  loss: 1.9570 (2.7516)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2043 (1.2645)  time: 0.1955  data: 0.0005  max mem: 9147
Epoch: [288]  [1200/1251]  eta: 0:00:10  lr: 0.000016  min_lr: 0.000016  loss: 2.0647 (2.7588)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3104 (1.2648)  time: 0.2287  data: 0.0006  max mem: 9147
Epoch: [288]  [1250/1251]  eta: 0:00:00  lr: 0.000016  min_lr: 0.000016  loss: 2.4630 (2.7581)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1823 (1.2607)  time: 0.1481  data: 0.0013  max mem: 9147
Epoch: [288] Total time: 0:04:24 (0.2112 s / it)
Averaged stats: lr: 0.000016  min_lr: 0.000016  loss: 2.4630 (2.7278)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1823 (1.2607)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.6380 (0.6380)  acc1: 88.8000 (88.8000)  acc5: 98.8000 (98.8000)  time: 5.6842  data: 5.6039  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8069 (0.8262)  acc1: 84.0000 (83.3818)  acc5: 96.8000 (96.6546)  time: 0.7263  data: 0.6500  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0370 (1.0063)  acc1: 78.4000 (79.4476)  acc5: 93.6000 (94.6476)  time: 0.1933  data: 0.1203  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1577 (1.0168)  acc1: 76.8000 (79.0720)  acc5: 93.2000 (94.6560)  time: 0.1997  data: 0.1287  max mem: 9147
Test: Total time: 0:00:10 (0.4036 s / it)
* Acc@1 79.016 Acc@5 94.566 loss 1.016
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.06%
Epoch: [289]  [   0/1251]  eta: 1:04:07  lr: 0.000016  min_lr: 0.000016  loss: 3.2515 (3.2515)  weight_decay: 0.0500 (0.0500)  time: 3.0753  data: 2.2997  max mem: 9147
Epoch: [289]  [ 200/1251]  eta: 0:04:00  lr: 0.000016  min_lr: 0.000016  loss: 3.0142 (2.7920)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2417 (1.2790)  time: 0.2107  data: 0.0008  max mem: 9147
Epoch: [289]  [ 400/1251]  eta: 0:03:09  lr: 0.000015  min_lr: 0.000015  loss: 2.5204 (2.7867)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1699 (1.2766)  time: 0.2243  data: 0.0167  max mem: 9147
Epoch: [289]  [ 600/1251]  eta: 0:02:21  lr: 0.000015  min_lr: 0.000015  loss: 2.0444 (2.7552)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2093 (1.2766)  time: 0.2093  data: 0.0007  max mem: 9147
Epoch: [289]  [ 800/1251]  eta: 0:01:38  lr: 0.000014  min_lr: 0.000014  loss: 2.4461 (2.7544)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3041 (1.2764)  time: 0.2344  data: 0.0324  max mem: 9147
Epoch: [289]  [1000/1251]  eta: 0:00:54  lr: 0.000014  min_lr: 0.000014  loss: 2.4238 (2.7319)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2504 (1.2819)  time: 0.2003  data: 0.0007  max mem: 9147
Epoch: [289]  [1200/1251]  eta: 0:00:11  lr: 0.000014  min_lr: 0.000014  loss: 2.2511 (2.7317)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1166 (1.2755)  time: 0.2039  data: 0.0008  max mem: 9147
Epoch: [289]  [1250/1251]  eta: 0:00:00  lr: 0.000014  min_lr: 0.000014  loss: 2.1612 (2.7315)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2411 (1.2742)  time: 0.1459  data: 0.0007  max mem: 9147
Epoch: [289] Total time: 0:04:29 (0.2155 s / it)
Averaged stats: lr: 0.000014  min_lr: 0.000014  loss: 2.1612 (2.7171)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2411 (1.2742)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.6090 (0.6090)  acc1: 87.2000 (87.2000)  acc5: 98.8000 (98.8000)  time: 5.6078  data: 5.5055  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.7793 (0.7960)  acc1: 83.6000 (83.1636)  acc5: 96.8000 (96.7273)  time: 0.7438  data: 0.6676  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0053 (0.9699)  acc1: 77.2000 (79.3333)  acc5: 93.6000 (94.6286)  time: 0.2025  data: 0.1305  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1168 (0.9818)  acc1: 76.8000 (78.8800)  acc5: 92.8000 (94.6240)  time: 0.2013  data: 0.1305  max mem: 9147
Test: Total time: 0:00:10 (0.4005 s / it)
* Acc@1 79.016 Acc@5 94.534 loss 0.981
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.06%
Epoch: [290]  [   0/1251]  eta: 1:05:02  lr: 0.000014  min_lr: 0.000014  loss: 3.8322 (3.8322)  weight_decay: 0.0500 (0.0500)  time: 3.1194  data: 2.4863  max mem: 9147
Epoch: [290]  [ 200/1251]  eta: 0:03:54  lr: 0.000013  min_lr: 0.000013  loss: 2.1694 (2.7231)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2900 (1.2377)  time: 0.2003  data: 0.0007  max mem: 9147
Epoch: [290]  [ 400/1251]  eta: 0:03:04  lr: 0.000013  min_lr: 0.000013  loss: 2.3578 (2.7147)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3056 (1.2730)  time: 0.2001  data: 0.0008  max mem: 9147
Epoch: [290]  [ 600/1251]  eta: 0:02:20  lr: 0.000012  min_lr: 0.000012  loss: 2.9589 (2.7032)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2502 (1.2896)  time: 0.2156  data: 0.0149  max mem: 9147
Epoch: [290]  [ 800/1251]  eta: 0:01:36  lr: 0.000012  min_lr: 0.000012  loss: 2.1724 (2.6954)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1985 (1.2769)  time: 0.2246  data: 0.0166  max mem: 9147
Epoch: [290]  [1000/1251]  eta: 0:00:53  lr: 0.000012  min_lr: 0.000012  loss: 2.7842 (2.7035)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1867 (1.2785)  time: 0.2257  data: 0.0007  max mem: 9147
Epoch: [290]  [1200/1251]  eta: 0:00:10  lr: 0.000011  min_lr: 0.000011  loss: 2.0771 (2.6878)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3122 (1.2816)  time: 0.2002  data: 0.0007  max mem: 9147
Epoch: [290]  [1250/1251]  eta: 0:00:00  lr: 0.000011  min_lr: 0.000011  loss: 2.1110 (2.6865)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1274 (1.2751)  time: 0.1481  data: 0.0015  max mem: 9147
Epoch: [290] Total time: 0:04:25 (0.2126 s / it)
Averaged stats: lr: 0.000011  min_lr: 0.000011  loss: 2.1110 (2.7210)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1274 (1.2751)
Test:  [ 0/25]  eta: 0:02:20  loss: 0.5824 (0.5824)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.6295  data: 5.5489  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.7709 (0.7820)  acc1: 83.2000 (83.2727)  acc5: 96.8000 (96.9091)  time: 0.7595  data: 0.6802  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 0.9902 (0.9556)  acc1: 78.0000 (79.5619)  acc5: 93.6000 (94.7429)  time: 0.2254  data: 0.1507  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.0964 (0.9670)  acc1: 78.0000 (79.1200)  acc5: 93.2000 (94.7520)  time: 0.2226  data: 0.1506  max mem: 9147
Test: Total time: 0:00:10 (0.4203 s / it)
* Acc@1 79.088 Acc@5 94.518 loss 0.968
Accuracy of the model on the 50000 test images: 79.1%
Max accuracy: 79.09%
Epoch: [291]  [   0/1251]  eta: 0:57:47  lr: 0.000011  min_lr: 0.000011  loss: 3.0623 (3.0623)  weight_decay: 0.0500 (0.0500)  time: 2.7714  data: 2.4086  max mem: 9147
Epoch: [291]  [ 200/1251]  eta: 0:03:48  lr: 0.000011  min_lr: 0.000011  loss: 2.2082 (2.7355)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2658 (1.3087)  time: 0.1850  data: 0.0005  max mem: 9147
Epoch: [291]  [ 400/1251]  eta: 0:02:53  lr: 0.000010  min_lr: 0.000010  loss: 2.3024 (2.7322)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2663 (1.2983)  time: 0.1951  data: 0.0011  max mem: 9147
Epoch: [291]  [ 600/1251]  eta: 0:02:13  lr: 0.000010  min_lr: 0.000010  loss: 2.6019 (2.7600)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3094 (1.3011)  time: 0.1998  data: 0.0007  max mem: 9147
Epoch: [291]  [ 800/1251]  eta: 0:01:33  lr: 0.000010  min_lr: 0.000010  loss: 2.3446 (2.7635)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2140 (1.2877)  time: 0.2192  data: 0.0006  max mem: 9147
Epoch: [291]  [1000/1251]  eta: 0:00:52  lr: 0.000009  min_lr: 0.000009  loss: 2.3968 (2.7294)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3388 (1.2902)  time: 0.2201  data: 0.0094  max mem: 9147
Epoch: [291]  [1200/1251]  eta: 0:00:10  lr: 0.000009  min_lr: 0.000009  loss: 2.3400 (2.7212)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2226 (1.2964)  time: 0.2230  data: 0.0007  max mem: 9147
Epoch: [291]  [1250/1251]  eta: 0:00:00  lr: 0.000009  min_lr: 0.000009  loss: 2.1743 (2.7292)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3654 (1.2975)  time: 0.1417  data: 0.0015  max mem: 9147
Epoch: [291] Total time: 0:04:22 (0.2100 s / it)
Averaged stats: lr: 0.000009  min_lr: 0.000009  loss: 2.1743 (2.7217)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3654 (1.2975)
Test:  [ 0/25]  eta: 0:02:13  loss: 0.6443 (0.6443)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.3399  data: 5.2587  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8196 (0.8365)  acc1: 83.6000 (82.8727)  acc5: 96.8000 (96.6909)  time: 0.6743  data: 0.5813  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0387 (1.0129)  acc1: 77.6000 (79.1810)  acc5: 93.6000 (94.5714)  time: 0.2014  data: 0.1179  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1568 (1.0238)  acc1: 77.6000 (78.8000)  acc5: 92.4000 (94.5760)  time: 0.1980  data: 0.1188  max mem: 9147
Test: Total time: 0:00:09 (0.3900 s / it)
* Acc@1 79.026 Acc@5 94.536 loss 1.024
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.09%
Epoch: [292]  [   0/1251]  eta: 1:03:07  lr: 0.000009  min_lr: 0.000009  loss: 3.6721 (3.6721)  weight_decay: 0.0500 (0.0500)  time: 3.0275  data: 2.2484  max mem: 9147
Epoch: [292]  [ 200/1251]  eta: 0:03:56  lr: 0.000009  min_lr: 0.000009  loss: 2.4147 (2.7475)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1607 (1.2239)  time: 0.2153  data: 0.0230  max mem: 9147
Epoch: [292]  [ 400/1251]  eta: 0:03:04  lr: 0.000008  min_lr: 0.000008  loss: 2.6558 (2.7203)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2208 (1.2280)  time: 0.2009  data: 0.0007  max mem: 9147
Epoch: [292]  [ 600/1251]  eta: 0:02:19  lr: 0.000008  min_lr: 0.000008  loss: 2.6078 (2.6953)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2075 (1.2276)  time: 0.1903  data: 0.0012  max mem: 9147
Epoch: [292]  [ 800/1251]  eta: 0:01:35  lr: 0.000008  min_lr: 0.000008  loss: 2.1938 (2.7205)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3143 (1.2373)  time: 0.2169  data: 0.0007  max mem: 9147
Epoch: [292]  [1000/1251]  eta: 0:00:53  lr: 0.000008  min_lr: 0.000008  loss: 2.5806 (2.7056)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2316 (1.2437)  time: 0.2002  data: 0.0006  max mem: 9147
Epoch: [292]  [1200/1251]  eta: 0:00:10  lr: 0.000007  min_lr: 0.000007  loss: 2.3238 (2.7068)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1284 (1.2331)  time: 0.2149  data: 0.0007  max mem: 9147
Epoch: [292]  [1250/1251]  eta: 0:00:00  lr: 0.000007  min_lr: 0.000007  loss: 2.3881 (2.7027)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1665 (1.2347)  time: 0.1424  data: 0.0014  max mem: 9147
Epoch: [292] Total time: 0:04:25 (0.2121 s / it)
Averaged stats: lr: 0.000007  min_lr: 0.000007  loss: 2.3881 (2.7275)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1665 (1.2347)
Test:  [ 0/25]  eta: 0:01:22  loss: 0.6388 (0.6388)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 3.2968  data: 3.2099  max mem: 9147
Test:  [10/25]  eta: 0:00:08  loss: 0.8214 (0.8309)  acc1: 83.6000 (82.9818)  acc5: 96.8000 (96.7273)  time: 0.5632  data: 0.4866  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0426 (1.0099)  acc1: 77.6000 (79.3714)  acc5: 93.6000 (94.7048)  time: 0.2694  data: 0.1970  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1551 (1.0207)  acc1: 77.2000 (78.9760)  acc5: 93.2000 (94.7040)  time: 0.2221  data: 0.1528  max mem: 9147
Test: Total time: 0:00:10 (0.4033 s / it)
* Acc@1 79.108 Acc@5 94.534 loss 1.020
Accuracy of the model on the 50000 test images: 79.1%
Max accuracy: 79.11%
Epoch: [293]  [   0/1251]  eta: 1:05:44  lr: 0.000007  min_lr: 0.000007  loss: 1.9411 (1.9411)  weight_decay: 0.0500 (0.0500)  time: 3.1529  data: 2.9502  max mem: 9147
Epoch: [293]  [ 200/1251]  eta: 0:03:36  lr: 0.000007  min_lr: 0.000007  loss: 2.6743 (2.7370)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1658 (1.2633)  time: 0.1953  data: 0.0006  max mem: 9147
Epoch: [293]  [ 400/1251]  eta: 0:02:59  lr: 0.000007  min_lr: 0.000007  loss: 2.2647 (2.7153)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1222 (1.2630)  time: 0.2246  data: 0.0006  max mem: 9147
Epoch: [293]  [ 600/1251]  eta: 0:02:19  lr: 0.000006  min_lr: 0.000006  loss: 2.4778 (2.7104)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1163 (1.2420)  time: 0.2246  data: 0.0013  max mem: 9147
Epoch: [293]  [ 800/1251]  eta: 0:01:34  lr: 0.000006  min_lr: 0.000006  loss: 2.2911 (2.7165)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2584 (1.2483)  time: 0.1909  data: 0.0010  max mem: 9147
Epoch: [293]  [1000/1251]  eta: 0:00:52  lr: 0.000006  min_lr: 0.000006  loss: 2.7587 (2.7057)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1042 (inf)  time: 0.2075  data: 0.0006  max mem: 9147
Epoch: [293]  [1200/1251]  eta: 0:00:10  lr: 0.000006  min_lr: 0.000006  loss: 2.1766 (2.7164)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2892 (inf)  time: 0.2150  data: 0.0162  max mem: 9147
Epoch: [293]  [1250/1251]  eta: 0:00:00  lr: 0.000006  min_lr: 0.000006  loss: 2.1756 (2.7125)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2892 (inf)  time: 0.1523  data: 0.0032  max mem: 9147
Epoch: [293] Total time: 0:04:21 (0.2091 s / it)
Averaged stats: lr: 0.000006  min_lr: 0.000006  loss: 2.1756 (2.7147)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2892 (inf)
Test:  [ 0/25]  eta: 0:02:16  loss: 0.6371 (0.6371)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.4624  data: 5.3596  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8161 (0.8285)  acc1: 83.6000 (83.2364)  acc5: 96.8000 (96.8364)  time: 0.7250  data: 0.6425  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0330 (1.0047)  acc1: 77.6000 (79.4476)  acc5: 94.0000 (94.7810)  time: 0.2022  data: 0.1269  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1470 (1.0156)  acc1: 77.6000 (79.0880)  acc5: 92.8000 (94.7040)  time: 0.2164  data: 0.1422  max mem: 9147
Test: Total time: 0:00:10 (0.4092 s / it)
* Acc@1 79.002 Acc@5 94.518 loss 1.016
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.11%
Epoch: [294]  [   0/1251]  eta: 1:07:52  lr: 0.000006  min_lr: 0.000006  loss: 1.8873 (1.8873)  weight_decay: 0.0500 (0.0500)  time: 3.2553  data: 1.5844  max mem: 9147
Epoch: [294]  [ 200/1251]  eta: 0:03:58  lr: 0.000005  min_lr: 0.000005  loss: 2.4018 (2.6261)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1742 (1.2231)  time: 0.2195  data: 0.0006  max mem: 9147
Epoch: [294]  [ 400/1251]  eta: 0:03:01  lr: 0.000005  min_lr: 0.000005  loss: 3.3405 (2.6712)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1685 (1.2418)  time: 0.1928  data: 0.0005  max mem: 9147
Epoch: [294]  [ 600/1251]  eta: 0:02:14  lr: 0.000005  min_lr: 0.000005  loss: 2.0422 (2.6750)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3242 (1.2374)  time: 0.1983  data: 0.0009  max mem: 9147
Epoch: [294]  [ 800/1251]  eta: 0:01:31  lr: 0.000005  min_lr: 0.000005  loss: 2.2455 (2.6850)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1346 (1.2424)  time: 0.2054  data: 0.0007  max mem: 9147
Epoch: [294]  [1000/1251]  eta: 0:00:51  lr: 0.000004  min_lr: 0.000004  loss: 2.9211 (2.6865)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1326 (1.2439)  time: 0.1913  data: 0.0006  max mem: 9147
Epoch: [294]  [1200/1251]  eta: 0:00:10  lr: 0.000004  min_lr: 0.000004  loss: 2.4879 (2.6822)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2046 (1.2445)  time: 0.2004  data: 0.0006  max mem: 9147
Epoch: [294]  [1250/1251]  eta: 0:00:00  lr: 0.000004  min_lr: 0.000004  loss: 2.1661 (2.6854)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2738 (1.2497)  time: 0.1569  data: 0.0009  max mem: 9147
Epoch: [294] Total time: 0:04:14 (0.2037 s / it)
Averaged stats: lr: 0.000004  min_lr: 0.000004  loss: 2.1661 (2.7042)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2738 (1.2497)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.6473 (0.6473)  acc1: 88.4000 (88.4000)  acc5: 98.8000 (98.8000)  time: 5.5246  data: 5.4437  max mem: 9147
Test:  [10/25]  eta: 0:00:09  loss: 0.8244 (0.8416)  acc1: 84.0000 (83.2364)  acc5: 96.8000 (96.7636)  time: 0.6637  data: 0.5718  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0604 (1.0217)  acc1: 78.0000 (79.3333)  acc5: 93.6000 (94.6857)  time: 0.1849  data: 0.1034  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1680 (1.0332)  acc1: 77.6000 (78.9120)  acc5: 93.6000 (94.6720)  time: 0.1930  data: 0.1163  max mem: 9147
Test: Total time: 0:00:09 (0.3971 s / it)
* Acc@1 79.000 Acc@5 94.548 loss 1.033
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.11%
Epoch: [295]  [   0/1251]  eta: 1:05:57  lr: 0.000004  min_lr: 0.000004  loss: 1.9326 (1.9326)  weight_decay: 0.0500 (0.0500)  time: 3.1634  data: 2.6833  max mem: 9147
Epoch: [295]  [ 200/1251]  eta: 0:03:59  lr: 0.000004  min_lr: 0.000004  loss: 2.5663 (2.7059)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1791 (1.2512)  time: 0.2012  data: 0.0005  max mem: 9147
Epoch: [295]  [ 400/1251]  eta: 0:03:06  lr: 0.000004  min_lr: 0.000004  loss: 2.1009 (2.7063)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3098 (1.2490)  time: 0.2005  data: 0.0006  max mem: 9147
Epoch: [295]  [ 600/1251]  eta: 0:02:21  lr: 0.000004  min_lr: 0.000004  loss: 2.5738 (2.6923)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1092 (1.2471)  time: 0.2238  data: 0.0006  max mem: 9147
Epoch: [295]  [ 800/1251]  eta: 0:01:37  lr: 0.000003  min_lr: 0.000003  loss: 2.0605 (2.6845)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1961 (1.2541)  time: 0.2348  data: 0.0007  max mem: 9147
Epoch: [295]  [1000/1251]  eta: 0:00:53  lr: 0.000003  min_lr: 0.000003  loss: 2.0940 (2.6918)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2610 (1.2577)  time: 0.1943  data: 0.0006  max mem: 9147
Epoch: [295]  [1200/1251]  eta: 0:00:10  lr: 0.000003  min_lr: 0.000003  loss: 2.2134 (2.6935)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2004 (1.2598)  time: 0.2195  data: 0.0007  max mem: 9147
Epoch: [295]  [1250/1251]  eta: 0:00:00  lr: 0.000003  min_lr: 0.000003  loss: 2.9948 (2.6992)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2958 (1.2643)  time: 0.1541  data: 0.0012  max mem: 9147
Epoch: [295] Total time: 0:04:26 (0.2128 s / it)
Averaged stats: lr: 0.000003  min_lr: 0.000003  loss: 2.9948 (2.7185)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2958 (1.2643)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.6562 (0.6562)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 5.5404  data: 5.4461  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.8430 (0.8492)  acc1: 84.0000 (83.0545)  acc5: 96.8000 (96.7273)  time: 0.7167  data: 0.6409  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0771 (1.0308)  acc1: 78.0000 (79.2381)  acc5: 93.2000 (94.6476)  time: 0.1918  data: 0.1197  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1708 (1.0416)  acc1: 77.2000 (78.8960)  acc5: 93.2000 (94.6080)  time: 0.2016  data: 0.1305  max mem: 9147
Test: Total time: 0:00:09 (0.3982 s / it)
* Acc@1 78.988 Acc@5 94.504 loss 1.041
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.11%
Epoch: [296]  [   0/1251]  eta: 1:03:16  lr: 0.000003  min_lr: 0.000003  loss: 1.7420 (1.7420)  weight_decay: 0.0500 (0.0500)  time: 3.0349  data: 1.6355  max mem: 9147
Epoch: [296]  [ 200/1251]  eta: 0:03:53  lr: 0.000003  min_lr: 0.000003  loss: 2.2466 (2.7630)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2560 (1.2620)  time: 0.2008  data: 0.0007  max mem: 9147
Epoch: [296]  [ 400/1251]  eta: 0:02:57  lr: 0.000003  min_lr: 0.000003  loss: 2.2439 (2.7284)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1644 (1.2315)  time: 0.1905  data: 0.0005  max mem: 9147
Epoch: [296]  [ 600/1251]  eta: 0:02:16  lr: 0.000003  min_lr: 0.000003  loss: 2.3291 (2.7418)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2383 (1.2292)  time: 0.2243  data: 0.0006  max mem: 9147
Epoch: [296]  [ 800/1251]  eta: 0:01:35  lr: 0.000002  min_lr: 0.000002  loss: 2.1131 (2.7285)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2903 (1.2596)  time: 0.2342  data: 0.0008  max mem: 9147
Epoch: [296]  [1000/1251]  eta: 0:00:53  lr: 0.000002  min_lr: 0.000002  loss: 2.2593 (2.7305)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1640 (1.2552)  time: 0.2250  data: 0.0008  max mem: 9147
Epoch: [296]  [1200/1251]  eta: 0:00:10  lr: 0.000002  min_lr: 0.000002  loss: 2.3032 (2.7303)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2413 (1.2546)  time: 0.2287  data: 0.0007  max mem: 9147
Epoch: [296]  [1250/1251]  eta: 0:00:00  lr: 0.000002  min_lr: 0.000002  loss: 2.1736 (2.7222)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1944 (1.2561)  time: 0.1435  data: 0.0014  max mem: 9147
Epoch: [296] Total time: 0:04:24 (0.2113 s / it)
Averaged stats: lr: 0.000002  min_lr: 0.000002  loss: 2.1736 (2.7066)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1944 (1.2561)
Test:  [ 0/25]  eta: 0:02:04  loss: 0.6135 (0.6135)  acc1: 89.2000 (89.2000)  acc5: 98.8000 (98.8000)  time: 4.9663  data: 4.8859  max mem: 9147
Test:  [10/25]  eta: 0:00:10  loss: 0.7976 (0.8078)  acc1: 83.2000 (83.2364)  acc5: 96.8000 (96.8364)  time: 0.7290  data: 0.6355  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0148 (0.9821)  acc1: 77.2000 (79.3333)  acc5: 93.6000 (94.6667)  time: 0.2283  data: 0.1449  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1164 (0.9942)  acc1: 77.2000 (78.8800)  acc5: 93.2000 (94.6240)  time: 0.2244  data: 0.1448  max mem: 9147
Test: Total time: 0:00:09 (0.3974 s / it)
* Acc@1 79.062 Acc@5 94.494 loss 0.995
Accuracy of the model on the 50000 test images: 79.1%
Max accuracy: 79.11%
Epoch: [297]  [   0/1251]  eta: 1:07:57  lr: 0.000002  min_lr: 0.000002  loss: 2.1044 (2.1044)  weight_decay: 0.0500 (0.0500)  time: 3.2590  data: 2.2360  max mem: 9147
Epoch: [297]  [ 200/1251]  eta: 0:03:30  lr: 0.000002  min_lr: 0.000002  loss: 2.2584 (2.6765)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2235 (1.2741)  time: 0.1812  data: 0.0006  max mem: 9147
Epoch: [297]  [ 400/1251]  eta: 0:02:51  lr: 0.000002  min_lr: 0.000002  loss: 2.1843 (2.6776)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1476 (1.2592)  time: 0.2156  data: 0.0007  max mem: 9147
Epoch: [297]  [ 600/1251]  eta: 0:02:14  lr: 0.000002  min_lr: 0.000002  loss: 2.0615 (2.6776)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2133 (1.2552)  time: 0.2054  data: 0.0006  max mem: 9147
Epoch: [297]  [ 800/1251]  eta: 0:01:32  lr: 0.000002  min_lr: 0.000002  loss: 2.2284 (2.6858)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2118 (1.2407)  time: 0.1927  data: 0.0006  max mem: 9147
Epoch: [297]  [1000/1251]  eta: 0:00:50  lr: 0.000002  min_lr: 0.000002  loss: 2.2190 (2.7052)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2995 (1.2414)  time: 0.1896  data: 0.0006  max mem: 9147
Epoch: [297]  [1200/1251]  eta: 0:00:10  lr: 0.000002  min_lr: 0.000002  loss: 2.0791 (2.7006)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2558 (1.2444)  time: 0.2205  data: 0.0024  max mem: 9147
Epoch: [297]  [1250/1251]  eta: 0:00:00  lr: 0.000002  min_lr: 0.000002  loss: 2.1947 (2.7028)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2353 (1.2481)  time: 0.1380  data: 0.0015  max mem: 9147
Epoch: [297] Total time: 0:04:14 (0.2034 s / it)
Averaged stats: lr: 0.000002  min_lr: 0.000002  loss: 2.1947 (2.7149)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2353 (1.2481)
Test:  [ 0/25]  eta: 0:02:18  loss: 0.6196 (0.6196)  acc1: 87.6000 (87.6000)  acc5: 98.8000 (98.8000)  time: 5.5393  data: 5.4590  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8095 (0.8109)  acc1: 84.0000 (83.1636)  acc5: 96.8000 (96.7636)  time: 0.7729  data: 0.6895  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0235 (0.9867)  acc1: 78.0000 (79.5429)  acc5: 93.6000 (94.6667)  time: 0.2235  data: 0.1466  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1294 (0.9966)  acc1: 77.2000 (79.1360)  acc5: 93.2000 (94.6400)  time: 0.2223  data: 0.1465  max mem: 9147
Test: Total time: 0:00:10 (0.4145 s / it)
* Acc@1 79.098 Acc@5 94.534 loss 0.997
Accuracy of the model on the 50000 test images: 79.1%
Max accuracy: 79.11%
Epoch: [298]  [   0/1251]  eta: 1:03:45  lr: 0.000002  min_lr: 0.000002  loss: 4.0443 (4.0443)  weight_decay: 0.0500 (0.0500)  time: 3.0582  data: 2.4922  max mem: 9147
Epoch: [298]  [ 200/1251]  eta: 0:03:51  lr: 0.000001  min_lr: 0.000001  loss: 2.1306 (2.6943)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1905 (1.2724)  time: 0.1824  data: 0.0005  max mem: 9147
Epoch: [298]  [ 400/1251]  eta: 0:02:59  lr: 0.000001  min_lr: 0.000001  loss: 3.4585 (2.7541)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1190 (1.2382)  time: 0.2306  data: 0.0010  max mem: 9147
Epoch: [298]  [ 600/1251]  eta: 0:02:17  lr: 0.000001  min_lr: 0.000001  loss: 2.0409 (2.7286)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1688 (1.2323)  time: 0.1969  data: 0.0012  max mem: 9147
Epoch: [298]  [ 800/1251]  eta: 0:01:35  lr: 0.000001  min_lr: 0.000001  loss: 2.5003 (2.7314)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2484 (1.2376)  time: 0.2153  data: 0.0008  max mem: 9147
Epoch: [298]  [1000/1251]  eta: 0:00:53  lr: 0.000001  min_lr: 0.000001  loss: 3.0963 (2.7232)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2077 (1.2435)  time: 0.1933  data: 0.0006  max mem: 9147
Epoch: [298]  [1200/1251]  eta: 0:00:10  lr: 0.000001  min_lr: 0.000001  loss: 2.3084 (2.7266)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2098 (1.2487)  time: 0.2051  data: 0.0008  max mem: 9147
Epoch: [298]  [1250/1251]  eta: 0:00:00  lr: 0.000001  min_lr: 0.000001  loss: 2.2394 (2.7297)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2368 (1.2537)  time: 0.1479  data: 0.0011  max mem: 9147
Epoch: [298] Total time: 0:04:23 (0.2104 s / it)
Averaged stats: lr: 0.000001  min_lr: 0.000001  loss: 2.2394 (2.7144)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2368 (1.2537)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.6946 (0.6946)  acc1: 87.6000 (87.6000)  acc5: 98.8000 (98.8000)  time: 5.7182  data: 5.6153  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8922 (0.8939)  acc1: 83.6000 (83.0909)  acc5: 97.2000 (96.7273)  time: 0.7636  data: 0.6861  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.1147 (1.0766)  acc1: 77.2000 (79.1810)  acc5: 93.2000 (94.6286)  time: 0.2043  data: 0.1321  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.2133 (1.0882)  acc1: 77.2000 (78.7520)  acc5: 92.8000 (94.5920)  time: 0.2031  data: 0.1320  max mem: 9147
Test: Total time: 0:00:10 (0.4068 s / it)
* Acc@1 78.834 Acc@5 94.454 loss 1.087
Accuracy of the model on the 50000 test images: 78.8%
Max accuracy: 79.11%
Epoch: [299]  [   0/1251]  eta: 1:04:56  lr: 0.000001  min_lr: 0.000001  loss: 3.4097 (3.4097)  weight_decay: 0.0500 (0.0500)  time: 3.1151  data: 2.4840  max mem: 9147
Epoch: [299]  [ 200/1251]  eta: 0:03:55  lr: 0.000001  min_lr: 0.000001  loss: 2.0881 (2.7120)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1263 (1.2541)  time: 0.2147  data: 0.0007  max mem: 9147
Epoch: [299]  [ 400/1251]  eta: 0:03:04  lr: 0.000001  min_lr: 0.000001  loss: 2.2006 (2.7165)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2581 (1.2937)  time: 0.2091  data: 0.0006  max mem: 9147
Epoch: [299]  [ 600/1251]  eta: 0:02:16  lr: 0.000001  min_lr: 0.000001  loss: 2.3580 (2.7089)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1604 (1.2704)  time: 0.1892  data: 0.0005  max mem: 9147
Epoch: [299]  [ 800/1251]  eta: 0:01:34  lr: 0.000001  min_lr: 0.000001  loss: 2.4375 (2.7278)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2800 (1.2798)  time: 0.2141  data: 0.0008  max mem: 9147
Epoch: [299]  [1000/1251]  eta: 0:00:52  lr: 0.000001  min_lr: 0.000001  loss: 2.1397 (2.7174)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2021 (1.2867)  time: 0.1952  data: 0.0007  max mem: 9147
Epoch: [299]  [1200/1251]  eta: 0:00:10  lr: 0.000001  min_lr: 0.000001  loss: 2.1435 (2.7195)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1569 (nan)  time: 0.1841  data: 0.0005  max mem: 9147
Epoch: [299]  [1250/1251]  eta: 0:00:00  lr: 0.000001  min_lr: 0.000001  loss: 2.2839 (2.7139)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1568 (nan)  time: 0.1390  data: 0.0013  max mem: 9147
Epoch: [299] Total time: 0:04:15 (0.2044 s / it)
Averaged stats: lr: 0.000001  min_lr: 0.000001  loss: 2.2839 (2.7066)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1568 (nan)
Test:  [ 0/25]  eta: 0:02:22  loss: 0.6513 (0.6513)  acc1: 89.2000 (89.2000)  acc5: 98.8000 (98.8000)  time: 5.7186  data: 5.6373  max mem: 9147
Test:  [10/25]  eta: 0:00:11  loss: 0.8259 (0.8414)  acc1: 84.0000 (83.0909)  acc5: 96.8000 (96.6909)  time: 0.7846  data: 0.6886  max mem: 9147
Test:  [20/25]  eta: 0:00:02  loss: 1.0517 (1.0203)  acc1: 77.6000 (79.4667)  acc5: 94.0000 (94.7238)  time: 0.2285  data: 0.1443  max mem: 9147
Test:  [24/25]  eta: 0:00:00  loss: 1.1650 (1.0312)  acc1: 77.6000 (79.0560)  acc5: 93.6000 (94.6880)  time: 0.2223  data: 0.1443  max mem: 9147
Test: Total time: 0:00:10 (0.4266 s / it)
* Acc@1 79.040 Acc@5 94.540 loss 1.032
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.11%
Training time 22:41:09
