***************
** Arguments **
***************
backbone: 
config_file: configs/trainers/ProDA/vit_b16_ep50_c4_BZ4_ProDA.yaml
dataset_config_file: configs/datasets/sun397.yaml
eval_only: False
head: 
load_epoch: None
model_dir: 
no_train: False
opts: ['DATASET.NUM_SHOTS', '16', 'DATASET.SUBSAMPLE_CLASSES', 'base']
output_dir: output/base2new/train_base/sun397/vit_b16_ep50_c4_BZ4_ProDA/seed1
resume: 
root: /mnt/hdd/DATA
seed: 1
source_domains: None
target_domains: None
trainer: ProDA
transforms: None
************
** Config **
************
DATALOADER:
  K_TRANSFORMS: 1
  NUM_WORKERS: 8
  RETURN_IMG0: False
  TEST:
    BATCH_SIZE: 100
    SAMPLER: SequentialSampler
  TRAIN_U:
    BATCH_SIZE: 32
    N_DOMAIN: 0
    N_INS: 16
    SAME_AS_X: True
    SAMPLER: RandomSampler
  TRAIN_X:
    BATCH_SIZE: 4
    N_DOMAIN: 0
    N_INS: 16
    SAMPLER: RandomSampler
DATASET:
  ALL_AS_UNLABELED: False
  CIFAR_C_LEVEL: 1
  CIFAR_C_TYPE: 
  NAME: SUN397
  NUM_LABELED: -1
  NUM_SHOTS: 16
  ROOT: /mnt/hdd/DATA
  SOURCE_DOMAINS: ()
  STL10_FOLD: -1
  SUBSAMPLE_CLASSES: base
  TARGET_DOMAINS: ()
  VAL_PERCENT: 0.1
INPUT:
  COLORJITTER_B: 0.4
  COLORJITTER_C: 0.4
  COLORJITTER_H: 0.1
  COLORJITTER_S: 0.4
  CROP_PADDING: 4
  CUTOUT_LEN: 16
  CUTOUT_N: 1
  GB_K: 21
  GB_P: 0.5
  GN_MEAN: 0.0
  GN_STD: 0.15
  INTERPOLATION: bicubic
  NO_TRANSFORM: False
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  RANDAUGMENT_M: 10
  RANDAUGMENT_N: 2
  RGS_P: 0.2
  RRCROP_SCALE: (0.08, 1.0)
  SIZE: (224, 224)
  TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize')
MODEL:
  BACKBONE:
    NAME: ViT-B/16
    PRETRAINED: True
  HEAD:
    ACTIVATION: relu
    BN: True
    DROPOUT: 0.0
    HIDDEN_LAYERS: ()
    NAME: 
  INIT_WEIGHTS: 
OPTIM:
  ADAM_BETA1: 0.9
  ADAM_BETA2: 0.999
  BASE_LR_MULT: 0.1
  GAMMA: 0.1
  LR: 0.002
  LR_SCHEDULER: cosine
  MAX_EPOCH: 50
  MOMENTUM: 0.9
  NAME: sgd
  NEW_LAYERS: ()
  RMSPROP_ALPHA: 0.99
  SGD_DAMPNING: 0
  SGD_NESTEROV: False
  STAGED_LR: False
  STEPSIZE: (-1,)
  WARMUP_CONS_LR: 1e-05
  WARMUP_EPOCH: 5
  WARMUP_MIN_LR: 1e-05
  WARMUP_RECOUNT: True
  WARMUP_TYPE: constant
  WEIGHT_DECAY: 0.0005
OUTPUT_DIR: output/base2new/train_base/sun397/vit_b16_ep50_c4_BZ4_ProDA/seed1
RESUME: 
SEED: 1
TEST:
  COMPUTE_CMAT: False
  EVALUATOR: Classification
  FINAL_MODEL: last_step
  NO_TEST: False
  PER_CLASS_RESULT: False
  SPLIT: test
TRAIN:
  CHECKPOINT_FREQ: 0
  COUNT_ITER: train_x
  PRINT_FREQ: 20
TRAINER:
  CDAC:
    CLASS_LR_MULTI: 10
    P_THRESH: 0.95
    RAMPUP_COEF: 30
    RAMPUP_ITRS: 1000
    STRONG_TRANSFORMS: ()
    TOPK_MATCH: 5
  COCOOP:
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  COOP:
    CLASS_TOKEN_POSITION: end
    CSC: False
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  CROSSGRAD:
    ALPHA_D: 0.5
    ALPHA_F: 0.5
    EPS_D: 1.0
    EPS_F: 1.0
  DAEL:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DAELDG:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DDAIG:
    ALPHA: 0.5
    CLAMP: False
    CLAMP_MAX: 1.0
    CLAMP_MIN: -1.0
    G_ARCH: 
    LMDA: 0.3
    WARMUP: 0
  DOMAINMIX:
    ALPHA: 1.0
    BETA: 1.0
    TYPE: crossdomain
  ENTMIN:
    LMDA: 0.001
  FIXMATCH:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 1.0
  IVLP:
    CTX_INIT: a photo of a
    N_CTX_TEXT: 2
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_TEXT: 9
    PROMPT_DEPTH_VISION: 9
  M3SDA:
    LMDA: 0.5
    N_STEP_F: 4
  MAPLE:
    CTX_INIT: a photo of a
    N_CTX: 4
    PREC: fp16
    PROMPT_DEPTH: 9
  MCD:
    N_STEP_F: 4
  MEANTEACHER:
    EMA_ALPHA: 0.999
    RAMPUP: 5
    WEIGHT_U: 1.0
  MIXMATCH:
    MIXUP_BETA: 0.75
    RAMPUP: 20000
    TEMP: 2.0
    WEIGHT_U: 100.0
  MME:
    LMDA: 0.1
  NAME: ProDA
  ProDA:
    N_CTX: 4
    N_PROMPT: 32
    PREC: fp16
  SE:
    CONF_THRE: 0.95
    EMA_ALPHA: 0.999
    RAMPUP: 300
  VPT:
    CTX_INIT: a photo of a
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_VISION: 1
USE_CUDA: True
VERBOSE: True
VERSION: 1
Collecting env info ...
** System info **
PyTorch version: 2.2.1+cu121
Is debug build: False
CUDA used to build PyTorch: 12.1
ROCM used to build PyTorch: N/A

OS: Debian GNU/Linux 12 (bookworm) (x86_64)
GCC version: (Debian 12.2.0-14) 12.2.0
Clang version: Could not collect
CMake version: Could not collect
Libc version: glibc-2.36

Python version: 3.11.2 (main, Mar 13 2023, 12:18:29) [GCC 12.2.0] (64-bit runtime)
Python platform: Linux-6.5.13-3-pve-x86_64-with-glibc2.36
Is CUDA available: True
CUDA runtime version: 11.8.89
CUDA_MODULE_LOADING set to: LAZY
GPU models and configuration: 
GPU 0: NVIDIA A800 80GB PCIe
GPU 1: NVIDIA A800 80GB PCIe

Nvidia driver version: 525.147.05
cuDNN version: Could not collect
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True

CPU:
Architecture:                       x86_64
CPU op-mode(s):                     32-bit, 64-bit
Address sizes:                      46 bits physical, 57 bits virtual
Byte Order:                         Little Endian
CPU(s):                             64
On-line CPU(s) list:                18,20,22,23,25-27,29,31,32,34,37,46-49
Off-line CPU(s) list:               0-17,19,21,24,28,30,33,35,36,38-45,50-63
Vendor ID:                          GenuineIntel
Model name:                         Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz
CPU family:                         6
Model:                              106
Thread(s) per core:                 2
Core(s) per socket:                 16
Socket(s):                          2
Stepping:                           6
CPU(s) scaling MHz:                 96%
CPU max MHz:                        3500.0000
CPU min MHz:                        800.0000
BogoMIPS:                           5800.00
Flags:                              fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect wbnoinvd dtherm ida arat pln pts vnmi avx512vbmi umip pku ospke avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid fsrm md_clear pconfig flush_l1d arch_capabilities
Virtualization:                     VT-x
L1d cache:                          1.5 MiB (32 instances)
L1i cache:                          1 MiB (32 instances)
L2 cache:                           40 MiB (32 instances)
L3 cache:                           48 MiB (2 instances)
NUMA node(s):                       2
NUMA node0 CPU(s):                  0-15,32-47
NUMA node1 CPU(s):                  16-31,48-63
Vulnerability Gather data sampling: Vulnerable: No microcode
Vulnerability Itlb multihit:        Not affected
Vulnerability L1tf:                 Not affected
Vulnerability Mds:                  Not affected
Vulnerability Meltdown:             Not affected
Vulnerability Mmio stale data:      Mitigation; Clear CPU buffers; SMT vulnerable
Vulnerability Retbleed:             Not affected
Vulnerability Spec rstack overflow: Not affected
Vulnerability Spec store bypass:    Mitigation; Speculative Store Bypass disabled via prctl
Vulnerability Spectre v1:           Mitigation; usercopy/swapgs barriers and __user pointer sanitization
Vulnerability Spectre v2:           Mitigation; Enhanced / Automatic IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
Vulnerability Srbds:                Not affected
Vulnerability Tsx async abort:      Not affected

Versions of relevant libraries:
[pip3] flake8==3.7.9
[pip3] flake8==3.7.9
[pip3] numpy==1.26.4
[pip3] torch==2.2.1
[pip3] torchaudio==2.2.1
[pip3] torchvision==0.17.1
[pip3] triton==2.2.0
[conda] Could not collect
        Pillow (10.2.0)

Loading trainer: ProDA
Loading dataset: SUN397
Reading split from /mnt/hdd/DATA/sun397/split_zhou_SUN397.json
Loading preprocessed few-shot data from /mnt/hdd/DATA/sun397/split_fewshot/shot_16_shuffled-seed_1.pkl
SUBSAMPLE BASE CLASSES!
Building transform_train
+ random resized crop (size=(224, 224), scale=(0.08, 1.0))
+ random flip
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
Building transform_test
+ resize the smaller edge to 224
+ 224x224 center crop
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
---------  ------
Dataset    SUN397
# classes  199
# train_x  3,184
# val      796
# test     9,950
---------  ------
Loading CLIP (backbone: ViT-B/16)
Building custom CLIP
Turning off gradients in both the image and the text encoder
Parameters to be updated: {'prompt_learner.ctx'}
Loading evaluator: Classification
No checkpoint found, train from scratch
Initialize tensorboard (log_dir=output/base2new/train_base/sun397/vit_b16_ep50_c4_BZ4_ProDA/seed1/tensorboard)
epoch [1/50] batch [20/796] time 0.820 (1.066) data 0.000 (0.035) loss 1.4699 (2.3175) lr 1.0000e-05 eta 11:46:45
epoch [1/50] batch [40/796] time 0.923 (0.959) data 0.000 (0.017) loss 1.6980 (2.2305) lr 1.0000e-05 eta 10:35:33
epoch [1/50] batch [60/796] time 1.530 (1.078) data 0.001 (0.012) loss 1.9585 (2.1472) lr 1.0000e-05 eta 11:53:41
epoch [1/50] batch [80/796] time 1.512 (1.157) data 0.000 (0.009) loss 3.1972 (2.1030) lr 1.0000e-05 eta 12:45:46
epoch [1/50] batch [100/796] time 1.543 (1.191) data 0.000 (0.007) loss 0.8049 (2.1327) lr 1.0000e-05 eta 13:07:57
epoch [1/50] batch [120/796] time 1.104 (1.223) data 0.000 (0.006) loss 2.8514 (2.1459) lr 1.0000e-05 eta 13:28:56
epoch [1/50] batch [140/796] time 1.529 (1.247) data 0.000 (0.005) loss 1.3141 (2.1443) lr 1.0000e-05 eta 13:44:28
epoch [1/50] batch [160/796] time 0.831 (1.254) data 0.000 (0.005) loss 2.7822 (2.1179) lr 1.0000e-05 eta 13:48:34
epoch [1/50] batch [180/796] time 0.923 (1.213) data 0.000 (0.004) loss 4.7610 (2.0985) lr 1.0000e-05 eta 13:20:46
epoch [1/50] batch [200/796] time 0.839 (1.217) data 0.000 (0.004) loss 1.6710 (2.0822) lr 1.0000e-05 eta 13:23:00
epoch [1/50] batch [220/796] time 1.517 (1.209) data 0.000 (0.003) loss 1.4654 (2.0499) lr 1.0000e-05 eta 13:17:48
epoch [1/50] batch [240/796] time 1.538 (1.218) data 0.000 (0.003) loss 1.0989 (2.0201) lr 1.0000e-05 eta 13:22:47
epoch [1/50] batch [260/796] time 0.812 (1.225) data 0.000 (0.003) loss 0.8649 (2.0225) lr 1.0000e-05 eta 13:27:05
epoch [1/50] batch [280/796] time 1.516 (1.236) data 0.000 (0.003) loss 1.2942 (2.0018) lr 1.0000e-05 eta 13:34:14
epoch [1/50] batch [300/796] time 1.535 (1.239) data 0.000 (0.003) loss 2.5594 (2.0086) lr 1.0000e-05 eta 13:35:41
epoch [1/50] batch [320/796] time 1.258 (1.247) data 0.000 (0.002) loss 2.1351 (1.9962) lr 1.0000e-05 eta 13:40:46
epoch [1/50] batch [340/796] time 0.841 (1.242) data 0.000 (0.002) loss 2.6212 (1.9725) lr 1.0000e-05 eta 13:36:29
epoch [1/50] batch [360/796] time 0.813 (1.224) data 0.000 (0.002) loss 0.1620 (1.9762) lr 1.0000e-05 eta 13:24:18
epoch [1/50] batch [380/796] time 0.932 (1.205) data 0.000 (0.002) loss 1.8310 (1.9662) lr 1.0000e-05 eta 13:11:26
epoch [1/50] batch [400/796] time 1.664 (1.224) data 0.000 (0.002) loss 4.1607 (1.9573) lr 1.0000e-05 eta 13:23:57
epoch [1/50] batch [420/796] time 1.676 (1.247) data 0.000 (0.002) loss 4.2579 (1.9535) lr 1.0000e-05 eta 13:38:13
epoch [1/50] batch [440/796] time 1.720 (1.267) data 0.000 (0.002) loss 0.4543 (1.9368) lr 1.0000e-05 eta 13:51:15
epoch [1/50] batch [460/796] time 1.701 (1.268) data 0.000 (0.002) loss 0.7461 (1.9285) lr 1.0000e-05 eta 13:51:41
epoch [1/50] batch [480/796] time 1.701 (1.286) data 0.000 (0.002) loss 0.3000 (1.9221) lr 1.0000e-05 eta 14:02:58
epoch [1/50] batch [500/796] time 1.707 (1.303) data 0.000 (0.002) loss 3.1286 (1.9164) lr 1.0000e-05 eta 14:13:20
epoch [1/50] batch [520/796] time 1.687 (1.318) data 0.000 (0.002) loss 1.6856 (1.9043) lr 1.0000e-05 eta 14:22:41
epoch [1/50] batch [540/796] time 1.688 (1.322) data 0.000 (0.002) loss 0.4705 (1.8942) lr 1.0000e-05 eta 14:24:46
epoch [1/50] batch [560/796] time 1.684 (1.335) data 0.000 (0.001) loss 0.4344 (1.8832) lr 1.0000e-05 eta 14:32:55
epoch [1/50] batch [580/796] time 1.693 (1.347) data 0.000 (0.001) loss 0.9928 (1.8708) lr 1.0000e-05 eta 14:40:40
epoch [1/50] batch [600/796] time 0.839 (1.355) data 0.000 (0.001) loss 1.9608 (1.8754) lr 1.0000e-05 eta 14:45:10
epoch [1/50] batch [620/796] time 1.689 (1.361) data 0.000 (0.001) loss 3.5167 (1.8705) lr 1.0000e-05 eta 14:48:47
epoch [1/50] batch [640/796] time 1.707 (1.371) data 0.000 (0.001) loss 1.6464 (1.8611) lr 1.0000e-05 eta 14:55:05
epoch [1/50] batch [660/796] time 1.683 (1.381) data 0.000 (0.001) loss 1.5276 (1.8508) lr 1.0000e-05 eta 15:00:47
epoch [1/50] batch [680/796] time 1.691 (1.382) data 0.000 (0.001) loss 1.5297 (1.8428) lr 1.0000e-05 eta 15:00:54
epoch [1/50] batch [700/796] time 1.662 (1.390) data 0.000 (0.001) loss 1.7190 (1.8371) lr 1.0000e-05 eta 15:05:57
epoch [1/50] batch [720/796] time 1.695 (1.398) data 0.000 (0.001) loss 0.6619 (1.8314) lr 1.0000e-05 eta 15:10:50
epoch [1/50] batch [740/796] time 1.699 (1.406) data 0.000 (0.001) loss 0.9346 (1.8301) lr 1.0000e-05 eta 15:15:35
epoch [1/50] batch [760/796] time 1.675 (1.407) data 0.000 (0.001) loss 0.5768 (1.8223) lr 1.0000e-05 eta 15:15:14
epoch [1/50] batch [780/796] time 1.686 (1.414) data 0.000 (0.001) loss 0.9569 (1.8212) lr 1.0000e-05 eta 15:19:36
epoch [2/50] batch [20/796] time 0.853 (1.659) data 0.000 (0.033) loss 2.6728 (1.5054) lr 1.0000e-05 eta 17:57:39
epoch [2/50] batch [40/796] time 1.683 (1.602) data 0.000 (0.017) loss 1.5809 (1.4523) lr 1.0000e-05 eta 17:20:31
epoch [2/50] batch [60/796] time 1.697 (1.633) data 0.001 (0.012) loss 1.4004 (1.5629) lr 1.0000e-05 eta 17:39:37
epoch [2/50] batch [80/796] time 1.666 (1.646) data 0.000 (0.009) loss 2.0935 (1.5979) lr 1.0000e-05 eta 17:47:57
epoch [2/50] batch [100/796] time 1.695 (1.614) data 0.000 (0.007) loss 1.9606 (1.5760) lr 1.0000e-05 eta 17:26:26
epoch [2/50] batch [120/796] time 1.702 (1.627) data 0.000 (0.006) loss 2.9827 (1.5634) lr 1.0000e-05 eta 17:34:29
epoch [2/50] batch [140/796] time 1.715 (1.638) data 0.000 (0.005) loss 0.9568 (1.5874) lr 1.0000e-05 eta 17:40:48
epoch [2/50] batch [160/796] time 1.682 (1.645) data 0.000 (0.005) loss 1.6336 (1.5843) lr 1.0000e-05 eta 17:45:07
epoch [2/50] batch [180/796] time 1.410 (1.581) data 0.000 (0.004) loss 0.3213 (1.5780) lr 1.0000e-05 eta 17:02:45
epoch [2/50] batch [200/796] time 0.908 (1.533) data 0.000 (0.004) loss 0.9813 (1.5757) lr 1.0000e-05 eta 16:31:32
epoch [2/50] batch [220/796] time 0.841 (1.472) data 0.000 (0.003) loss 3.0686 (1.6025) lr 1.0000e-05 eta 15:51:45
epoch [2/50] batch [240/796] time 1.249 (1.440) data 0.001 (0.003) loss 1.0423 (1.6075) lr 1.0000e-05 eta 15:30:01
epoch [2/50] batch [260/796] time 0.832 (1.404) data 0.000 (0.003) loss 0.2893 (1.5951) lr 1.0000e-05 eta 15:06:38
epoch [2/50] batch [280/796] time 1.595 (1.380) data 0.001 (0.003) loss 0.8139 (1.5701) lr 1.0000e-05 eta 14:50:29
epoch [2/50] batch [300/796] time 0.841 (1.386) data 0.000 (0.003) loss 0.7720 (1.5650) lr 1.0000e-05 eta 14:54:14
epoch [2/50] batch [320/796] time 1.603 (1.400) data 0.001 (0.002) loss 0.6577 (1.5687) lr 1.0000e-05 eta 15:02:32
epoch [2/50] batch [340/796] time 1.635 (1.406) data 0.000 (0.002) loss 1.5657 (1.5808) lr 1.0000e-05 eta 15:06:04
epoch [2/50] batch [360/796] time 1.621 (1.409) data 0.001 (0.002) loss 1.1566 (1.5644) lr 1.0000e-05 eta 15:07:16
epoch [2/50] batch [380/796] time 1.629 (1.411) data 0.000 (0.002) loss 2.0523 (1.5420) lr 1.0000e-05 eta 15:08:22
epoch [2/50] batch [400/796] time 0.818 (1.413) data 0.000 (0.002) loss 1.0681 (1.5281) lr 1.0000e-05 eta 15:09:23
epoch [2/50] batch [420/796] time 1.599 (1.423) data 0.000 (0.002) loss 3.1328 (1.5474) lr 1.0000e-05 eta 15:15:02
epoch [2/50] batch [440/796] time 1.628 (1.427) data 0.000 (0.002) loss 2.6185 (1.5458) lr 1.0000e-05 eta 15:17:08
epoch [2/50] batch [460/796] time 1.625 (1.428) data 0.000 (0.002) loss 1.1970 (1.5368) lr 1.0000e-05 eta 15:17:31
epoch [2/50] batch [480/796] time 1.601 (1.429) data 0.000 (0.002) loss 0.6239 (1.5356) lr 1.0000e-05 eta 15:17:41
epoch [2/50] batch [500/796] time 1.607 (1.433) data 0.000 (0.002) loss 2.4445 (1.5397) lr 1.0000e-05 eta 15:19:19
epoch [2/50] batch [520/796] time 0.996 (1.438) data 0.000 (0.002) loss 3.1137 (1.5395) lr 1.0000e-05 eta 15:22:38
epoch [2/50] batch [540/796] time 1.629 (1.442) data 0.000 (0.002) loss 1.8143 (1.5397) lr 1.0000e-05 eta 15:24:31
epoch [2/50] batch [560/796] time 1.618 (1.443) data 0.000 (0.002) loss 0.4746 (1.5384) lr 1.0000e-05 eta 15:24:17
epoch [2/50] batch [580/796] time 1.611 (1.445) data 0.000 (0.001) loss 1.6934 (1.5504) lr 1.0000e-05 eta 15:25:22
epoch [2/50] batch [600/796] time 1.609 (1.447) data 0.000 (0.001) loss 1.0726 (1.5451) lr 1.0000e-05 eta 15:26:18
epoch [2/50] batch [620/796] time 0.887 (1.448) data 0.000 (0.001) loss 1.6936 (1.5500) lr 1.0000e-05 eta 15:26:35
epoch [2/50] batch [640/796] time 0.957 (1.433) data 0.000 (0.001) loss 0.5366 (1.5543) lr 1.0000e-05 eta 15:16:28
epoch [2/50] batch [660/796] time 0.830 (1.420) data 0.000 (0.001) loss 0.7729 (1.5493) lr 1.0000e-05 eta 15:07:25
epoch [2/50] batch [680/796] time 1.562 (1.409) data 0.000 (0.001) loss 1.1994 (1.5415) lr 1.0000e-05 eta 15:00:04
epoch [2/50] batch [700/796] time 1.551 (1.407) data 0.000 (0.001) loss 1.2684 (1.5398) lr 1.0000e-05 eta 14:58:25
epoch [2/50] batch [720/796] time 1.554 (1.407) data 0.005 (0.001) loss 0.3743 (1.5482) lr 1.0000e-05 eta 14:57:53
epoch [2/50] batch [740/796] time 0.843 (1.406) data 0.000 (0.001) loss 2.8706 (1.5562) lr 1.0000e-05 eta 14:56:24
epoch [2/50] batch [760/796] time 0.933 (1.391) data 0.000 (0.001) loss 1.1234 (1.5584) lr 1.0000e-05 eta 14:46:49
epoch [2/50] batch [780/796] time 0.831 (1.378) data 0.000 (0.001) loss 3.0529 (1.5671) lr 1.0000e-05 eta 14:37:51
epoch [3/50] batch [20/796] time 0.839 (0.859) data 0.000 (0.027) loss 1.0364 (1.4802) lr 1.0000e-05 eta 9:06:58
epoch [3/50] batch [40/796] time 0.817 (0.844) data 0.000 (0.014) loss 2.6816 (1.5332) lr 1.0000e-05 eta 8:56:51
epoch [3/50] batch [60/796] time 0.841 (0.839) data 0.000 (0.009) loss 1.3885 (1.4477) lr 1.0000e-05 eta 8:53:35
epoch [3/50] batch [80/796] time 0.839 (0.837) data 0.000 (0.007) loss 0.3255 (1.4781) lr 1.0000e-05 eta 8:51:42
epoch [3/50] batch [100/796] time 0.839 (0.836) data 0.000 (0.006) loss 1.8257 (1.4684) lr 1.0000e-05 eta 8:50:49
epoch [3/50] batch [120/796] time 0.817 (0.835) data 0.000 (0.005) loss 2.3233 (1.4966) lr 1.0000e-05 eta 8:50:00
epoch [3/50] batch [140/796] time 0.838 (0.834) data 0.000 (0.004) loss 1.8254 (1.4995) lr 1.0000e-05 eta 8:49:26
epoch [3/50] batch [160/796] time 0.829 (0.834) data 0.000 (0.004) loss 1.0019 (1.4801) lr 1.0000e-05 eta 8:48:45
epoch [3/50] batch [180/796] time 0.830 (0.833) data 0.000 (0.003) loss 1.4777 (1.4740) lr 1.0000e-05 eta 8:48:07
epoch [3/50] batch [200/796] time 0.839 (0.833) data 0.000 (0.003) loss 1.4895 (1.4664) lr 1.0000e-05 eta 8:47:31
epoch [3/50] batch [220/796] time 0.839 (0.833) data 0.000 (0.003) loss 2.8326 (1.4630) lr 1.0000e-05 eta 8:47:08
epoch [3/50] batch [240/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.6724 (1.4744) lr 1.0000e-05 eta 8:46:45
epoch [3/50] batch [260/796] time 0.839 (0.832) data 0.000 (0.002) loss 2.4730 (1.4801) lr 1.0000e-05 eta 8:46:20
epoch [3/50] batch [280/796] time 0.840 (0.832) data 0.000 (0.002) loss 3.8871 (1.5017) lr 1.0000e-05 eta 8:46:00
epoch [3/50] batch [300/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.7408 (1.4931) lr 1.0000e-05 eta 8:45:43
epoch [3/50] batch [320/796] time 0.808 (0.832) data 0.000 (0.002) loss 1.7695 (1.4691) lr 1.0000e-05 eta 8:45:18
epoch [3/50] batch [340/796] time 0.839 (0.832) data 0.000 (0.002) loss 3.9343 (1.4836) lr 1.0000e-05 eta 8:44:58
epoch [3/50] batch [360/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.3465 (1.4778) lr 1.0000e-05 eta 8:44:36
epoch [3/50] batch [380/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.0126 (1.4797) lr 1.0000e-05 eta 8:44:19
epoch [3/50] batch [400/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.7948 (1.4772) lr 1.0000e-05 eta 8:44:02
epoch [3/50] batch [420/796] time 0.840 (0.832) data 0.000 (0.001) loss 3.2981 (1.4838) lr 1.0000e-05 eta 8:43:42
epoch [3/50] batch [440/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.1566 (1.4793) lr 1.0000e-05 eta 8:43:20
epoch [3/50] batch [460/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.1107 (1.4792) lr 1.0000e-05 eta 8:43:01
epoch [3/50] batch [480/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.8938 (1.4694) lr 1.0000e-05 eta 8:42:44
epoch [3/50] batch [500/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.0282 (1.4637) lr 1.0000e-05 eta 8:42:24
epoch [3/50] batch [520/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.5231 (1.4667) lr 1.0000e-05 eta 8:42:07
epoch [3/50] batch [540/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.7328 (1.4577) lr 1.0000e-05 eta 8:41:48
epoch [3/50] batch [560/796] time 0.835 (0.831) data 0.000 (0.001) loss 1.2453 (1.4665) lr 1.0000e-05 eta 8:41:29
epoch [3/50] batch [580/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.8678 (1.4634) lr 1.0000e-05 eta 8:41:11
epoch [3/50] batch [600/796] time 0.817 (0.831) data 0.000 (0.001) loss 2.1527 (1.4626) lr 1.0000e-05 eta 8:40:55
epoch [3/50] batch [620/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.6457 (1.4609) lr 1.0000e-05 eta 8:40:37
epoch [3/50] batch [640/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.1361 (1.4570) lr 1.0000e-05 eta 8:40:21
epoch [3/50] batch [660/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.6732 (1.4594) lr 1.0000e-05 eta 8:40:05
epoch [3/50] batch [680/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.7942 (1.4536) lr 1.0000e-05 eta 8:39:48
epoch [3/50] batch [700/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.7026 (1.4506) lr 1.0000e-05 eta 8:39:29
epoch [3/50] batch [720/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.4821 (1.4508) lr 1.0000e-05 eta 8:39:13
epoch [3/50] batch [740/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.5659 (1.4525) lr 1.0000e-05 eta 8:38:54
epoch [3/50] batch [760/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.2031 (1.4542) lr 1.0000e-05 eta 8:38:37
epoch [3/50] batch [780/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.1011 (1.4516) lr 1.0000e-05 eta 8:38:21
epoch [4/50] batch [20/796] time 0.839 (0.862) data 0.000 (0.028) loss 0.6535 (1.6721) lr 1.0000e-05 eta 8:56:54
epoch [4/50] batch [40/796] time 0.818 (0.847) data 0.000 (0.014) loss 0.8125 (1.4994) lr 1.0000e-05 eta 8:47:35
epoch [4/50] batch [60/796] time 0.839 (0.842) data 0.000 (0.010) loss 1.6696 (1.3987) lr 1.0000e-05 eta 8:44:11
epoch [4/50] batch [80/796] time 0.829 (0.838) data 0.000 (0.007) loss 0.7073 (1.3314) lr 1.0000e-05 eta 8:41:39
epoch [4/50] batch [100/796] time 0.830 (0.837) data 0.000 (0.006) loss 4.6349 (1.4343) lr 1.0000e-05 eta 8:40:36
epoch [4/50] batch [120/796] time 0.830 (0.836) data 0.000 (0.005) loss 0.9833 (1.4208) lr 1.0000e-05 eta 8:39:26
epoch [4/50] batch [140/796] time 0.819 (0.835) data 0.000 (0.004) loss 0.8693 (1.4281) lr 1.0000e-05 eta 8:38:32
epoch [4/50] batch [160/796] time 0.840 (0.835) data 0.000 (0.004) loss 1.1808 (1.4273) lr 1.0000e-05 eta 8:38:14
epoch [4/50] batch [180/796] time 0.840 (0.835) data 0.000 (0.003) loss 0.6420 (1.4285) lr 1.0000e-05 eta 8:37:58
epoch [4/50] batch [200/796] time 0.810 (0.834) data 0.000 (0.003) loss 0.2343 (1.4306) lr 1.0000e-05 eta 8:37:32
epoch [4/50] batch [220/796] time 0.833 (0.834) data 0.000 (0.003) loss 1.0560 (1.4276) lr 1.0000e-05 eta 8:37:12
epoch [4/50] batch [240/796] time 0.840 (0.834) data 0.000 (0.003) loss 2.3385 (1.4233) lr 1.0000e-05 eta 8:36:45
epoch [4/50] batch [260/796] time 0.817 (0.834) data 0.000 (0.002) loss 0.1470 (1.4315) lr 1.0000e-05 eta 8:36:17
epoch [4/50] batch [280/796] time 0.839 (0.833) data 0.000 (0.002) loss 0.6280 (1.4471) lr 1.0000e-05 eta 8:35:48
epoch [4/50] batch [300/796] time 0.838 (0.833) data 0.000 (0.002) loss 1.2748 (1.4298) lr 1.0000e-05 eta 8:35:27
epoch [4/50] batch [320/796] time 0.841 (0.833) data 0.000 (0.002) loss 0.2761 (1.4257) lr 1.0000e-05 eta 8:34:59
epoch [4/50] batch [340/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.3638 (1.4116) lr 1.0000e-05 eta 8:34:37
epoch [4/50] batch [360/796] time 0.820 (0.833) data 0.000 (0.002) loss 0.7027 (1.4073) lr 1.0000e-05 eta 8:34:16
epoch [4/50] batch [380/796] time 0.814 (0.833) data 0.000 (0.002) loss 0.8366 (1.4092) lr 1.0000e-05 eta 8:33:53
epoch [4/50] batch [400/796] time 0.809 (0.832) data 0.000 (0.002) loss 1.5525 (1.4077) lr 1.0000e-05 eta 8:33:29
epoch [4/50] batch [420/796] time 0.840 (0.832) data 0.000 (0.002) loss 0.8008 (1.4145) lr 1.0000e-05 eta 8:33:12
epoch [4/50] batch [440/796] time 0.842 (0.832) data 0.000 (0.001) loss 0.5545 (1.4261) lr 1.0000e-05 eta 8:32:55
epoch [4/50] batch [460/796] time 0.842 (0.832) data 0.000 (0.001) loss 2.6962 (1.4379) lr 1.0000e-05 eta 8:32:37
epoch [4/50] batch [480/796] time 0.841 (0.832) data 0.000 (0.001) loss 1.5042 (1.4414) lr 1.0000e-05 eta 8:32:23
epoch [4/50] batch [500/796] time 0.840 (0.832) data 0.000 (0.001) loss 1.5443 (1.4415) lr 1.0000e-05 eta 8:32:04
epoch [4/50] batch [520/796] time 0.831 (0.832) data 0.000 (0.001) loss 0.6662 (1.4381) lr 1.0000e-05 eta 8:31:46
epoch [4/50] batch [540/796] time 0.830 (0.832) data 0.000 (0.001) loss 0.2330 (1.4260) lr 1.0000e-05 eta 8:31:28
epoch [4/50] batch [560/796] time 0.819 (0.832) data 0.000 (0.001) loss 0.6044 (1.4176) lr 1.0000e-05 eta 8:31:09
epoch [4/50] batch [580/796] time 0.838 (0.832) data 0.000 (0.001) loss 2.2892 (1.4183) lr 1.0000e-05 eta 8:30:55
epoch [4/50] batch [600/796] time 0.819 (0.832) data 0.000 (0.001) loss 0.7511 (1.4109) lr 1.0000e-05 eta 8:30:35
epoch [4/50] batch [620/796] time 0.831 (0.832) data 0.000 (0.001) loss 0.4312 (1.4120) lr 1.0000e-05 eta 8:30:20
epoch [4/50] batch [640/796] time 0.836 (0.832) data 0.000 (0.001) loss 0.7766 (1.4064) lr 1.0000e-05 eta 8:30:02
epoch [4/50] batch [660/796] time 0.843 (0.832) data 0.000 (0.001) loss 1.9602 (1.4036) lr 1.0000e-05 eta 8:29:48
epoch [4/50] batch [680/796] time 0.821 (0.832) data 0.000 (0.001) loss 1.0010 (1.3926) lr 1.0000e-05 eta 8:29:35
epoch [4/50] batch [700/796] time 0.839 (0.832) data 0.000 (0.001) loss 1.4926 (1.3946) lr 1.0000e-05 eta 8:29:20
epoch [4/50] batch [720/796] time 0.843 (0.832) data 0.000 (0.001) loss 1.7411 (1.3994) lr 1.0000e-05 eta 8:28:59
epoch [4/50] batch [740/796] time 0.821 (0.832) data 0.000 (0.001) loss 0.7849 (1.3970) lr 1.0000e-05 eta 8:28:42
epoch [4/50] batch [760/796] time 0.819 (0.832) data 0.000 (0.001) loss 1.0367 (1.3954) lr 1.0000e-05 eta 8:28:24
epoch [4/50] batch [780/796] time 0.821 (0.832) data 0.000 (0.001) loss 0.1307 (1.3944) lr 1.0000e-05 eta 8:28:09
epoch [5/50] batch [20/796] time 0.818 (0.858) data 0.000 (0.028) loss 1.4331 (1.2922) lr 1.0000e-05 eta 8:43:21
epoch [5/50] batch [40/796] time 0.841 (0.844) data 0.000 (0.014) loss 0.4203 (1.2490) lr 1.0000e-05 eta 8:34:46
epoch [5/50] batch [60/796] time 0.833 (0.840) data 0.000 (0.009) loss 0.3315 (1.2632) lr 1.0000e-05 eta 8:31:51
epoch [5/50] batch [80/796] time 0.829 (0.838) data 0.000 (0.007) loss 0.4495 (1.2597) lr 1.0000e-05 eta 8:30:02
epoch [5/50] batch [100/796] time 0.839 (0.837) data 0.000 (0.006) loss 0.7414 (1.2601) lr 1.0000e-05 eta 8:29:14
epoch [5/50] batch [120/796] time 0.839 (0.836) data 0.000 (0.005) loss 1.0465 (1.2222) lr 1.0000e-05 eta 8:28:22
epoch [5/50] batch [140/796] time 0.830 (0.835) data 0.000 (0.004) loss 0.3364 (1.2254) lr 1.0000e-05 eta 8:27:37
epoch [5/50] batch [160/796] time 0.832 (0.834) data 0.000 (0.004) loss 1.7712 (1.2220) lr 1.0000e-05 eta 8:26:52
epoch [5/50] batch [180/796] time 0.831 (0.834) data 0.000 (0.003) loss 0.6660 (1.2992) lr 1.0000e-05 eta 8:26:28
epoch [5/50] batch [200/796] time 0.842 (0.834) data 0.000 (0.003) loss 1.9404 (1.2587) lr 1.0000e-05 eta 8:26:01
epoch [5/50] batch [220/796] time 0.831 (0.833) data 0.000 (0.003) loss 2.0015 (1.2746) lr 1.0000e-05 eta 8:25:27
epoch [5/50] batch [240/796] time 0.841 (0.833) data 0.000 (0.002) loss 2.4349 (1.2904) lr 1.0000e-05 eta 8:25:02
epoch [5/50] batch [260/796] time 0.819 (0.833) data 0.000 (0.002) loss 2.2803 (1.3009) lr 1.0000e-05 eta 8:24:42
epoch [5/50] batch [280/796] time 0.829 (0.833) data 0.000 (0.002) loss 0.6732 (1.2806) lr 1.0000e-05 eta 8:24:19
epoch [5/50] batch [300/796] time 0.830 (0.833) data 0.000 (0.002) loss 1.8081 (1.2822) lr 1.0000e-05 eta 8:24:03
epoch [5/50] batch [320/796] time 0.830 (0.833) data 0.000 (0.002) loss 0.6647 (1.2779) lr 1.0000e-05 eta 8:23:40
epoch [5/50] batch [340/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.5651 (1.2963) lr 1.0000e-05 eta 8:23:15
epoch [5/50] batch [360/796] time 0.810 (0.832) data 0.000 (0.002) loss 0.3410 (1.3072) lr 1.0000e-05 eta 8:22:49
epoch [5/50] batch [380/796] time 0.839 (0.832) data 0.000 (0.002) loss 2.1628 (1.3031) lr 1.0000e-05 eta 8:22:28
epoch [5/50] batch [400/796] time 0.819 (0.832) data 0.000 (0.002) loss 1.5585 (1.3229) lr 1.0000e-05 eta 8:22:04
epoch [5/50] batch [420/796] time 0.819 (0.832) data 0.000 (0.001) loss 3.0206 (1.3259) lr 1.0000e-05 eta 8:21:40
epoch [5/50] batch [440/796] time 0.829 (0.832) data 0.000 (0.001) loss 0.4789 (1.3173) lr 1.0000e-05 eta 8:21:20
epoch [5/50] batch [460/796] time 0.838 (0.832) data 0.000 (0.001) loss 1.8564 (1.3188) lr 1.0000e-05 eta 8:21:03
epoch [5/50] batch [480/796] time 0.839 (0.831) data 0.000 (0.001) loss 2.0513 (1.3212) lr 1.0000e-05 eta 8:20:44
epoch [5/50] batch [500/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.7980 (1.3289) lr 1.0000e-05 eta 8:20:27
epoch [5/50] batch [520/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.6473 (1.3194) lr 1.0000e-05 eta 8:20:11
epoch [5/50] batch [540/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.3789 (1.3117) lr 1.0000e-05 eta 8:19:53
epoch [5/50] batch [560/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.8993 (1.2991) lr 1.0000e-05 eta 8:19:33
epoch [5/50] batch [580/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.2648 (1.3004) lr 1.0000e-05 eta 8:19:14
epoch [5/50] batch [600/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.9533 (1.2948) lr 1.0000e-05 eta 8:18:56
epoch [5/50] batch [620/796] time 0.819 (0.831) data 0.000 (0.001) loss 1.3428 (1.2938) lr 1.0000e-05 eta 8:18:38
epoch [5/50] batch [640/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.5860 (1.3090) lr 1.0000e-05 eta 8:18:20
epoch [5/50] batch [660/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.3583 (1.3097) lr 1.0000e-05 eta 8:18:02
epoch [5/50] batch [680/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.5327 (1.3131) lr 1.0000e-05 eta 8:17:44
epoch [5/50] batch [700/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.0934 (1.3138) lr 1.0000e-05 eta 8:17:27
epoch [5/50] batch [720/796] time 0.830 (0.831) data 0.000 (0.001) loss 2.2170 (1.3306) lr 1.0000e-05 eta 8:17:09
epoch [5/50] batch [740/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.8699 (1.3311) lr 1.0000e-05 eta 8:16:53
epoch [5/50] batch [760/796] time 0.831 (0.831) data 0.000 (0.001) loss 1.3372 (1.3392) lr 1.0000e-05 eta 8:16:34
epoch [5/50] batch [780/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.8666 (1.3419) lr 1.0000e-05 eta 8:16:19
epoch [6/50] batch [20/796] time 0.838 (0.858) data 0.000 (0.028) loss 1.0886 (1.6141) lr 2.0000e-03 eta 8:31:59
epoch [6/50] batch [40/796] time 0.842 (0.845) data 0.000 (0.014) loss 1.8748 (1.7394) lr 2.0000e-03 eta 8:23:44
epoch [6/50] batch [60/796] time 0.839 (0.840) data 0.000 (0.009) loss 1.7242 (1.7070) lr 2.0000e-03 eta 8:20:50
epoch [6/50] batch [80/796] time 0.829 (0.838) data 0.000 (0.007) loss 2.4802 (1.6913) lr 2.0000e-03 eta 8:19:06
epoch [6/50] batch [100/796] time 0.818 (0.836) data 0.000 (0.006) loss 0.6306 (1.5981) lr 2.0000e-03 eta 8:17:47
epoch [6/50] batch [120/796] time 0.838 (0.835) data 0.000 (0.005) loss 0.8009 (1.5630) lr 2.0000e-03 eta 8:16:48
epoch [6/50] batch [140/796] time 0.839 (0.835) data 0.000 (0.004) loss 2.2595 (1.5392) lr 2.0000e-03 eta 8:16:17
epoch [6/50] batch [160/796] time 0.817 (0.834) data 0.000 (0.004) loss 0.6277 (1.5514) lr 2.0000e-03 eta 8:15:41
epoch [6/50] batch [180/796] time 0.839 (0.834) data 0.000 (0.003) loss 1.1932 (1.5092) lr 2.0000e-03 eta 8:15:10
epoch [6/50] batch [200/796] time 0.840 (0.833) data 0.000 (0.003) loss 2.3837 (1.4629) lr 2.0000e-03 eta 8:14:47
epoch [6/50] batch [220/796] time 0.829 (0.833) data 0.000 (0.003) loss 1.9088 (1.4287) lr 2.0000e-03 eta 8:14:20
epoch [6/50] batch [240/796] time 0.826 (0.833) data 0.000 (0.002) loss 0.7970 (1.4257) lr 2.0000e-03 eta 8:14:01
epoch [6/50] batch [260/796] time 0.810 (0.833) data 0.000 (0.002) loss 0.2657 (1.4077) lr 2.0000e-03 eta 8:13:32
epoch [6/50] batch [280/796] time 0.839 (0.833) data 0.000 (0.002) loss 0.1556 (1.4062) lr 2.0000e-03 eta 8:13:12
epoch [6/50] batch [300/796] time 0.830 (0.832) data 0.000 (0.002) loss 1.2724 (1.4044) lr 2.0000e-03 eta 8:12:47
epoch [6/50] batch [320/796] time 0.834 (0.832) data 0.000 (0.002) loss 1.9345 (1.3957) lr 2.0000e-03 eta 8:12:25
epoch [6/50] batch [340/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.9406 (1.3775) lr 2.0000e-03 eta 8:12:03
epoch [6/50] batch [360/796] time 0.818 (0.832) data 0.000 (0.002) loss 1.4988 (1.3847) lr 2.0000e-03 eta 8:11:41
epoch [6/50] batch [380/796] time 0.817 (0.832) data 0.000 (0.002) loss 1.3976 (1.3749) lr 2.0000e-03 eta 8:11:19
epoch [6/50] batch [400/796] time 0.816 (0.832) data 0.000 (0.002) loss 1.2185 (1.3663) lr 2.0000e-03 eta 8:11:02
epoch [6/50] batch [420/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.6264 (1.3601) lr 2.0000e-03 eta 8:10:44
epoch [6/50] batch [440/796] time 0.819 (0.832) data 0.000 (0.001) loss 2.7053 (1.3646) lr 2.0000e-03 eta 8:10:27
epoch [6/50] batch [460/796] time 0.818 (0.832) data 0.000 (0.001) loss 1.0890 (1.3577) lr 2.0000e-03 eta 8:10:06
epoch [6/50] batch [480/796] time 0.819 (0.832) data 0.000 (0.001) loss 0.2214 (1.3457) lr 2.0000e-03 eta 8:09:51
epoch [6/50] batch [500/796] time 0.832 (0.832) data 0.000 (0.001) loss 1.0298 (1.3446) lr 2.0000e-03 eta 8:09:38
epoch [6/50] batch [520/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.7085 (1.3393) lr 2.0000e-03 eta 8:09:22
epoch [6/50] batch [540/796] time 0.844 (0.832) data 0.000 (0.001) loss 0.9920 (1.3243) lr 2.0000e-03 eta 8:09:05
epoch [6/50] batch [560/796] time 0.819 (0.832) data 0.000 (0.001) loss 1.0552 (1.3276) lr 2.0000e-03 eta 8:08:47
epoch [6/50] batch [580/796] time 0.831 (0.832) data 0.000 (0.001) loss 1.4568 (1.3184) lr 2.0000e-03 eta 8:08:29
epoch [6/50] batch [600/796] time 0.831 (0.832) data 0.000 (0.001) loss 0.9327 (1.3113) lr 2.0000e-03 eta 8:08:15
epoch [6/50] batch [620/796] time 0.831 (0.832) data 0.000 (0.001) loss 0.2491 (1.3009) lr 2.0000e-03 eta 8:08:01
epoch [6/50] batch [640/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.6171 (1.2925) lr 2.0000e-03 eta 8:07:44
epoch [6/50] batch [660/796] time 0.839 (0.832) data 0.000 (0.001) loss 1.7412 (1.2920) lr 2.0000e-03 eta 8:07:27
epoch [6/50] batch [680/796] time 0.829 (0.832) data 0.000 (0.001) loss 0.2829 (1.2889) lr 2.0000e-03 eta 8:07:12
epoch [6/50] batch [700/796] time 0.831 (0.832) data 0.000 (0.001) loss 0.5204 (1.2823) lr 2.0000e-03 eta 8:06:52
epoch [6/50] batch [720/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.5499 (1.2772) lr 2.0000e-03 eta 8:06:37
epoch [6/50] batch [740/796] time 0.830 (0.832) data 0.000 (0.001) loss 0.4958 (1.2732) lr 2.0000e-03 eta 8:06:20
epoch [6/50] batch [760/796] time 0.830 (0.832) data 0.000 (0.001) loss 2.0994 (1.2664) lr 2.0000e-03 eta 8:06:02
epoch [6/50] batch [780/796] time 0.809 (0.832) data 0.000 (0.001) loss 1.1624 (1.2683) lr 2.0000e-03 eta 8:05:43
epoch [7/50] batch [20/796] time 0.829 (0.859) data 0.000 (0.027) loss 0.1346 (1.1491) lr 1.9980e-03 eta 8:21:11
epoch [7/50] batch [40/796] time 0.830 (0.844) data 0.000 (0.013) loss 1.3422 (1.1860) lr 1.9980e-03 eta 8:12:23
epoch [7/50] batch [60/796] time 0.829 (0.840) data 0.000 (0.009) loss 0.0676 (1.1401) lr 1.9980e-03 eta 8:09:45
epoch [7/50] batch [80/796] time 0.809 (0.838) data 0.000 (0.007) loss 0.1806 (1.1932) lr 1.9980e-03 eta 8:07:51
epoch [7/50] batch [100/796] time 0.838 (0.837) data 0.000 (0.005) loss 0.1221 (1.1735) lr 1.9980e-03 eta 8:06:59
epoch [7/50] batch [120/796] time 0.839 (0.836) data 0.000 (0.005) loss 0.6390 (1.1051) lr 1.9980e-03 eta 8:06:08
epoch [7/50] batch [140/796] time 0.840 (0.835) data 0.000 (0.004) loss 0.8112 (1.1127) lr 1.9980e-03 eta 8:05:24
epoch [7/50] batch [160/796] time 0.831 (0.834) data 0.000 (0.003) loss 1.7498 (1.1597) lr 1.9980e-03 eta 8:04:49
epoch [7/50] batch [180/796] time 0.841 (0.835) data 0.000 (0.003) loss 1.9529 (1.1369) lr 1.9980e-03 eta 8:04:38
epoch [7/50] batch [200/796] time 0.820 (0.834) data 0.000 (0.003) loss 3.4667 (1.1387) lr 1.9980e-03 eta 8:04:00
epoch [7/50] batch [220/796] time 0.839 (0.833) data 0.000 (0.003) loss 0.4831 (1.1301) lr 1.9980e-03 eta 8:03:28
epoch [7/50] batch [240/796] time 0.829 (0.833) data 0.000 (0.002) loss 0.8799 (1.1336) lr 1.9980e-03 eta 8:03:04
epoch [7/50] batch [260/796] time 0.818 (0.833) data 0.000 (0.002) loss 1.8459 (1.1408) lr 1.9980e-03 eta 8:02:46
epoch [7/50] batch [280/796] time 0.817 (0.833) data 0.000 (0.002) loss 0.8173 (1.1560) lr 1.9980e-03 eta 8:02:16
epoch [7/50] batch [300/796] time 0.830 (0.833) data 0.000 (0.002) loss 1.5644 (1.1470) lr 1.9980e-03 eta 8:01:50
epoch [7/50] batch [320/796] time 0.819 (0.832) data 0.000 (0.002) loss 1.3980 (1.1580) lr 1.9980e-03 eta 8:01:30
epoch [7/50] batch [340/796] time 0.808 (0.832) data 0.000 (0.002) loss 0.3927 (1.1404) lr 1.9980e-03 eta 8:01:08
epoch [7/50] batch [360/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.2737 (1.1343) lr 1.9980e-03 eta 8:00:46
epoch [7/50] batch [380/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.4470 (1.1331) lr 1.9980e-03 eta 8:00:28
epoch [7/50] batch [400/796] time 0.819 (0.832) data 0.000 (0.002) loss 0.2851 (1.1409) lr 1.9980e-03 eta 8:00:08
epoch [7/50] batch [420/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.8186 (1.1389) lr 1.9980e-03 eta 7:59:47
epoch [7/50] batch [440/796] time 0.817 (0.832) data 0.000 (0.001) loss 0.9962 (1.1423) lr 1.9980e-03 eta 7:59:27
epoch [7/50] batch [460/796] time 0.819 (0.832) data 0.000 (0.001) loss 0.0335 (1.1397) lr 1.9980e-03 eta 7:59:03
epoch [7/50] batch [480/796] time 0.821 (0.832) data 0.000 (0.001) loss 0.1084 (1.1331) lr 1.9980e-03 eta 7:58:47
epoch [7/50] batch [500/796] time 0.817 (0.832) data 0.000 (0.001) loss 0.5892 (1.1236) lr 1.9980e-03 eta 7:58:26
epoch [7/50] batch [520/796] time 0.843 (0.832) data 0.000 (0.001) loss 0.4681 (1.1331) lr 1.9980e-03 eta 7:58:12
epoch [7/50] batch [540/796] time 0.841 (0.832) data 0.000 (0.001) loss 0.2523 (1.1270) lr 1.9980e-03 eta 7:57:57
epoch [7/50] batch [560/796] time 0.841 (0.832) data 0.000 (0.001) loss 0.6191 (1.1212) lr 1.9980e-03 eta 7:57:42
epoch [7/50] batch [580/796] time 0.841 (0.832) data 0.000 (0.001) loss 0.3342 (1.1181) lr 1.9980e-03 eta 7:57:28
epoch [7/50] batch [600/796] time 0.830 (0.832) data 0.000 (0.001) loss 0.2790 (1.1141) lr 1.9980e-03 eta 7:57:11
epoch [7/50] batch [620/796] time 0.840 (0.832) data 0.000 (0.001) loss 1.0660 (1.1137) lr 1.9980e-03 eta 7:56:57
epoch [7/50] batch [640/796] time 0.819 (0.832) data 0.000 (0.001) loss 2.8404 (1.1143) lr 1.9980e-03 eta 7:56:40
epoch [7/50] batch [660/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.3458 (1.1043) lr 1.9980e-03 eta 7:56:26
epoch [7/50] batch [680/796] time 0.819 (0.832) data 0.000 (0.001) loss 0.1311 (1.1125) lr 1.9980e-03 eta 7:56:09
epoch [7/50] batch [700/796] time 0.810 (0.832) data 0.000 (0.001) loss 1.8582 (1.1159) lr 1.9980e-03 eta 7:55:52
epoch [7/50] batch [720/796] time 0.825 (0.832) data 0.000 (0.001) loss 0.7390 (1.1116) lr 1.9980e-03 eta 7:55:35
epoch [7/50] batch [740/796] time 0.819 (0.832) data 0.000 (0.001) loss 0.3975 (1.1173) lr 1.9980e-03 eta 7:55:18
epoch [7/50] batch [760/796] time 0.830 (0.832) data 0.000 (0.001) loss 0.6553 (1.1217) lr 1.9980e-03 eta 7:55:00
epoch [7/50] batch [780/796] time 0.831 (0.832) data 0.000 (0.001) loss 0.6520 (1.1219) lr 1.9980e-03 eta 7:54:42
epoch [8/50] batch [20/796] time 0.820 (0.864) data 0.000 (0.030) loss 0.8489 (1.0285) lr 1.9921e-03 eta 8:12:24
epoch [8/50] batch [40/796] time 0.839 (0.847) data 0.000 (0.015) loss 0.7263 (0.9736) lr 1.9921e-03 eta 8:02:52
epoch [8/50] batch [60/796] time 0.830 (0.842) data 0.000 (0.010) loss 1.9936 (0.9514) lr 1.9921e-03 eta 7:59:35
epoch [8/50] batch [80/796] time 0.829 (0.839) data 0.000 (0.008) loss 0.1889 (0.9852) lr 1.9921e-03 eta 7:57:45
epoch [8/50] batch [100/796] time 0.838 (0.837) data 0.000 (0.006) loss 1.0819 (1.0886) lr 1.9921e-03 eta 7:56:12
epoch [8/50] batch [120/796] time 0.838 (0.836) data 0.000 (0.005) loss 0.3578 (1.1065) lr 1.9921e-03 eta 7:55:16
epoch [8/50] batch [140/796] time 0.830 (0.835) data 0.000 (0.004) loss 1.3302 (1.0873) lr 1.9921e-03 eta 7:54:36
epoch [8/50] batch [160/796] time 0.811 (0.834) data 0.000 (0.004) loss 0.4182 (1.1031) lr 1.9921e-03 eta 7:53:48
epoch [8/50] batch [180/796] time 0.840 (0.834) data 0.000 (0.003) loss 2.4081 (1.0958) lr 1.9921e-03 eta 7:53:25
epoch [8/50] batch [200/796] time 0.830 (0.834) data 0.000 (0.003) loss 1.8565 (1.0937) lr 1.9921e-03 eta 7:53:07
epoch [8/50] batch [220/796] time 0.839 (0.834) data 0.000 (0.003) loss 0.7732 (1.0636) lr 1.9921e-03 eta 7:52:35
epoch [8/50] batch [240/796] time 0.838 (0.834) data 0.000 (0.003) loss 1.4158 (1.0656) lr 1.9921e-03 eta 7:52:10
epoch [8/50] batch [260/796] time 0.818 (0.833) data 0.000 (0.002) loss 1.2958 (1.0687) lr 1.9921e-03 eta 7:51:36
epoch [8/50] batch [280/796] time 0.836 (0.833) data 0.000 (0.002) loss 1.7062 (1.0658) lr 1.9921e-03 eta 7:51:12
epoch [8/50] batch [300/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.3387 (1.0605) lr 1.9921e-03 eta 7:50:53
epoch [8/50] batch [320/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.1018 (1.0622) lr 1.9921e-03 eta 7:50:35
epoch [8/50] batch [340/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.9476 (1.0732) lr 1.9921e-03 eta 7:50:11
epoch [8/50] batch [360/796] time 0.829 (0.832) data 0.000 (0.002) loss 1.2105 (1.0798) lr 1.9921e-03 eta 7:49:49
epoch [8/50] batch [380/796] time 0.838 (0.832) data 0.000 (0.002) loss 2.1594 (1.0811) lr 1.9921e-03 eta 7:49:24
epoch [8/50] batch [400/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.7680 (1.0824) lr 1.9921e-03 eta 7:49:04
epoch [8/50] batch [420/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.6284 (1.0786) lr 1.9921e-03 eta 7:48:39
epoch [8/50] batch [440/796] time 0.819 (0.832) data 0.000 (0.002) loss 1.5953 (1.0965) lr 1.9921e-03 eta 7:48:19
epoch [8/50] batch [460/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.4847 (1.0880) lr 1.9921e-03 eta 7:48:03
epoch [8/50] batch [480/796] time 0.838 (0.832) data 0.000 (0.001) loss 1.8499 (1.0881) lr 1.9921e-03 eta 7:47:48
epoch [8/50] batch [500/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.1817 (1.0898) lr 1.9921e-03 eta 7:47:30
epoch [8/50] batch [520/796] time 0.840 (0.832) data 0.000 (0.001) loss 1.2654 (1.0833) lr 1.9921e-03 eta 7:47:11
epoch [8/50] batch [540/796] time 0.837 (0.832) data 0.000 (0.001) loss 0.6678 (1.0869) lr 1.9921e-03 eta 7:46:54
epoch [8/50] batch [560/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.0451 (1.0826) lr 1.9921e-03 eta 7:46:38
epoch [8/50] batch [580/796] time 0.828 (0.832) data 0.000 (0.001) loss 1.6552 (1.0843) lr 1.9921e-03 eta 7:46:20
epoch [8/50] batch [600/796] time 0.811 (0.832) data 0.000 (0.001) loss 1.2339 (1.0751) lr 1.9921e-03 eta 7:46:02
epoch [8/50] batch [620/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.0972 (1.0698) lr 1.9921e-03 eta 7:45:44
epoch [8/50] batch [640/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.2669 (1.0733) lr 1.9921e-03 eta 7:45:26
epoch [8/50] batch [660/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.1267 (1.0789) lr 1.9921e-03 eta 7:45:07
epoch [8/50] batch [680/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.2512 (1.0691) lr 1.9921e-03 eta 7:44:51
epoch [8/50] batch [700/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.2406 (1.0719) lr 1.9921e-03 eta 7:44:33
epoch [8/50] batch [720/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.0212 (1.0700) lr 1.9921e-03 eta 7:44:19
epoch [8/50] batch [740/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.4827 (1.0610) lr 1.9921e-03 eta 7:44:00
epoch [8/50] batch [760/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.9839 (1.0582) lr 1.9921e-03 eta 7:43:42
epoch [8/50] batch [780/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.1350 (1.0545) lr 1.9921e-03 eta 7:43:26
epoch [9/50] batch [20/796] time 0.830 (0.858) data 0.000 (0.027) loss 1.0608 (1.2531) lr 1.9823e-03 eta 7:57:56
epoch [9/50] batch [40/796] time 0.838 (0.844) data 0.000 (0.014) loss 0.6932 (1.0934) lr 1.9823e-03 eta 7:49:59
epoch [9/50] batch [60/796] time 0.818 (0.839) data 0.000 (0.009) loss 0.8039 (1.0038) lr 1.9823e-03 eta 7:46:53
epoch [9/50] batch [80/796] time 0.812 (0.837) data 0.000 (0.007) loss 1.6547 (0.9601) lr 1.9823e-03 eta 7:45:30
epoch [9/50] batch [100/796] time 0.818 (0.836) data 0.000 (0.006) loss 1.2569 (0.9672) lr 1.9823e-03 eta 7:44:34
epoch [9/50] batch [120/796] time 0.840 (0.835) data 0.000 (0.005) loss 1.2677 (0.9591) lr 1.9823e-03 eta 7:43:43
epoch [9/50] batch [140/796] time 0.839 (0.835) data 0.000 (0.004) loss 0.7770 (0.9792) lr 1.9823e-03 eta 7:43:03
epoch [9/50] batch [160/796] time 0.830 (0.834) data 0.000 (0.004) loss 0.3711 (1.0202) lr 1.9823e-03 eta 7:42:22
epoch [9/50] batch [180/796] time 0.840 (0.833) data 0.000 (0.003) loss 1.0241 (1.0134) lr 1.9823e-03 eta 7:41:55
epoch [9/50] batch [200/796] time 0.817 (0.833) data 0.000 (0.003) loss 1.4874 (1.0159) lr 1.9823e-03 eta 7:41:24
epoch [9/50] batch [220/796] time 0.829 (0.833) data 0.000 (0.003) loss 0.8707 (1.0131) lr 1.9823e-03 eta 7:41:08
epoch [9/50] batch [240/796] time 0.839 (0.833) data 0.000 (0.002) loss 1.3447 (1.0101) lr 1.9823e-03 eta 7:40:42
epoch [9/50] batch [260/796] time 0.829 (0.833) data 0.000 (0.002) loss 0.3904 (1.0095) lr 1.9823e-03 eta 7:40:18
epoch [9/50] batch [280/796] time 0.832 (0.832) data 0.000 (0.002) loss 3.7597 (0.9987) lr 1.9823e-03 eta 7:39:56
epoch [9/50] batch [300/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.3483 (0.9991) lr 1.9823e-03 eta 7:39:29
epoch [9/50] batch [320/796] time 0.819 (0.832) data 0.000 (0.002) loss 2.2320 (1.0085) lr 1.9823e-03 eta 7:39:08
epoch [9/50] batch [340/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.8113 (1.0129) lr 1.9823e-03 eta 7:38:48
epoch [9/50] batch [360/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.8414 (1.0290) lr 1.9823e-03 eta 7:38:32
epoch [9/50] batch [380/796] time 0.840 (0.832) data 0.000 (0.002) loss 1.4255 (1.0401) lr 1.9823e-03 eta 7:38:15
epoch [9/50] batch [400/796] time 0.848 (0.832) data 0.000 (0.002) loss 0.8054 (1.0339) lr 1.9823e-03 eta 7:38:00
epoch [9/50] batch [420/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.6543 (1.0197) lr 1.9823e-03 eta 7:37:43
epoch [9/50] batch [440/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.8762 (1.0278) lr 1.9823e-03 eta 7:37:28
epoch [9/50] batch [460/796] time 0.819 (0.832) data 0.000 (0.001) loss 2.0221 (1.0259) lr 1.9823e-03 eta 7:37:09
epoch [9/50] batch [480/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.6503 (1.0129) lr 1.9823e-03 eta 7:36:51
epoch [9/50] batch [500/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.9195 (1.0001) lr 1.9823e-03 eta 7:36:34
epoch [9/50] batch [520/796] time 0.839 (0.832) data 0.000 (0.001) loss 2.0971 (1.0029) lr 1.9823e-03 eta 7:36:17
epoch [9/50] batch [540/796] time 0.839 (0.832) data 0.000 (0.001) loss 2.6813 (1.0112) lr 1.9823e-03 eta 7:36:02
epoch [9/50] batch [560/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.1749 (1.0085) lr 1.9823e-03 eta 7:35:43
epoch [9/50] batch [580/796] time 0.842 (0.832) data 0.000 (0.001) loss 1.0596 (1.0113) lr 1.9823e-03 eta 7:35:24
epoch [9/50] batch [600/796] time 0.839 (0.832) data 0.000 (0.001) loss 1.1773 (1.0122) lr 1.9823e-03 eta 7:35:08
epoch [9/50] batch [620/796] time 0.830 (0.832) data 0.000 (0.001) loss 0.6281 (1.0117) lr 1.9823e-03 eta 7:34:51
epoch [9/50] batch [640/796] time 0.831 (0.832) data 0.000 (0.001) loss 1.0048 (1.0143) lr 1.9823e-03 eta 7:34:34
epoch [9/50] batch [660/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.1485 (1.0178) lr 1.9823e-03 eta 7:34:15
epoch [9/50] batch [680/796] time 0.831 (0.832) data 0.000 (0.001) loss 1.0791 (1.0105) lr 1.9823e-03 eta 7:33:54
epoch [9/50] batch [700/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.4620 (1.0135) lr 1.9823e-03 eta 7:33:39
epoch [9/50] batch [720/796] time 0.830 (0.832) data 0.000 (0.001) loss 0.5148 (1.0077) lr 1.9823e-03 eta 7:33:24
epoch [9/50] batch [740/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.3019 (1.0097) lr 1.9823e-03 eta 7:33:09
epoch [9/50] batch [760/796] time 0.831 (0.832) data 0.000 (0.001) loss 0.8917 (1.0053) lr 1.9823e-03 eta 7:32:52
epoch [9/50] batch [780/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.3877 (1.0042) lr 1.9823e-03 eta 7:32:35
epoch [10/50] batch [20/796] time 0.831 (0.856) data 0.000 (0.026) loss 1.3039 (1.1783) lr 1.9686e-03 eta 7:45:20
epoch [10/50] batch [40/796] time 0.840 (0.843) data 0.000 (0.013) loss 1.1591 (1.1066) lr 1.9686e-03 eta 7:37:57
epoch [10/50] batch [60/796] time 0.831 (0.839) data 0.000 (0.009) loss 0.7850 (1.2231) lr 1.9686e-03 eta 7:35:17
epoch [10/50] batch [80/796] time 0.832 (0.837) data 0.000 (0.007) loss 0.4333 (1.1848) lr 1.9686e-03 eta 7:33:55
epoch [10/50] batch [100/796] time 0.841 (0.835) data 0.000 (0.005) loss 0.5109 (1.1170) lr 1.9686e-03 eta 7:33:03
epoch [10/50] batch [120/796] time 0.840 (0.835) data 0.000 (0.004) loss 0.4559 (1.0999) lr 1.9686e-03 eta 7:32:46
epoch [10/50] batch [140/796] time 0.838 (0.835) data 0.000 (0.004) loss 1.0024 (1.0682) lr 1.9686e-03 eta 7:32:07
epoch [10/50] batch [160/796] time 0.822 (0.834) data 0.000 (0.003) loss 0.2470 (1.0533) lr 1.9686e-03 eta 7:31:34
epoch [10/50] batch [180/796] time 0.841 (0.834) data 0.000 (0.003) loss 1.9617 (1.0317) lr 1.9686e-03 eta 7:31:13
epoch [10/50] batch [200/796] time 0.819 (0.834) data 0.000 (0.003) loss 2.8904 (1.0739) lr 1.9686e-03 eta 7:30:47
epoch [10/50] batch [220/796] time 0.811 (0.834) data 0.000 (0.003) loss 0.1550 (1.0433) lr 1.9686e-03 eta 7:30:19
epoch [10/50] batch [240/796] time 0.819 (0.833) data 0.000 (0.002) loss 1.1549 (1.0554) lr 1.9686e-03 eta 7:29:57
epoch [10/50] batch [260/796] time 0.840 (0.833) data 0.000 (0.002) loss 0.3741 (1.0818) lr 1.9686e-03 eta 7:29:37
epoch [10/50] batch [280/796] time 0.819 (0.833) data 0.000 (0.002) loss 1.7690 (1.0880) lr 1.9686e-03 eta 7:29:16
epoch [10/50] batch [300/796] time 0.840 (0.833) data 0.000 (0.002) loss 0.4085 (1.0953) lr 1.9686e-03 eta 7:28:49
epoch [10/50] batch [320/796] time 0.833 (0.833) data 0.000 (0.002) loss 0.8282 (1.0904) lr 1.9686e-03 eta 7:28:37
epoch [10/50] batch [340/796] time 0.810 (0.833) data 0.000 (0.002) loss 0.2578 (1.0797) lr 1.9686e-03 eta 7:28:14
epoch [10/50] batch [360/796] time 0.830 (0.833) data 0.000 (0.002) loss 1.0741 (1.0813) lr 1.9686e-03 eta 7:27:53
epoch [10/50] batch [380/796] time 0.820 (0.832) data 0.000 (0.002) loss 1.8661 (1.0843) lr 1.9686e-03 eta 7:27:31
epoch [10/50] batch [400/796] time 0.832 (0.832) data 0.000 (0.001) loss 1.3065 (1.0773) lr 1.9686e-03 eta 7:27:06
epoch [10/50] batch [420/796] time 0.819 (0.832) data 0.000 (0.001) loss 0.7655 (1.0643) lr 1.9686e-03 eta 7:26:47
epoch [10/50] batch [440/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.5327 (1.0500) lr 1.9686e-03 eta 7:26:33
epoch [10/50] batch [460/796] time 0.829 (0.832) data 0.000 (0.001) loss 0.8606 (1.0547) lr 1.9686e-03 eta 7:26:10
epoch [10/50] batch [480/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.7902 (1.0490) lr 1.9686e-03 eta 7:25:51
epoch [10/50] batch [500/796] time 0.817 (0.832) data 0.000 (0.001) loss 0.2448 (1.0600) lr 1.9686e-03 eta 7:25:31
epoch [10/50] batch [520/796] time 0.817 (0.832) data 0.000 (0.001) loss 1.0095 (1.0529) lr 1.9686e-03 eta 7:25:11
epoch [10/50] batch [540/796] time 0.818 (0.832) data 0.000 (0.001) loss 1.0483 (1.0481) lr 1.9686e-03 eta 7:24:51
epoch [10/50] batch [560/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.2905 (1.0331) lr 1.9686e-03 eta 7:24:36
epoch [10/50] batch [580/796] time 0.839 (0.832) data 0.000 (0.001) loss 1.0860 (1.0296) lr 1.9686e-03 eta 7:24:18
epoch [10/50] batch [600/796] time 0.832 (0.832) data 0.000 (0.001) loss 0.8146 (1.0266) lr 1.9686e-03 eta 7:24:02
epoch [10/50] batch [620/796] time 0.829 (0.832) data 0.000 (0.001) loss 1.4186 (1.0256) lr 1.9686e-03 eta 7:23:45
epoch [10/50] batch [640/796] time 0.818 (0.832) data 0.000 (0.001) loss 1.6711 (1.0245) lr 1.9686e-03 eta 7:23:26
epoch [10/50] batch [660/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.9582 (1.0284) lr 1.9686e-03 eta 7:23:07
epoch [10/50] batch [680/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.5527 (1.0303) lr 1.9686e-03 eta 7:22:52
epoch [10/50] batch [700/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.4630 (1.0330) lr 1.9686e-03 eta 7:22:34
epoch [10/50] batch [720/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.4606 (1.0266) lr 1.9686e-03 eta 7:22:17
epoch [10/50] batch [740/796] time 0.810 (0.831) data 0.000 (0.001) loss 0.3618 (1.0297) lr 1.9686e-03 eta 7:21:58
epoch [10/50] batch [760/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.5873 (1.0273) lr 1.9686e-03 eta 7:21:42
epoch [10/50] batch [780/796] time 0.822 (0.831) data 0.000 (0.001) loss 1.2669 (1.0251) lr 1.9686e-03 eta 7:21:25
epoch [11/50] batch [20/796] time 0.832 (0.861) data 0.000 (0.028) loss 2.0428 (0.9386) lr 1.9511e-03 eta 7:36:46
epoch [11/50] batch [40/796] time 0.829 (0.846) data 0.000 (0.014) loss 2.2532 (1.0893) lr 1.9511e-03 eta 7:28:14
epoch [11/50] batch [60/796] time 0.830 (0.840) data 0.000 (0.009) loss 1.8373 (1.0711) lr 1.9511e-03 eta 7:24:52
epoch [11/50] batch [80/796] time 0.841 (0.838) data 0.002 (0.007) loss 0.6677 (1.0221) lr 1.9511e-03 eta 7:23:30
epoch [11/50] batch [100/796] time 0.838 (0.836) data 0.000 (0.006) loss 1.3047 (1.0097) lr 1.9511e-03 eta 7:22:28
epoch [11/50] batch [120/796] time 0.837 (0.835) data 0.000 (0.005) loss 0.3704 (1.0418) lr 1.9511e-03 eta 7:21:24
epoch [11/50] batch [140/796] time 0.831 (0.834) data 0.000 (0.004) loss 0.6352 (1.0469) lr 1.9511e-03 eta 7:20:51
epoch [11/50] batch [160/796] time 0.838 (0.834) data 0.000 (0.004) loss 0.1782 (1.0307) lr 1.9511e-03 eta 7:20:23
epoch [11/50] batch [180/796] time 0.830 (0.833) data 0.000 (0.003) loss 0.1630 (1.0205) lr 1.9511e-03 eta 7:19:41
epoch [11/50] batch [200/796] time 0.840 (0.833) data 0.000 (0.003) loss 2.1275 (1.0086) lr 1.9511e-03 eta 7:19:29
epoch [11/50] batch [220/796] time 0.829 (0.833) data 0.000 (0.003) loss 0.4486 (0.9909) lr 1.9511e-03 eta 7:18:59
epoch [11/50] batch [240/796] time 0.819 (0.833) data 0.000 (0.002) loss 0.3422 (1.0049) lr 1.9511e-03 eta 7:18:31
epoch [11/50] batch [260/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.5027 (1.0038) lr 1.9511e-03 eta 7:18:07
epoch [11/50] batch [280/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.7969 (1.0002) lr 1.9511e-03 eta 7:17:41
epoch [11/50] batch [300/796] time 0.842 (0.832) data 0.000 (0.002) loss 0.3449 (0.9903) lr 1.9511e-03 eta 7:17:28
epoch [11/50] batch [320/796] time 0.838 (0.832) data 0.000 (0.002) loss 2.1102 (0.9939) lr 1.9511e-03 eta 7:17:04
epoch [11/50] batch [340/796] time 0.838 (0.832) data 0.000 (0.002) loss 2.2760 (0.9819) lr 1.9511e-03 eta 7:16:46
epoch [11/50] batch [360/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.6156 (0.9815) lr 1.9511e-03 eta 7:16:30
epoch [11/50] batch [380/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.8955 (0.9809) lr 1.9511e-03 eta 7:16:12
epoch [11/50] batch [400/796] time 0.830 (0.832) data 0.000 (0.002) loss 1.4149 (1.0062) lr 1.9511e-03 eta 7:15:50
epoch [11/50] batch [420/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.5713 (1.0064) lr 1.9511e-03 eta 7:15:30
epoch [11/50] batch [440/796] time 0.838 (0.832) data 0.000 (0.001) loss 1.2263 (1.0013) lr 1.9511e-03 eta 7:15:11
epoch [11/50] batch [460/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.5350 (0.9876) lr 1.9511e-03 eta 7:14:49
epoch [11/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.7105 (0.9863) lr 1.9511e-03 eta 7:14:32
epoch [11/50] batch [500/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.9823 (0.9831) lr 1.9511e-03 eta 7:14:16
epoch [11/50] batch [520/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.7378 (0.9959) lr 1.9511e-03 eta 7:14:04
epoch [11/50] batch [540/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.3462 (0.9904) lr 1.9511e-03 eta 7:13:47
epoch [11/50] batch [560/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.1395 (0.9839) lr 1.9511e-03 eta 7:13:33
epoch [11/50] batch [580/796] time 0.838 (0.832) data 0.000 (0.001) loss 1.3406 (0.9752) lr 1.9511e-03 eta 7:13:17
epoch [11/50] batch [600/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.8048 (0.9691) lr 1.9511e-03 eta 7:13:02
epoch [11/50] batch [620/796] time 0.831 (0.832) data 0.000 (0.001) loss 1.7553 (0.9754) lr 1.9511e-03 eta 7:12:42
epoch [11/50] batch [640/796] time 0.837 (0.832) data 0.000 (0.001) loss 0.4823 (0.9736) lr 1.9511e-03 eta 7:12:25
epoch [11/50] batch [660/796] time 0.831 (0.831) data 0.000 (0.001) loss 0.1424 (0.9674) lr 1.9511e-03 eta 7:12:06
epoch [11/50] batch [680/796] time 0.809 (0.831) data 0.000 (0.001) loss 2.0807 (0.9678) lr 1.9511e-03 eta 7:11:46
epoch [11/50] batch [700/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.0982 (0.9572) lr 1.9511e-03 eta 7:11:29
epoch [11/50] batch [720/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.4407 (0.9669) lr 1.9511e-03 eta 7:11:10
epoch [11/50] batch [740/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.2483 (0.9652) lr 1.9511e-03 eta 7:10:54
epoch [11/50] batch [760/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.0675 (0.9655) lr 1.9511e-03 eta 7:10:36
epoch [11/50] batch [780/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.5603 (0.9608) lr 1.9511e-03 eta 7:10:15
epoch [12/50] batch [20/796] time 0.829 (0.862) data 0.000 (0.032) loss 2.4926 (0.9475) lr 1.9298e-03 eta 7:25:38
epoch [12/50] batch [40/796] time 0.829 (0.845) data 0.000 (0.016) loss 1.5184 (1.0625) lr 1.9298e-03 eta 7:16:45
epoch [12/50] batch [60/796] time 0.818 (0.839) data 0.000 (0.011) loss 0.6369 (1.0907) lr 1.9298e-03 eta 7:13:23
epoch [12/50] batch [80/796] time 0.808 (0.837) data 0.000 (0.008) loss 1.9929 (1.0351) lr 1.9298e-03 eta 7:11:53
epoch [12/50] batch [100/796] time 0.829 (0.835) data 0.000 (0.007) loss 1.8618 (1.0248) lr 1.9298e-03 eta 7:10:50
epoch [12/50] batch [120/796] time 0.831 (0.835) data 0.000 (0.005) loss 0.6022 (1.0422) lr 1.9298e-03 eta 7:10:18
epoch [12/50] batch [140/796] time 0.839 (0.835) data 0.000 (0.005) loss 1.7516 (1.0601) lr 1.9298e-03 eta 7:09:53
epoch [12/50] batch [160/796] time 0.818 (0.834) data 0.000 (0.004) loss 0.6013 (1.0128) lr 1.9298e-03 eta 7:09:11
epoch [12/50] batch [180/796] time 0.839 (0.833) data 0.000 (0.004) loss 1.1454 (1.0295) lr 1.9298e-03 eta 7:08:42
epoch [12/50] batch [200/796] time 0.836 (0.833) data 0.000 (0.003) loss 0.3517 (1.0159) lr 1.9298e-03 eta 7:08:08
epoch [12/50] batch [220/796] time 0.839 (0.833) data 0.000 (0.003) loss 1.3871 (1.0132) lr 1.9298e-03 eta 7:07:44
epoch [12/50] batch [240/796] time 0.842 (0.832) data 0.000 (0.003) loss 1.4710 (1.0105) lr 1.9298e-03 eta 7:07:23
epoch [12/50] batch [260/796] time 0.818 (0.832) data 0.000 (0.003) loss 1.6410 (1.0049) lr 1.9298e-03 eta 7:07:01
epoch [12/50] batch [280/796] time 0.831 (0.832) data 0.000 (0.002) loss 0.8489 (0.9913) lr 1.9298e-03 eta 7:06:33
epoch [12/50] batch [300/796] time 0.819 (0.832) data 0.000 (0.002) loss 0.4255 (0.9878) lr 1.9298e-03 eta 7:06:19
epoch [12/50] batch [320/796] time 0.841 (0.832) data 0.000 (0.002) loss 0.4931 (0.9826) lr 1.9298e-03 eta 7:06:03
epoch [12/50] batch [340/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.1744 (0.9920) lr 1.9298e-03 eta 7:05:46
epoch [12/50] batch [360/796] time 0.818 (0.832) data 0.000 (0.002) loss 1.6141 (1.0007) lr 1.9298e-03 eta 7:05:29
epoch [12/50] batch [380/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.1840 (0.9925) lr 1.9298e-03 eta 7:05:12
epoch [12/50] batch [400/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.2659 (0.9954) lr 1.9298e-03 eta 7:04:57
epoch [12/50] batch [420/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.8486 (0.9853) lr 1.9298e-03 eta 7:04:36
epoch [12/50] batch [440/796] time 0.820 (0.832) data 0.000 (0.002) loss 0.2653 (0.9798) lr 1.9298e-03 eta 7:04:20
epoch [12/50] batch [460/796] time 0.810 (0.832) data 0.000 (0.002) loss 0.5053 (0.9726) lr 1.9298e-03 eta 7:04:02
epoch [12/50] batch [480/796] time 0.842 (0.832) data 0.000 (0.001) loss 0.8952 (0.9737) lr 1.9298e-03 eta 7:03:44
epoch [12/50] batch [500/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.4136 (0.9625) lr 1.9298e-03 eta 7:03:24
epoch [12/50] batch [520/796] time 0.840 (0.832) data 0.000 (0.001) loss 3.8759 (0.9712) lr 1.9298e-03 eta 7:03:08
epoch [12/50] batch [540/796] time 0.824 (0.832) data 0.000 (0.001) loss 1.0619 (0.9634) lr 1.9298e-03 eta 7:02:51
epoch [12/50] batch [560/796] time 0.844 (0.832) data 0.000 (0.001) loss 0.7457 (0.9636) lr 1.9298e-03 eta 7:02:36
epoch [12/50] batch [580/796] time 0.849 (0.832) data 0.000 (0.001) loss 0.3438 (0.9682) lr 1.9298e-03 eta 7:02:20
epoch [12/50] batch [600/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.0453 (0.9616) lr 1.9298e-03 eta 7:02:02
epoch [12/50] batch [620/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.0796 (0.9525) lr 1.9298e-03 eta 7:01:44
epoch [12/50] batch [640/796] time 0.842 (0.832) data 0.000 (0.001) loss 0.1697 (0.9482) lr 1.9298e-03 eta 7:01:27
epoch [12/50] batch [660/796] time 0.829 (0.832) data 0.000 (0.001) loss 0.2172 (0.9446) lr 1.9298e-03 eta 7:01:11
epoch [12/50] batch [680/796] time 0.808 (0.832) data 0.000 (0.001) loss 0.1402 (0.9499) lr 1.9298e-03 eta 7:00:53
epoch [12/50] batch [700/796] time 0.819 (0.832) data 0.000 (0.001) loss 0.7937 (0.9559) lr 1.9298e-03 eta 7:00:35
epoch [12/50] batch [720/796] time 0.829 (0.832) data 0.000 (0.001) loss 0.1739 (0.9572) lr 1.9298e-03 eta 7:00:17
epoch [12/50] batch [740/796] time 0.808 (0.832) data 0.000 (0.001) loss 3.0267 (0.9654) lr 1.9298e-03 eta 6:59:59
epoch [12/50] batch [760/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.2359 (0.9611) lr 1.9298e-03 eta 6:59:44
epoch [12/50] batch [780/796] time 0.817 (0.832) data 0.000 (0.001) loss 0.1679 (0.9546) lr 1.9298e-03 eta 6:59:27
epoch [13/50] batch [20/796] time 0.818 (0.862) data 0.000 (0.030) loss 0.1844 (0.7514) lr 1.9048e-03 eta 7:14:17
epoch [13/50] batch [40/796] time 0.819 (0.845) data 0.000 (0.015) loss 0.8508 (0.8027) lr 1.9048e-03 eta 7:05:31
epoch [13/50] batch [60/796] time 0.818 (0.840) data 0.000 (0.010) loss 0.9895 (0.8554) lr 1.9048e-03 eta 7:02:34
epoch [13/50] batch [80/796] time 0.840 (0.837) data 0.000 (0.008) loss 1.0510 (0.9180) lr 1.9048e-03 eta 7:00:55
epoch [13/50] batch [100/796] time 0.829 (0.835) data 0.000 (0.006) loss 0.5010 (0.9170) lr 1.9048e-03 eta 6:59:42
epoch [13/50] batch [120/796] time 0.842 (0.835) data 0.000 (0.005) loss 1.0106 (0.8936) lr 1.9048e-03 eta 6:59:02
epoch [13/50] batch [140/796] time 0.837 (0.834) data 0.000 (0.004) loss 0.3116 (0.9005) lr 1.9048e-03 eta 6:58:21
epoch [13/50] batch [160/796] time 0.831 (0.833) data 0.000 (0.004) loss 0.0821 (0.8903) lr 1.9048e-03 eta 6:57:55
epoch [13/50] batch [180/796] time 0.829 (0.833) data 0.000 (0.004) loss 0.3371 (0.8904) lr 1.9048e-03 eta 6:57:27
epoch [13/50] batch [200/796] time 0.830 (0.832) data 0.000 (0.003) loss 1.5314 (0.8915) lr 1.9048e-03 eta 6:56:51
epoch [13/50] batch [220/796] time 0.842 (0.832) data 0.000 (0.003) loss 0.5319 (0.9028) lr 1.9048e-03 eta 6:56:33
epoch [13/50] batch [240/796] time 0.832 (0.832) data 0.000 (0.003) loss 0.3408 (0.9066) lr 1.9048e-03 eta 6:56:15
epoch [13/50] batch [260/796] time 0.838 (0.832) data 0.000 (0.002) loss 2.0207 (0.9372) lr 1.9048e-03 eta 6:55:57
epoch [13/50] batch [280/796] time 0.841 (0.832) data 0.000 (0.002) loss 0.3224 (0.9400) lr 1.9048e-03 eta 6:55:43
epoch [13/50] batch [300/796] time 0.832 (0.832) data 0.000 (0.002) loss 2.2616 (0.9337) lr 1.9048e-03 eta 6:55:23
epoch [13/50] batch [320/796] time 0.835 (0.832) data 0.000 (0.002) loss 1.0354 (0.9294) lr 1.9048e-03 eta 6:55:06
epoch [13/50] batch [340/796] time 0.821 (0.832) data 0.000 (0.002) loss 1.0846 (0.9202) lr 1.9048e-03 eta 6:54:46
epoch [13/50] batch [360/796] time 0.830 (0.832) data 0.000 (0.002) loss 1.5671 (0.9165) lr 1.9048e-03 eta 6:54:24
epoch [13/50] batch [380/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.1181 (0.9138) lr 1.9048e-03 eta 6:54:04
epoch [13/50] batch [400/796] time 0.840 (0.832) data 0.000 (0.002) loss 0.2865 (0.9104) lr 1.9048e-03 eta 6:53:44
epoch [13/50] batch [420/796] time 0.839 (0.832) data 0.000 (0.002) loss 2.5855 (0.9150) lr 1.9048e-03 eta 6:53:24
epoch [13/50] batch [440/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.6369 (0.9268) lr 1.9048e-03 eta 6:53:09
epoch [13/50] batch [460/796] time 0.823 (0.832) data 0.000 (0.001) loss 1.2067 (0.9224) lr 1.9048e-03 eta 6:52:52
epoch [13/50] batch [480/796] time 0.841 (0.832) data 0.000 (0.001) loss 0.2678 (0.9313) lr 1.9048e-03 eta 6:52:36
epoch [13/50] batch [500/796] time 0.843 (0.832) data 0.000 (0.001) loss 0.4961 (0.9322) lr 1.9048e-03 eta 6:52:22
epoch [13/50] batch [520/796] time 0.839 (0.832) data 0.000 (0.001) loss 1.5623 (0.9341) lr 1.9048e-03 eta 6:52:06
epoch [13/50] batch [540/796] time 0.819 (0.832) data 0.000 (0.001) loss 0.5979 (0.9384) lr 1.9048e-03 eta 6:51:47
epoch [13/50] batch [560/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.6656 (0.9343) lr 1.9048e-03 eta 6:51:31
epoch [13/50] batch [580/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.3507 (0.9371) lr 1.9048e-03 eta 6:51:13
epoch [13/50] batch [600/796] time 0.832 (0.832) data 0.000 (0.001) loss 0.7992 (0.9314) lr 1.9048e-03 eta 6:50:53
epoch [13/50] batch [620/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.5581 (0.9221) lr 1.9048e-03 eta 6:50:39
epoch [13/50] batch [640/796] time 0.839 (0.832) data 0.000 (0.001) loss 1.9062 (0.9246) lr 1.9048e-03 eta 6:50:25
epoch [13/50] batch [660/796] time 0.830 (0.832) data 0.000 (0.001) loss 2.1719 (0.9316) lr 1.9048e-03 eta 6:50:08
epoch [13/50] batch [680/796] time 0.832 (0.832) data 0.000 (0.001) loss 0.2823 (0.9427) lr 1.9048e-03 eta 6:49:53
epoch [13/50] batch [700/796] time 0.835 (0.832) data 0.000 (0.001) loss 0.8681 (0.9364) lr 1.9048e-03 eta 6:49:35
epoch [13/50] batch [720/796] time 0.839 (0.832) data 0.000 (0.001) loss 1.3862 (0.9383) lr 1.9048e-03 eta 6:49:21
epoch [13/50] batch [740/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.3754 (0.9402) lr 1.9048e-03 eta 6:49:05
epoch [13/50] batch [760/796] time 0.830 (0.832) data 0.000 (0.001) loss 0.6361 (0.9393) lr 1.9048e-03 eta 6:48:46
epoch [13/50] batch [780/796] time 0.830 (0.832) data 0.000 (0.001) loss 0.4296 (0.9389) lr 1.9048e-03 eta 6:48:29
epoch [14/50] batch [20/796] time 0.829 (0.865) data 0.000 (0.031) loss 1.9382 (0.9765) lr 1.8763e-03 eta 7:04:08
epoch [14/50] batch [40/796] time 0.830 (0.848) data 0.000 (0.015) loss 0.5294 (0.8894) lr 1.8763e-03 eta 6:55:36
epoch [14/50] batch [60/796] time 0.817 (0.842) data 0.000 (0.010) loss 0.2272 (0.9871) lr 1.8763e-03 eta 6:52:15
epoch [14/50] batch [80/796] time 0.832 (0.839) data 0.000 (0.008) loss 0.7002 (0.9436) lr 1.8763e-03 eta 6:50:41
epoch [14/50] batch [100/796] time 0.830 (0.838) data 0.000 (0.006) loss 1.2990 (0.9528) lr 1.8763e-03 eta 6:49:47
epoch [14/50] batch [120/796] time 0.839 (0.837) data 0.000 (0.005) loss 0.1376 (0.9944) lr 1.8763e-03 eta 6:49:06
epoch [14/50] batch [140/796] time 0.838 (0.836) data 0.000 (0.005) loss 0.4595 (0.9685) lr 1.8763e-03 eta 6:48:19
epoch [14/50] batch [160/796] time 0.839 (0.836) data 0.000 (0.004) loss 0.3855 (0.9628) lr 1.8763e-03 eta 6:47:54
epoch [14/50] batch [180/796] time 0.823 (0.835) data 0.000 (0.004) loss 1.5041 (0.9577) lr 1.8763e-03 eta 6:47:23
epoch [14/50] batch [200/796] time 0.839 (0.835) data 0.000 (0.003) loss 0.1888 (0.9539) lr 1.8763e-03 eta 6:46:58
epoch [14/50] batch [220/796] time 0.839 (0.835) data 0.000 (0.003) loss 0.0835 (0.9625) lr 1.8763e-03 eta 6:46:34
epoch [14/50] batch [240/796] time 0.839 (0.834) data 0.000 (0.003) loss 1.6833 (0.9608) lr 1.8763e-03 eta 6:46:09
epoch [14/50] batch [260/796] time 0.839 (0.834) data 0.000 (0.003) loss 1.3883 (0.9632) lr 1.8763e-03 eta 6:45:40
epoch [14/50] batch [280/796] time 0.838 (0.834) data 0.000 (0.002) loss 0.5059 (0.9608) lr 1.8763e-03 eta 6:45:20
epoch [14/50] batch [300/796] time 0.840 (0.833) data 0.000 (0.002) loss 1.2330 (0.9720) lr 1.8763e-03 eta 6:44:55
epoch [14/50] batch [320/796] time 0.839 (0.833) data 0.000 (0.002) loss 4.1460 (0.9786) lr 1.8763e-03 eta 6:44:36
epoch [14/50] batch [340/796] time 0.838 (0.833) data 0.000 (0.002) loss 1.1138 (0.9787) lr 1.8763e-03 eta 6:44:13
epoch [14/50] batch [360/796] time 0.839 (0.833) data 0.000 (0.002) loss 1.3038 (0.9741) lr 1.8763e-03 eta 6:43:55
epoch [14/50] batch [380/796] time 0.809 (0.833) data 0.000 (0.002) loss 1.2274 (0.9760) lr 1.8763e-03 eta 6:43:34
epoch [14/50] batch [400/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.5457 (0.9859) lr 1.8763e-03 eta 6:43:12
epoch [14/50] batch [420/796] time 0.840 (0.833) data 0.000 (0.002) loss 0.7144 (0.9779) lr 1.8763e-03 eta 6:42:50
epoch [14/50] batch [440/796] time 0.831 (0.832) data 0.000 (0.002) loss 0.6107 (0.9769) lr 1.8763e-03 eta 6:42:30
epoch [14/50] batch [460/796] time 0.818 (0.832) data 0.000 (0.002) loss 1.9365 (0.9831) lr 1.8763e-03 eta 6:42:10
epoch [14/50] batch [480/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.6331 (0.9907) lr 1.8763e-03 eta 6:41:54
epoch [14/50] batch [500/796] time 0.841 (0.832) data 0.000 (0.001) loss 0.8542 (0.9817) lr 1.8763e-03 eta 6:41:33
epoch [14/50] batch [520/796] time 0.810 (0.832) data 0.000 (0.001) loss 0.5250 (0.9674) lr 1.8763e-03 eta 6:41:15
epoch [14/50] batch [540/796] time 0.840 (0.832) data 0.000 (0.001) loss 1.0137 (0.9625) lr 1.8763e-03 eta 6:40:59
epoch [14/50] batch [560/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.4446 (0.9555) lr 1.8763e-03 eta 6:40:44
epoch [14/50] batch [580/796] time 0.843 (0.832) data 0.000 (0.001) loss 0.8735 (0.9589) lr 1.8763e-03 eta 6:40:24
epoch [14/50] batch [600/796] time 0.840 (0.832) data 0.000 (0.001) loss 1.0275 (0.9563) lr 1.8763e-03 eta 6:40:04
epoch [14/50] batch [620/796] time 0.839 (0.832) data 0.000 (0.001) loss 1.0892 (0.9490) lr 1.8763e-03 eta 6:39:48
epoch [14/50] batch [640/796] time 0.838 (0.832) data 0.000 (0.001) loss 1.2649 (0.9497) lr 1.8763e-03 eta 6:39:31
epoch [14/50] batch [660/796] time 0.832 (0.832) data 0.000 (0.001) loss 1.2271 (0.9446) lr 1.8763e-03 eta 6:39:15
epoch [14/50] batch [680/796] time 0.830 (0.832) data 0.000 (0.001) loss 0.2836 (0.9389) lr 1.8763e-03 eta 6:38:58
epoch [14/50] batch [700/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.7685 (0.9367) lr 1.8763e-03 eta 6:38:37
epoch [14/50] batch [720/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.8041 (0.9406) lr 1.8763e-03 eta 6:38:19
epoch [14/50] batch [740/796] time 0.845 (0.832) data 0.000 (0.001) loss 1.6444 (0.9434) lr 1.8763e-03 eta 6:38:02
epoch [14/50] batch [760/796] time 0.817 (0.832) data 0.000 (0.001) loss 0.3128 (0.9350) lr 1.8763e-03 eta 6:37:44
epoch [14/50] batch [780/796] time 0.829 (0.832) data 0.000 (0.001) loss 2.2279 (0.9371) lr 1.8763e-03 eta 6:37:27
epoch [15/50] batch [20/796] time 0.838 (0.859) data 0.000 (0.028) loss 1.1101 (0.8014) lr 1.8443e-03 eta 6:49:50
epoch [15/50] batch [40/796] time 0.840 (0.845) data 0.000 (0.014) loss 0.2840 (0.8972) lr 1.8443e-03 eta 6:43:01
epoch [15/50] batch [60/796] time 0.831 (0.840) data 0.000 (0.010) loss 0.4524 (0.8640) lr 1.8443e-03 eta 6:40:28
epoch [15/50] batch [80/796] time 0.838 (0.838) data 0.000 (0.007) loss 1.4486 (0.9781) lr 1.8443e-03 eta 6:39:15
epoch [15/50] batch [100/796] time 0.839 (0.837) data 0.000 (0.006) loss 0.3754 (0.9833) lr 1.8443e-03 eta 6:38:09
epoch [15/50] batch [120/796] time 0.844 (0.836) data 0.000 (0.005) loss 0.5093 (1.0075) lr 1.8443e-03 eta 6:37:31
epoch [15/50] batch [140/796] time 0.840 (0.835) data 0.000 (0.004) loss 0.2519 (0.9828) lr 1.8443e-03 eta 6:36:50
epoch [15/50] batch [160/796] time 0.810 (0.834) data 0.000 (0.004) loss 0.8393 (0.9678) lr 1.8443e-03 eta 6:36:10
epoch [15/50] batch [180/796] time 0.830 (0.834) data 0.000 (0.003) loss 0.8542 (0.9838) lr 1.8443e-03 eta 6:35:39
epoch [15/50] batch [200/796] time 0.809 (0.833) data 0.000 (0.003) loss 0.5907 (0.9701) lr 1.8443e-03 eta 6:35:14
epoch [15/50] batch [220/796] time 0.839 (0.833) data 0.000 (0.003) loss 0.9068 (0.9511) lr 1.8443e-03 eta 6:34:47
epoch [15/50] batch [240/796] time 0.840 (0.833) data 0.000 (0.003) loss 1.8788 (0.9493) lr 1.8443e-03 eta 6:34:23
epoch [15/50] batch [260/796] time 0.840 (0.833) data 0.000 (0.002) loss 2.4094 (0.9470) lr 1.8443e-03 eta 6:34:08
epoch [15/50] batch [280/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.7323 (0.9544) lr 1.8443e-03 eta 6:33:45
epoch [15/50] batch [300/796] time 0.839 (0.833) data 0.000 (0.002) loss 0.2435 (0.9536) lr 1.8443e-03 eta 6:33:30
epoch [15/50] batch [320/796] time 0.810 (0.832) data 0.000 (0.002) loss 0.4642 (0.9605) lr 1.8443e-03 eta 6:33:02
epoch [15/50] batch [340/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.0346 (0.9504) lr 1.8443e-03 eta 6:32:41
epoch [15/50] batch [360/796] time 0.818 (0.832) data 0.000 (0.002) loss 1.6010 (0.9488) lr 1.8443e-03 eta 6:32:20
epoch [15/50] batch [380/796] time 0.808 (0.832) data 0.000 (0.002) loss 0.3821 (0.9553) lr 1.8443e-03 eta 6:32:01
epoch [15/50] batch [400/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.0987 (0.9518) lr 1.8443e-03 eta 6:31:43
epoch [15/50] batch [420/796] time 0.836 (0.832) data 0.000 (0.002) loss 0.3424 (0.9567) lr 1.8443e-03 eta 6:31:26
epoch [15/50] batch [440/796] time 0.819 (0.832) data 0.000 (0.001) loss 1.2613 (0.9577) lr 1.8443e-03 eta 6:31:04
epoch [15/50] batch [460/796] time 0.838 (0.832) data 0.000 (0.001) loss 1.1253 (0.9521) lr 1.8443e-03 eta 6:30:49
epoch [15/50] batch [480/796] time 0.829 (0.832) data 0.000 (0.001) loss 2.0423 (0.9507) lr 1.8443e-03 eta 6:30:35
epoch [15/50] batch [500/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.5903 (0.9534) lr 1.8443e-03 eta 6:30:17
epoch [15/50] batch [520/796] time 0.817 (0.832) data 0.000 (0.001) loss 1.1343 (0.9558) lr 1.8443e-03 eta 6:29:59
epoch [15/50] batch [540/796] time 0.842 (0.832) data 0.000 (0.001) loss 0.2078 (0.9617) lr 1.8443e-03 eta 6:29:44
epoch [15/50] batch [560/796] time 0.844 (0.832) data 0.000 (0.001) loss 1.9605 (0.9689) lr 1.8443e-03 eta 6:29:27
epoch [15/50] batch [580/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.4914 (0.9763) lr 1.8443e-03 eta 6:29:10
epoch [15/50] batch [600/796] time 0.817 (0.832) data 0.000 (0.001) loss 0.5236 (0.9769) lr 1.8443e-03 eta 6:28:53
epoch [15/50] batch [620/796] time 0.831 (0.832) data 0.000 (0.001) loss 0.2735 (0.9677) lr 1.8443e-03 eta 6:28:34
epoch [15/50] batch [640/796] time 0.841 (0.832) data 0.000 (0.001) loss 0.3353 (0.9626) lr 1.8443e-03 eta 6:28:18
epoch [15/50] batch [660/796] time 0.838 (0.832) data 0.000 (0.001) loss 2.1226 (0.9618) lr 1.8443e-03 eta 6:27:59
epoch [15/50] batch [680/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.0074 (0.9510) lr 1.8443e-03 eta 6:27:42
epoch [15/50] batch [700/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.7600 (0.9548) lr 1.8443e-03 eta 6:27:23
epoch [15/50] batch [720/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.7795 (0.9601) lr 1.8443e-03 eta 6:27:06
epoch [15/50] batch [740/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.3209 (0.9545) lr 1.8443e-03 eta 6:26:50
epoch [15/50] batch [760/796] time 0.842 (0.831) data 0.000 (0.001) loss 0.1066 (0.9527) lr 1.8443e-03 eta 6:26:33
epoch [15/50] batch [780/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.0218 (0.9508) lr 1.8443e-03 eta 6:26:16
epoch [16/50] batch [20/796] time 0.830 (0.858) data 0.000 (0.026) loss 1.0247 (0.9799) lr 1.8090e-03 eta 6:38:14
epoch [16/50] batch [40/796] time 0.818 (0.846) data 0.000 (0.013) loss 1.0282 (0.9470) lr 1.8090e-03 eta 6:32:02
epoch [16/50] batch [60/796] time 0.831 (0.841) data 0.000 (0.009) loss 0.1187 (0.9207) lr 1.8090e-03 eta 6:29:47
epoch [16/50] batch [80/796] time 0.826 (0.839) data 0.000 (0.007) loss 1.8555 (0.9355) lr 1.8090e-03 eta 6:28:15
epoch [16/50] batch [100/796] time 0.817 (0.837) data 0.000 (0.005) loss 1.0993 (0.9339) lr 1.8090e-03 eta 6:27:01
epoch [16/50] batch [120/796] time 0.828 (0.835) data 0.000 (0.005) loss 0.3043 (0.9475) lr 1.8090e-03 eta 6:26:09
epoch [16/50] batch [140/796] time 0.818 (0.834) data 0.000 (0.004) loss 1.0277 (0.9891) lr 1.8090e-03 eta 6:25:28
epoch [16/50] batch [160/796] time 0.839 (0.834) data 0.000 (0.003) loss 2.2009 (0.9899) lr 1.8090e-03 eta 6:24:59
epoch [16/50] batch [180/796] time 0.838 (0.833) data 0.000 (0.003) loss 2.4206 (0.9964) lr 1.8090e-03 eta 6:24:26
epoch [16/50] batch [200/796] time 0.830 (0.833) data 0.000 (0.003) loss 0.3283 (1.0163) lr 1.8090e-03 eta 6:23:54
epoch [16/50] batch [220/796] time 0.810 (0.833) data 0.000 (0.003) loss 2.4588 (1.0015) lr 1.8090e-03 eta 6:23:33
epoch [16/50] batch [240/796] time 0.819 (0.832) data 0.000 (0.002) loss 0.0993 (0.9964) lr 1.8090e-03 eta 6:23:05
epoch [16/50] batch [260/796] time 0.840 (0.832) data 0.000 (0.002) loss 1.7323 (0.9857) lr 1.8090e-03 eta 6:22:46
epoch [16/50] batch [280/796] time 0.810 (0.832) data 0.000 (0.002) loss 0.7435 (0.9785) lr 1.8090e-03 eta 6:22:20
epoch [16/50] batch [300/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.5609 (0.9685) lr 1.8090e-03 eta 6:22:02
epoch [16/50] batch [320/796] time 0.830 (0.832) data 0.000 (0.002) loss 2.9308 (0.9756) lr 1.8090e-03 eta 6:21:43
epoch [16/50] batch [340/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.0687 (0.9794) lr 1.8090e-03 eta 6:21:25
epoch [16/50] batch [360/796] time 0.828 (0.832) data 0.000 (0.002) loss 0.3930 (0.9834) lr 1.8090e-03 eta 6:21:07
epoch [16/50] batch [380/796] time 0.838 (0.831) data 0.000 (0.002) loss 0.2662 (0.9885) lr 1.8090e-03 eta 6:20:49
epoch [16/50] batch [400/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.5564 (0.9818) lr 1.8090e-03 eta 6:20:32
epoch [16/50] batch [420/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.0849 (0.9798) lr 1.8090e-03 eta 6:20:14
epoch [16/50] batch [440/796] time 0.838 (0.831) data 0.000 (0.001) loss 2.4855 (0.9658) lr 1.8090e-03 eta 6:19:56
epoch [16/50] batch [460/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.9256 (0.9568) lr 1.8090e-03 eta 6:19:36
epoch [16/50] batch [480/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.3323 (0.9648) lr 1.8090e-03 eta 6:19:19
epoch [16/50] batch [500/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.9932 (0.9592) lr 1.8090e-03 eta 6:19:02
epoch [16/50] batch [520/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.9080 (0.9683) lr 1.8090e-03 eta 6:18:46
epoch [16/50] batch [540/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.3592 (0.9745) lr 1.8090e-03 eta 6:18:28
epoch [16/50] batch [560/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.2927 (0.9847) lr 1.8090e-03 eta 6:18:10
epoch [16/50] batch [580/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.3144 (0.9775) lr 1.8090e-03 eta 6:17:52
epoch [16/50] batch [600/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.7653 (0.9778) lr 1.8090e-03 eta 6:17:34
epoch [16/50] batch [620/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.5712 (0.9743) lr 1.8090e-03 eta 6:17:16
epoch [16/50] batch [640/796] time 0.809 (0.831) data 0.000 (0.001) loss 0.4204 (0.9705) lr 1.8090e-03 eta 6:16:59
epoch [16/50] batch [660/796] time 0.842 (0.831) data 0.000 (0.001) loss 2.3607 (0.9724) lr 1.8090e-03 eta 6:16:44
epoch [16/50] batch [680/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.6556 (0.9666) lr 1.8090e-03 eta 6:16:28
epoch [16/50] batch [700/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.2350 (0.9632) lr 1.8090e-03 eta 6:16:11
epoch [16/50] batch [720/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.4055 (0.9612) lr 1.8090e-03 eta 6:15:54
epoch [16/50] batch [740/796] time 0.830 (0.831) data 0.000 (0.001) loss 2.4931 (0.9569) lr 1.8090e-03 eta 6:15:35
epoch [16/50] batch [760/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.4246 (0.9551) lr 1.8090e-03 eta 6:15:19
epoch [16/50] batch [780/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.3021 (0.9539) lr 1.8090e-03 eta 6:15:01
epoch [17/50] batch [20/796] time 0.831 (0.857) data 0.000 (0.027) loss 1.2860 (0.7107) lr 1.7705e-03 eta 6:26:07
epoch [17/50] batch [40/796] time 0.838 (0.844) data 0.000 (0.014) loss 1.0278 (0.8964) lr 1.7705e-03 eta 6:20:10
epoch [17/50] batch [60/796] time 0.838 (0.840) data 0.000 (0.009) loss 0.3651 (0.9744) lr 1.7705e-03 eta 6:18:02
epoch [17/50] batch [80/796] time 0.839 (0.838) data 0.000 (0.007) loss 0.0538 (0.9699) lr 1.7705e-03 eta 6:16:40
epoch [17/50] batch [100/796] time 0.847 (0.837) data 0.000 (0.006) loss 1.2350 (1.0098) lr 1.7705e-03 eta 6:15:59
epoch [17/50] batch [120/796] time 0.823 (0.835) data 0.000 (0.005) loss 0.3342 (1.0177) lr 1.7705e-03 eta 6:15:04
epoch [17/50] batch [140/796] time 0.840 (0.835) data 0.000 (0.004) loss 0.8673 (1.0135) lr 1.7705e-03 eta 6:14:33
epoch [17/50] batch [160/796] time 0.829 (0.834) data 0.000 (0.004) loss 2.1101 (1.0015) lr 1.7705e-03 eta 6:14:04
epoch [17/50] batch [180/796] time 0.830 (0.834) data 0.000 (0.003) loss 0.4165 (1.0135) lr 1.7705e-03 eta 6:13:44
epoch [17/50] batch [200/796] time 0.838 (0.834) data 0.000 (0.003) loss 1.3969 (1.0072) lr 1.7705e-03 eta 6:13:20
epoch [17/50] batch [220/796] time 0.841 (0.834) data 0.000 (0.003) loss 0.3420 (1.0090) lr 1.7705e-03 eta 6:13:00
epoch [17/50] batch [240/796] time 0.808 (0.833) data 0.000 (0.002) loss 0.5083 (0.9771) lr 1.7705e-03 eta 6:12:30
epoch [17/50] batch [260/796] time 0.818 (0.833) data 0.000 (0.002) loss 1.6491 (0.9590) lr 1.7705e-03 eta 6:12:02
epoch [17/50] batch [280/796] time 0.819 (0.833) data 0.000 (0.002) loss 0.9219 (0.9650) lr 1.7705e-03 eta 6:11:44
epoch [17/50] batch [300/796] time 0.832 (0.833) data 0.000 (0.002) loss 0.4944 (0.9481) lr 1.7705e-03 eta 6:11:30
epoch [17/50] batch [320/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.1364 (0.9555) lr 1.7705e-03 eta 6:11:05
epoch [17/50] batch [340/796] time 0.840 (0.832) data 0.000 (0.002) loss 1.5095 (0.9427) lr 1.7705e-03 eta 6:10:47
epoch [17/50] batch [360/796] time 0.818 (0.832) data 0.000 (0.002) loss 1.2432 (0.9421) lr 1.7705e-03 eta 6:10:26
epoch [17/50] batch [380/796] time 0.843 (0.832) data 0.000 (0.002) loss 0.8163 (0.9501) lr 1.7705e-03 eta 6:10:06
epoch [17/50] batch [400/796] time 0.811 (0.832) data 0.000 (0.002) loss 0.1196 (0.9552) lr 1.7705e-03 eta 6:09:49
epoch [17/50] batch [420/796] time 0.811 (0.832) data 0.000 (0.001) loss 0.3991 (0.9530) lr 1.7705e-03 eta 6:09:27
epoch [17/50] batch [440/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.7299 (0.9419) lr 1.7705e-03 eta 6:09:08
epoch [17/50] batch [460/796] time 0.840 (0.832) data 0.000 (0.001) loss 1.0246 (0.9371) lr 1.7705e-03 eta 6:08:50
epoch [17/50] batch [480/796] time 0.820 (0.832) data 0.000 (0.001) loss 0.7476 (0.9393) lr 1.7705e-03 eta 6:08:31
epoch [17/50] batch [500/796] time 0.843 (0.832) data 0.000 (0.001) loss 0.0871 (0.9343) lr 1.7705e-03 eta 6:08:12
epoch [17/50] batch [520/796] time 0.829 (0.832) data 0.000 (0.001) loss 0.0995 (0.9325) lr 1.7705e-03 eta 6:07:52
epoch [17/50] batch [540/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.3796 (0.9275) lr 1.7705e-03 eta 6:07:31
epoch [17/50] batch [560/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.8908 (0.9246) lr 1.7705e-03 eta 6:07:14
epoch [17/50] batch [580/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.2475 (0.9207) lr 1.7705e-03 eta 6:06:56
epoch [17/50] batch [600/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.5481 (0.9182) lr 1.7705e-03 eta 6:06:38
epoch [17/50] batch [620/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.9694 (0.9248) lr 1.7705e-03 eta 6:06:20
epoch [17/50] batch [640/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.5621 (0.9249) lr 1.7705e-03 eta 6:06:04
epoch [17/50] batch [660/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.1798 (0.9219) lr 1.7705e-03 eta 6:05:45
epoch [17/50] batch [680/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2836 (0.9214) lr 1.7705e-03 eta 6:05:27
epoch [17/50] batch [700/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.6620 (0.9248) lr 1.7705e-03 eta 6:05:10
epoch [17/50] batch [720/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.8798 (0.9273) lr 1.7705e-03 eta 6:04:56
epoch [17/50] batch [740/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.3909 (0.9246) lr 1.7705e-03 eta 6:04:37
epoch [17/50] batch [760/796] time 0.821 (0.831) data 0.000 (0.001) loss 0.3271 (0.9172) lr 1.7705e-03 eta 6:04:21
epoch [17/50] batch [780/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.9253 (0.9131) lr 1.7705e-03 eta 6:04:03
epoch [18/50] batch [20/796] time 0.830 (0.858) data 0.000 (0.027) loss 0.5036 (0.9619) lr 1.7290e-03 eta 6:15:08
epoch [18/50] batch [40/796] time 0.816 (0.844) data 0.000 (0.014) loss 1.9925 (0.9143) lr 1.7290e-03 eta 6:08:46
epoch [18/50] batch [60/796] time 0.837 (0.840) data 0.000 (0.009) loss 0.6439 (0.9973) lr 1.7290e-03 eta 6:07:06
epoch [18/50] batch [80/796] time 0.823 (0.838) data 0.000 (0.007) loss 2.6032 (1.0371) lr 1.7290e-03 eta 6:05:44
epoch [18/50] batch [100/796] time 0.817 (0.836) data 0.000 (0.006) loss 0.3159 (1.0311) lr 1.7290e-03 eta 6:04:45
epoch [18/50] batch [120/796] time 0.841 (0.835) data 0.000 (0.005) loss 0.6837 (1.0065) lr 1.7290e-03 eta 6:04:02
epoch [18/50] batch [140/796] time 0.829 (0.835) data 0.000 (0.004) loss 1.6902 (0.9816) lr 1.7290e-03 eta 6:03:27
epoch [18/50] batch [160/796] time 0.818 (0.834) data 0.000 (0.004) loss 0.5815 (0.9301) lr 1.7290e-03 eta 6:03:00
epoch [18/50] batch [180/796] time 0.838 (0.834) data 0.000 (0.003) loss 0.5183 (0.9226) lr 1.7290e-03 eta 6:02:28
epoch [18/50] batch [200/796] time 0.838 (0.833) data 0.000 (0.003) loss 0.2574 (0.9111) lr 1.7290e-03 eta 6:01:57
epoch [18/50] batch [220/796] time 0.818 (0.833) data 0.000 (0.003) loss 0.2589 (0.8815) lr 1.7290e-03 eta 6:01:32
epoch [18/50] batch [240/796] time 0.817 (0.833) data 0.000 (0.002) loss 0.6741 (0.8791) lr 1.7290e-03 eta 6:01:08
epoch [18/50] batch [260/796] time 0.836 (0.832) data 0.000 (0.002) loss 1.0155 (0.8843) lr 1.7290e-03 eta 6:00:51
epoch [18/50] batch [280/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.6199 (0.8995) lr 1.7290e-03 eta 6:00:30
epoch [18/50] batch [300/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.9970 (0.8963) lr 1.7290e-03 eta 6:00:11
epoch [18/50] batch [320/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.2255 (0.8999) lr 1.7290e-03 eta 5:59:49
epoch [18/50] batch [340/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.5984 (0.9111) lr 1.7290e-03 eta 5:59:28
epoch [18/50] batch [360/796] time 0.820 (0.832) data 0.000 (0.002) loss 0.4569 (0.9116) lr 1.7290e-03 eta 5:59:10
epoch [18/50] batch [380/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.1326 (0.8974) lr 1.7290e-03 eta 5:58:51
epoch [18/50] batch [400/796] time 0.833 (0.832) data 0.000 (0.002) loss 1.7846 (0.8986) lr 1.7290e-03 eta 5:58:31
epoch [18/50] batch [420/796] time 0.841 (0.832) data 0.000 (0.001) loss 0.5187 (0.8932) lr 1.7290e-03 eta 5:58:13
epoch [18/50] batch [440/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.1157 (0.8893) lr 1.7290e-03 eta 5:57:55
epoch [18/50] batch [460/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.7137 (0.8895) lr 1.7290e-03 eta 5:57:38
epoch [18/50] batch [480/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.0223 (0.8905) lr 1.7290e-03 eta 5:57:17
epoch [18/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.5688 (0.8911) lr 1.7290e-03 eta 5:57:01
epoch [18/50] batch [520/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.9545 (0.8919) lr 1.7290e-03 eta 5:56:45
epoch [18/50] batch [540/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.1183 (0.8996) lr 1.7290e-03 eta 5:56:28
epoch [18/50] batch [560/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.1220 (0.9005) lr 1.7290e-03 eta 5:56:12
epoch [18/50] batch [580/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.0355 (0.8947) lr 1.7290e-03 eta 5:55:56
epoch [18/50] batch [600/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.2065 (0.8941) lr 1.7290e-03 eta 5:55:40
epoch [18/50] batch [620/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.2865 (0.8882) lr 1.7290e-03 eta 5:55:24
epoch [18/50] batch [640/796] time 0.842 (0.831) data 0.000 (0.001) loss 0.4375 (0.8823) lr 1.7290e-03 eta 5:55:06
epoch [18/50] batch [660/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.6176 (0.8846) lr 1.7290e-03 eta 5:54:47
epoch [18/50] batch [680/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.0717 (0.8896) lr 1.7290e-03 eta 5:54:28
epoch [18/50] batch [700/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.2561 (0.8917) lr 1.7290e-03 eta 5:54:10
epoch [18/50] batch [720/796] time 0.841 (0.831) data 0.000 (0.001) loss 0.6179 (0.8893) lr 1.7290e-03 eta 5:53:55
epoch [18/50] batch [740/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.5295 (0.8834) lr 1.7290e-03 eta 5:53:38
epoch [18/50] batch [760/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.1444 (0.8814) lr 1.7290e-03 eta 5:53:22
epoch [18/50] batch [780/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.1408 (0.8766) lr 1.7290e-03 eta 5:53:04
epoch [19/50] batch [20/796] time 0.838 (0.860) data 0.000 (0.026) loss 0.8276 (0.8620) lr 1.6845e-03 eta 6:04:36
epoch [19/50] batch [40/796] time 0.818 (0.844) data 0.000 (0.013) loss 0.3782 (1.0283) lr 1.6845e-03 eta 5:57:55
epoch [19/50] batch [60/796] time 0.823 (0.840) data 0.000 (0.009) loss 0.6181 (0.9298) lr 1.6845e-03 eta 5:55:44
epoch [19/50] batch [80/796] time 0.812 (0.838) data 0.000 (0.007) loss 0.7562 (0.8725) lr 1.6845e-03 eta 5:54:27
epoch [19/50] batch [100/796] time 0.830 (0.836) data 0.000 (0.005) loss 0.8456 (0.8858) lr 1.6845e-03 eta 5:53:35
epoch [19/50] batch [120/796] time 0.810 (0.835) data 0.000 (0.005) loss 1.3476 (0.8469) lr 1.6845e-03 eta 5:52:40
epoch [19/50] batch [140/796] time 0.825 (0.834) data 0.000 (0.004) loss 0.2064 (0.8413) lr 1.6845e-03 eta 5:52:02
epoch [19/50] batch [160/796] time 0.841 (0.834) data 0.000 (0.003) loss 0.4520 (0.8393) lr 1.6845e-03 eta 5:51:39
epoch [19/50] batch [180/796] time 0.818 (0.833) data 0.000 (0.003) loss 1.3840 (0.8247) lr 1.6845e-03 eta 5:51:12
epoch [19/50] batch [200/796] time 0.830 (0.833) data 0.000 (0.003) loss 0.2928 (0.8339) lr 1.6845e-03 eta 5:50:43
epoch [19/50] batch [220/796] time 0.839 (0.833) data 0.000 (0.003) loss 0.8064 (0.8275) lr 1.6845e-03 eta 5:50:24
epoch [19/50] batch [240/796] time 0.837 (0.832) data 0.000 (0.002) loss 0.8209 (0.8206) lr 1.6845e-03 eta 5:49:59
epoch [19/50] batch [260/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.1363 (0.8352) lr 1.6845e-03 eta 5:49:37
epoch [19/50] batch [280/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.7293 (0.8314) lr 1.6845e-03 eta 5:49:17
epoch [19/50] batch [300/796] time 0.837 (0.832) data 0.000 (0.002) loss 1.1104 (0.8363) lr 1.6845e-03 eta 5:48:57
epoch [19/50] batch [320/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.5534 (0.8510) lr 1.6845e-03 eta 5:48:36
epoch [19/50] batch [340/796] time 0.828 (0.831) data 0.000 (0.002) loss 1.6008 (0.8568) lr 1.6845e-03 eta 5:48:15
epoch [19/50] batch [360/796] time 0.817 (0.831) data 0.000 (0.002) loss 1.2096 (0.8558) lr 1.6845e-03 eta 5:47:59
epoch [19/50] batch [380/796] time 0.817 (0.831) data 0.000 (0.002) loss 0.1965 (0.8620) lr 1.6845e-03 eta 5:47:36
epoch [19/50] batch [400/796] time 0.845 (0.831) data 0.000 (0.001) loss 0.7010 (0.8650) lr 1.6845e-03 eta 5:47:19
epoch [19/50] batch [420/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.8567 (0.8706) lr 1.6845e-03 eta 5:47:01
epoch [19/50] batch [440/796] time 0.837 (0.831) data 0.000 (0.001) loss 1.2461 (0.8756) lr 1.6845e-03 eta 5:46:46
epoch [19/50] batch [460/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.1430 (0.8788) lr 1.6845e-03 eta 5:46:26
epoch [19/50] batch [480/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.0701 (0.8751) lr 1.6845e-03 eta 5:46:11
epoch [19/50] batch [500/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.2728 (0.8830) lr 1.6845e-03 eta 5:45:51
epoch [19/50] batch [520/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.6604 (0.8840) lr 1.6845e-03 eta 5:45:32
epoch [19/50] batch [540/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.5859 (0.8799) lr 1.6845e-03 eta 5:45:15
epoch [19/50] batch [560/796] time 0.816 (0.831) data 0.000 (0.001) loss 0.0873 (0.8735) lr 1.6845e-03 eta 5:44:56
epoch [19/50] batch [580/796] time 0.845 (0.831) data 0.000 (0.001) loss 0.3519 (0.8743) lr 1.6845e-03 eta 5:44:39
epoch [19/50] batch [600/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.1465 (0.8794) lr 1.6845e-03 eta 5:44:22
epoch [19/50] batch [620/796] time 0.818 (0.831) data 0.000 (0.001) loss 3.2822 (0.8781) lr 1.6845e-03 eta 5:44:03
epoch [19/50] batch [640/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.6737 (0.8811) lr 1.6845e-03 eta 5:43:46
epoch [19/50] batch [660/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.2353 (0.8773) lr 1.6845e-03 eta 5:43:29
epoch [19/50] batch [680/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.4777 (0.8769) lr 1.6845e-03 eta 5:43:12
epoch [19/50] batch [700/796] time 0.820 (0.831) data 0.000 (0.001) loss 0.6967 (0.8761) lr 1.6845e-03 eta 5:42:56
epoch [19/50] batch [720/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.0360 (0.8778) lr 1.6845e-03 eta 5:42:39
epoch [19/50] batch [740/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.4031 (0.8771) lr 1.6845e-03 eta 5:42:22
epoch [19/50] batch [760/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.5122 (0.8799) lr 1.6845e-03 eta 5:42:04
epoch [19/50] batch [780/796] time 0.844 (0.831) data 0.000 (0.001) loss 2.2938 (0.8823) lr 1.6845e-03 eta 5:41:48
epoch [20/50] batch [20/796] time 0.811 (0.862) data 0.000 (0.027) loss 1.1941 (0.8212) lr 1.6374e-03 eta 5:54:06
epoch [20/50] batch [40/796] time 0.846 (0.848) data 0.000 (0.014) loss 0.7788 (0.9037) lr 1.6374e-03 eta 5:48:05
epoch [20/50] batch [60/796] time 0.817 (0.842) data 0.000 (0.009) loss 0.8675 (0.8985) lr 1.6374e-03 eta 5:45:28
epoch [20/50] batch [80/796] time 0.832 (0.839) data 0.000 (0.007) loss 0.4938 (0.8882) lr 1.6374e-03 eta 5:44:04
epoch [20/50] batch [100/796] time 0.830 (0.838) data 0.000 (0.006) loss 0.7647 (0.9405) lr 1.6374e-03 eta 5:43:14
epoch [20/50] batch [120/796] time 0.840 (0.837) data 0.000 (0.005) loss 1.9432 (0.9130) lr 1.6374e-03 eta 5:42:26
epoch [20/50] batch [140/796] time 0.841 (0.836) data 0.000 (0.004) loss 0.2004 (0.9034) lr 1.6374e-03 eta 5:41:59
epoch [20/50] batch [160/796] time 0.839 (0.836) data 0.000 (0.004) loss 0.3853 (0.9132) lr 1.6374e-03 eta 5:41:26
epoch [20/50] batch [180/796] time 0.840 (0.835) data 0.000 (0.003) loss 0.1673 (0.8806) lr 1.6374e-03 eta 5:41:05
epoch [20/50] batch [200/796] time 0.840 (0.835) data 0.000 (0.003) loss 1.7316 (0.9025) lr 1.6374e-03 eta 5:40:38
epoch [20/50] batch [220/796] time 0.838 (0.835) data 0.000 (0.003) loss 0.2811 (0.8986) lr 1.6374e-03 eta 5:40:13
epoch [20/50] batch [240/796] time 0.839 (0.835) data 0.000 (0.002) loss 1.7044 (0.9348) lr 1.6374e-03 eta 5:39:55
epoch [20/50] batch [260/796] time 0.838 (0.834) data 0.000 (0.002) loss 0.7423 (0.9445) lr 1.6374e-03 eta 5:39:30
epoch [20/50] batch [280/796] time 0.809 (0.834) data 0.000 (0.002) loss 0.4348 (0.9318) lr 1.6374e-03 eta 5:39:07
epoch [20/50] batch [300/796] time 0.829 (0.834) data 0.000 (0.002) loss 0.6014 (0.9236) lr 1.6374e-03 eta 5:38:47
epoch [20/50] batch [320/796] time 0.840 (0.834) data 0.000 (0.002) loss 0.7984 (0.9198) lr 1.6374e-03 eta 5:38:30
epoch [20/50] batch [340/796] time 0.811 (0.834) data 0.000 (0.002) loss 0.8521 (0.9084) lr 1.6374e-03 eta 5:38:05
epoch [20/50] batch [360/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.2632 (0.9185) lr 1.6374e-03 eta 5:37:45
epoch [20/50] batch [380/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.3556 (0.9123) lr 1.6374e-03 eta 5:37:21
epoch [20/50] batch [400/796] time 0.840 (0.833) data 0.000 (0.002) loss 0.1747 (0.9045) lr 1.6374e-03 eta 5:37:02
epoch [20/50] batch [420/796] time 0.831 (0.833) data 0.000 (0.001) loss 0.2116 (0.9006) lr 1.6374e-03 eta 5:36:42
epoch [20/50] batch [440/796] time 0.839 (0.833) data 0.000 (0.001) loss 1.1573 (0.8978) lr 1.6374e-03 eta 5:36:22
epoch [20/50] batch [460/796] time 0.818 (0.833) data 0.000 (0.001) loss 1.6080 (0.9027) lr 1.6374e-03 eta 5:36:06
epoch [20/50] batch [480/796] time 0.830 (0.833) data 0.000 (0.001) loss 0.1381 (0.9016) lr 1.6374e-03 eta 5:35:47
epoch [20/50] batch [500/796] time 0.838 (0.833) data 0.000 (0.001) loss 0.4830 (0.8948) lr 1.6374e-03 eta 5:35:30
epoch [20/50] batch [520/796] time 0.817 (0.833) data 0.000 (0.001) loss 0.0285 (0.8887) lr 1.6374e-03 eta 5:35:13
epoch [20/50] batch [540/796] time 0.846 (0.833) data 0.000 (0.001) loss 0.6207 (0.8821) lr 1.6374e-03 eta 5:34:56
epoch [20/50] batch [560/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.1621 (0.8847) lr 1.6374e-03 eta 5:34:35
epoch [20/50] batch [580/796] time 0.829 (0.833) data 0.000 (0.001) loss 0.3214 (0.8832) lr 1.6374e-03 eta 5:34:20
epoch [20/50] batch [600/796] time 0.829 (0.832) data 0.000 (0.001) loss 0.8830 (0.8799) lr 1.6374e-03 eta 5:34:03
epoch [20/50] batch [620/796] time 0.837 (0.832) data 0.000 (0.001) loss 0.6635 (0.8748) lr 1.6374e-03 eta 5:33:46
epoch [20/50] batch [640/796] time 0.839 (0.832) data 0.000 (0.001) loss 3.7075 (0.8756) lr 1.6374e-03 eta 5:33:27
epoch [20/50] batch [660/796] time 0.818 (0.832) data 0.000 (0.001) loss 1.1810 (0.8744) lr 1.6374e-03 eta 5:33:11
epoch [20/50] batch [680/796] time 0.840 (0.832) data 0.000 (0.001) loss 1.7047 (0.8805) lr 1.6374e-03 eta 5:32:54
epoch [20/50] batch [700/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.7295 (0.8799) lr 1.6374e-03 eta 5:32:37
epoch [20/50] batch [720/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.6045 (0.8778) lr 1.6374e-03 eta 5:32:20
epoch [20/50] batch [740/796] time 0.844 (0.832) data 0.000 (0.001) loss 0.9219 (0.8785) lr 1.6374e-03 eta 5:32:04
epoch [20/50] batch [760/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.9225 (0.8773) lr 1.6374e-03 eta 5:31:47
epoch [20/50] batch [780/796] time 0.844 (0.832) data 0.000 (0.001) loss 0.5946 (0.8792) lr 1.6374e-03 eta 5:31:30
epoch [21/50] batch [20/796] time 0.839 (0.858) data 0.000 (0.026) loss 0.0763 (0.9219) lr 1.5878e-03 eta 5:41:16
epoch [21/50] batch [40/796] time 0.839 (0.845) data 0.000 (0.013) loss 1.3157 (0.8952) lr 1.5878e-03 eta 5:35:40
epoch [21/50] batch [60/796] time 0.838 (0.840) data 0.000 (0.009) loss 0.1733 (1.0456) lr 1.5878e-03 eta 5:33:29
epoch [21/50] batch [80/796] time 0.838 (0.838) data 0.000 (0.007) loss 0.4608 (1.0389) lr 1.5878e-03 eta 5:32:23
epoch [21/50] batch [100/796] time 0.822 (0.837) data 0.000 (0.005) loss 1.0154 (1.0585) lr 1.5878e-03 eta 5:31:40
epoch [21/50] batch [120/796] time 0.848 (0.836) data 0.000 (0.005) loss 0.9522 (1.0703) lr 1.5878e-03 eta 5:31:00
epoch [21/50] batch [140/796] time 0.838 (0.835) data 0.000 (0.004) loss 0.2423 (1.0573) lr 1.5878e-03 eta 5:30:27
epoch [21/50] batch [160/796] time 0.831 (0.834) data 0.000 (0.003) loss 0.2442 (1.0388) lr 1.5878e-03 eta 5:29:52
epoch [21/50] batch [180/796] time 0.845 (0.834) data 0.000 (0.003) loss 0.5570 (1.0277) lr 1.5878e-03 eta 5:29:35
epoch [21/50] batch [200/796] time 0.829 (0.834) data 0.000 (0.003) loss 0.4200 (0.9920) lr 1.5878e-03 eta 5:29:09
epoch [21/50] batch [220/796] time 0.817 (0.834) data 0.000 (0.003) loss 0.5801 (0.9902) lr 1.5878e-03 eta 5:28:48
epoch [21/50] batch [240/796] time 0.838 (0.833) data 0.000 (0.002) loss 1.2343 (0.9677) lr 1.5878e-03 eta 5:28:20
epoch [21/50] batch [260/796] time 0.843 (0.833) data 0.000 (0.002) loss 0.2162 (0.9522) lr 1.5878e-03 eta 5:28:03
epoch [21/50] batch [280/796] time 0.818 (0.833) data 0.000 (0.002) loss 1.8047 (0.9309) lr 1.5878e-03 eta 5:27:40
epoch [21/50] batch [300/796] time 0.817 (0.833) data 0.000 (0.002) loss 2.0351 (0.9247) lr 1.5878e-03 eta 5:27:18
epoch [21/50] batch [320/796] time 0.842 (0.833) data 0.000 (0.002) loss 0.6666 (0.9265) lr 1.5878e-03 eta 5:26:58
epoch [21/50] batch [340/796] time 0.822 (0.833) data 0.000 (0.002) loss 0.0729 (0.9209) lr 1.5878e-03 eta 5:26:38
epoch [21/50] batch [360/796] time 0.821 (0.833) data 0.000 (0.002) loss 1.1193 (0.9084) lr 1.5878e-03 eta 5:26:20
epoch [21/50] batch [380/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.5708 (0.9111) lr 1.5878e-03 eta 5:26:04
epoch [21/50] batch [400/796] time 0.840 (0.833) data 0.000 (0.001) loss 0.9074 (0.9159) lr 1.5878e-03 eta 5:25:49
epoch [21/50] batch [420/796] time 0.830 (0.832) data 0.000 (0.001) loss 1.0245 (0.9077) lr 1.5878e-03 eta 5:25:29
epoch [21/50] batch [440/796] time 0.830 (0.832) data 0.000 (0.001) loss 2.0769 (0.8989) lr 1.5878e-03 eta 5:25:08
epoch [21/50] batch [460/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.7058 (0.9058) lr 1.5878e-03 eta 5:24:49
epoch [21/50] batch [480/796] time 0.842 (0.832) data 0.000 (0.001) loss 0.3376 (0.9078) lr 1.5878e-03 eta 5:24:30
epoch [21/50] batch [500/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.4550 (0.9094) lr 1.5878e-03 eta 5:24:13
epoch [21/50] batch [520/796] time 0.822 (0.832) data 0.000 (0.001) loss 1.1779 (0.9054) lr 1.5878e-03 eta 5:23:54
epoch [21/50] batch [540/796] time 0.817 (0.832) data 0.000 (0.001) loss 0.7097 (0.9080) lr 1.5878e-03 eta 5:23:37
epoch [21/50] batch [560/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.0836 (0.8994) lr 1.5878e-03 eta 5:23:20
epoch [21/50] batch [580/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.8636 (0.8968) lr 1.5878e-03 eta 5:23:01
epoch [21/50] batch [600/796] time 0.810 (0.832) data 0.000 (0.001) loss 0.2826 (0.8937) lr 1.5878e-03 eta 5:22:46
epoch [21/50] batch [620/796] time 0.822 (0.832) data 0.000 (0.001) loss 0.3692 (0.8931) lr 1.5878e-03 eta 5:22:28
epoch [21/50] batch [640/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.2884 (0.8862) lr 1.5878e-03 eta 5:22:14
epoch [21/50] batch [660/796] time 0.818 (0.832) data 0.000 (0.001) loss 1.2998 (0.8937) lr 1.5878e-03 eta 5:21:57
epoch [21/50] batch [680/796] time 0.817 (0.832) data 0.000 (0.001) loss 0.0872 (0.8939) lr 1.5878e-03 eta 5:21:41
epoch [21/50] batch [700/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.1687 (0.9003) lr 1.5878e-03 eta 5:21:22
epoch [21/50] batch [720/796] time 0.818 (0.832) data 0.000 (0.001) loss 1.0798 (0.8932) lr 1.5878e-03 eta 5:21:04
epoch [21/50] batch [740/796] time 0.843 (0.832) data 0.000 (0.001) loss 0.0940 (0.8905) lr 1.5878e-03 eta 5:20:47
epoch [21/50] batch [760/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.5209 (0.8895) lr 1.5878e-03 eta 5:20:30
epoch [21/50] batch [780/796] time 0.831 (0.832) data 0.000 (0.001) loss 1.0483 (0.8945) lr 1.5878e-03 eta 5:20:13
epoch [22/50] batch [20/796] time 0.817 (0.858) data 0.000 (0.028) loss 1.0199 (0.9779) lr 1.5358e-03 eta 5:29:41
epoch [22/50] batch [40/796] time 0.819 (0.845) data 0.000 (0.014) loss 0.4707 (0.9249) lr 1.5358e-03 eta 5:24:24
epoch [22/50] batch [60/796] time 0.838 (0.840) data 0.000 (0.010) loss 2.0404 (0.9002) lr 1.5358e-03 eta 5:22:28
epoch [22/50] batch [80/796] time 0.843 (0.838) data 0.000 (0.007) loss 0.9811 (0.9029) lr 1.5358e-03 eta 5:21:19
epoch [22/50] batch [100/796] time 0.840 (0.836) data 0.000 (0.006) loss 0.3013 (0.8867) lr 1.5358e-03 eta 5:20:21
epoch [22/50] batch [120/796] time 0.829 (0.835) data 0.000 (0.005) loss 1.0758 (0.8865) lr 1.5358e-03 eta 5:19:32
epoch [22/50] batch [140/796] time 0.828 (0.835) data 0.000 (0.004) loss 1.3185 (0.8787) lr 1.5358e-03 eta 5:19:07
epoch [22/50] batch [160/796] time 0.828 (0.834) data 0.000 (0.004) loss 1.1489 (0.8962) lr 1.5358e-03 eta 5:18:43
epoch [22/50] batch [180/796] time 0.838 (0.834) data 0.000 (0.003) loss 0.4005 (0.9014) lr 1.5358e-03 eta 5:18:17
epoch [22/50] batch [200/796] time 0.839 (0.833) data 0.000 (0.003) loss 0.7207 (0.8847) lr 1.5358e-03 eta 5:17:50
epoch [22/50] batch [220/796] time 0.818 (0.833) data 0.000 (0.003) loss 0.7743 (0.8781) lr 1.5358e-03 eta 5:17:24
epoch [22/50] batch [240/796] time 0.839 (0.833) data 0.000 (0.003) loss 1.4455 (0.8819) lr 1.5358e-03 eta 5:17:02
epoch [22/50] batch [260/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.9101 (0.8816) lr 1.5358e-03 eta 5:16:39
epoch [22/50] batch [280/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.9542 (0.8744) lr 1.5358e-03 eta 5:16:16
epoch [22/50] batch [300/796] time 0.841 (0.832) data 0.000 (0.002) loss 0.1988 (0.8795) lr 1.5358e-03 eta 5:15:57
epoch [22/50] batch [320/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.3336 (0.8768) lr 1.5358e-03 eta 5:15:38
epoch [22/50] batch [340/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.1370 (0.8697) lr 1.5358e-03 eta 5:15:20
epoch [22/50] batch [360/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.0805 (0.8689) lr 1.5358e-03 eta 5:15:01
epoch [22/50] batch [380/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.6626 (0.8607) lr 1.5358e-03 eta 5:14:43
epoch [22/50] batch [400/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.7544 (0.8625) lr 1.5358e-03 eta 5:14:26
epoch [22/50] batch [420/796] time 0.840 (0.832) data 0.000 (0.002) loss 0.6346 (0.8867) lr 1.5358e-03 eta 5:14:08
epoch [22/50] batch [440/796] time 0.819 (0.832) data 0.000 (0.001) loss 1.0304 (0.8719) lr 1.5358e-03 eta 5:13:50
epoch [22/50] batch [460/796] time 0.841 (0.832) data 0.000 (0.001) loss 1.4535 (0.8672) lr 1.5358e-03 eta 5:13:32
epoch [22/50] batch [480/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.2801 (0.8815) lr 1.5358e-03 eta 5:13:15
epoch [22/50] batch [500/796] time 0.840 (0.832) data 0.000 (0.001) loss 1.8394 (0.8813) lr 1.5358e-03 eta 5:12:58
epoch [22/50] batch [520/796] time 0.831 (0.831) data 0.000 (0.001) loss 1.1809 (0.8818) lr 1.5358e-03 eta 5:12:39
epoch [22/50] batch [540/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.9996 (0.8831) lr 1.5358e-03 eta 5:12:23
epoch [22/50] batch [560/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.0318 (0.8861) lr 1.5358e-03 eta 5:12:06
epoch [22/50] batch [580/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.0141 (0.8886) lr 1.5358e-03 eta 5:11:51
epoch [22/50] batch [600/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.3653 (0.8886) lr 1.5358e-03 eta 5:11:32
epoch [22/50] batch [620/796] time 0.821 (0.831) data 0.000 (0.001) loss 0.6511 (0.8865) lr 1.5358e-03 eta 5:11:15
epoch [22/50] batch [640/796] time 0.841 (0.831) data 0.000 (0.001) loss 2.7662 (0.8850) lr 1.5358e-03 eta 5:10:58
epoch [22/50] batch [660/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.7594 (0.8802) lr 1.5358e-03 eta 5:10:41
epoch [22/50] batch [680/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.8421 (0.8778) lr 1.5358e-03 eta 5:10:24
epoch [22/50] batch [700/796] time 0.837 (0.831) data 0.000 (0.001) loss 1.3114 (0.8815) lr 1.5358e-03 eta 5:10:06
epoch [22/50] batch [720/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.0802 (0.8855) lr 1.5358e-03 eta 5:09:49
epoch [22/50] batch [740/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2194 (0.8890) lr 1.5358e-03 eta 5:09:30
epoch [22/50] batch [760/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.1400 (0.8804) lr 1.5358e-03 eta 5:09:13
epoch [22/50] batch [780/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.5030 (0.8747) lr 1.5358e-03 eta 5:08:55
epoch [23/50] batch [20/796] time 0.809 (0.863) data 0.000 (0.032) loss 0.8404 (0.7969) lr 1.4818e-03 eta 5:20:28
epoch [23/50] batch [40/796] time 0.839 (0.847) data 0.000 (0.016) loss 0.2885 (0.8748) lr 1.4818e-03 eta 5:14:04
epoch [23/50] batch [60/796] time 0.817 (0.841) data 0.000 (0.011) loss 0.6512 (0.8481) lr 1.4818e-03 eta 5:11:35
epoch [23/50] batch [80/796] time 0.809 (0.839) data 0.000 (0.008) loss 0.3126 (0.8850) lr 1.4818e-03 eta 5:10:33
epoch [23/50] batch [100/796] time 0.818 (0.838) data 0.000 (0.007) loss 1.2901 (0.8784) lr 1.4818e-03 eta 5:09:44
epoch [23/50] batch [120/796] time 0.808 (0.836) data 0.000 (0.005) loss 0.3510 (0.8782) lr 1.4818e-03 eta 5:08:58
epoch [23/50] batch [140/796] time 0.838 (0.836) data 0.000 (0.005) loss 1.5439 (0.8920) lr 1.4818e-03 eta 5:08:25
epoch [23/50] batch [160/796] time 0.838 (0.835) data 0.000 (0.004) loss 0.3156 (0.9143) lr 1.4818e-03 eta 5:08:00
epoch [23/50] batch [180/796] time 0.817 (0.835) data 0.000 (0.004) loss 0.9956 (0.9265) lr 1.4818e-03 eta 5:07:33
epoch [23/50] batch [200/796] time 0.838 (0.834) data 0.000 (0.003) loss 0.0573 (0.9286) lr 1.4818e-03 eta 5:07:04
epoch [23/50] batch [220/796] time 0.838 (0.834) data 0.000 (0.003) loss 0.9015 (0.9215) lr 1.4818e-03 eta 5:06:38
epoch [23/50] batch [240/796] time 0.838 (0.834) data 0.000 (0.003) loss 0.5360 (0.9379) lr 1.4818e-03 eta 5:06:18
epoch [23/50] batch [260/796] time 0.838 (0.833) data 0.000 (0.003) loss 0.6440 (0.9330) lr 1.4818e-03 eta 5:05:57
epoch [23/50] batch [280/796] time 0.818 (0.833) data 0.000 (0.002) loss 2.8793 (0.9417) lr 1.4818e-03 eta 5:05:36
epoch [23/50] batch [300/796] time 0.830 (0.833) data 0.000 (0.002) loss 0.4245 (0.9304) lr 1.4818e-03 eta 5:05:18
epoch [23/50] batch [320/796] time 0.830 (0.833) data 0.000 (0.002) loss 2.3386 (0.9325) lr 1.4818e-03 eta 5:04:56
epoch [23/50] batch [340/796] time 0.843 (0.833) data 0.000 (0.002) loss 1.0289 (0.9119) lr 1.4818e-03 eta 5:04:34
epoch [23/50] batch [360/796] time 0.808 (0.832) data 0.000 (0.002) loss 1.9976 (0.9220) lr 1.4818e-03 eta 5:04:10
epoch [23/50] batch [380/796] time 0.842 (0.832) data 0.000 (0.002) loss 2.9404 (0.9268) lr 1.4818e-03 eta 5:03:49
epoch [23/50] batch [400/796] time 0.808 (0.832) data 0.000 (0.002) loss 0.0977 (0.9263) lr 1.4818e-03 eta 5:03:30
epoch [23/50] batch [420/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.8375 (0.9241) lr 1.4818e-03 eta 5:03:12
epoch [23/50] batch [440/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.7422 (0.9123) lr 1.4818e-03 eta 5:02:52
epoch [23/50] batch [460/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.8078 (0.8997) lr 1.4818e-03 eta 5:02:35
epoch [23/50] batch [480/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.1442 (0.8938) lr 1.4818e-03 eta 5:02:18
epoch [23/50] batch [500/796] time 0.830 (0.832) data 0.000 (0.001) loss 0.6383 (0.8925) lr 1.4818e-03 eta 5:02:02
epoch [23/50] batch [520/796] time 0.838 (0.832) data 0.000 (0.001) loss 1.4046 (0.8967) lr 1.4818e-03 eta 5:01:44
epoch [23/50] batch [540/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.5656 (0.8898) lr 1.4818e-03 eta 5:01:27
epoch [23/50] batch [560/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.1651 (0.8840) lr 1.4818e-03 eta 5:01:09
epoch [23/50] batch [580/796] time 0.840 (0.832) data 0.000 (0.001) loss 1.3149 (0.8846) lr 1.4818e-03 eta 5:00:52
epoch [23/50] batch [600/796] time 0.819 (0.832) data 0.000 (0.001) loss 0.9671 (0.8852) lr 1.4818e-03 eta 5:00:34
epoch [23/50] batch [620/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.6566 (0.8774) lr 1.4818e-03 eta 5:00:16
epoch [23/50] batch [640/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.7421 (0.8859) lr 1.4818e-03 eta 4:59:59
epoch [23/50] batch [660/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.5999 (0.8880) lr 1.4818e-03 eta 4:59:42
epoch [23/50] batch [680/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.7822 (0.8812) lr 1.4818e-03 eta 4:59:24
epoch [23/50] batch [700/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.8003 (0.8769) lr 1.4818e-03 eta 4:59:05
epoch [23/50] batch [720/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.1460 (0.8839) lr 1.4818e-03 eta 4:58:47
epoch [23/50] batch [740/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.8228 (0.8798) lr 1.4818e-03 eta 4:58:28
epoch [23/50] batch [760/796] time 0.841 (0.831) data 0.000 (0.001) loss 1.1107 (0.8747) lr 1.4818e-03 eta 4:58:10
epoch [23/50] batch [780/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.8650 (0.8726) lr 1.4818e-03 eta 4:57:54
epoch [24/50] batch [20/796] time 0.838 (0.860) data 0.000 (0.027) loss 0.8274 (0.5421) lr 1.4258e-03 eta 5:07:40
epoch [24/50] batch [40/796] time 0.838 (0.844) data 0.000 (0.014) loss 0.2708 (0.7943) lr 1.4258e-03 eta 5:01:55
epoch [24/50] batch [60/796] time 0.838 (0.840) data 0.000 (0.009) loss 1.7506 (0.8524) lr 1.4258e-03 eta 5:00:06
epoch [24/50] batch [80/796] time 0.808 (0.837) data 0.000 (0.007) loss 1.4017 (0.8948) lr 1.4258e-03 eta 4:58:48
epoch [24/50] batch [100/796] time 0.838 (0.837) data 0.000 (0.006) loss 0.1920 (0.8746) lr 1.4258e-03 eta 4:58:20
epoch [24/50] batch [120/796] time 0.839 (0.836) data 0.000 (0.005) loss 1.0175 (0.8467) lr 1.4258e-03 eta 4:57:41
epoch [24/50] batch [140/796] time 0.818 (0.835) data 0.000 (0.004) loss 0.9601 (0.8526) lr 1.4258e-03 eta 4:57:02
epoch [24/50] batch [160/796] time 0.818 (0.834) data 0.000 (0.004) loss 0.4248 (0.8574) lr 1.4258e-03 eta 4:56:34
epoch [24/50] batch [180/796] time 0.830 (0.834) data 0.000 (0.003) loss 0.5612 (0.8688) lr 1.4258e-03 eta 4:56:12
epoch [24/50] batch [200/796] time 0.828 (0.833) data 0.000 (0.003) loss 0.2940 (0.8596) lr 1.4258e-03 eta 4:55:45
epoch [24/50] batch [220/796] time 0.817 (0.833) data 0.000 (0.003) loss 2.1613 (0.8620) lr 1.4258e-03 eta 4:55:25
epoch [24/50] batch [240/796] time 0.838 (0.833) data 0.000 (0.002) loss 3.0319 (0.8669) lr 1.4258e-03 eta 4:54:57
epoch [24/50] batch [260/796] time 0.842 (0.832) data 0.000 (0.002) loss 0.5234 (0.8721) lr 1.4258e-03 eta 4:54:34
epoch [24/50] batch [280/796] time 0.837 (0.832) data 0.000 (0.002) loss 0.1391 (0.8642) lr 1.4258e-03 eta 4:54:17
epoch [24/50] batch [300/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.5422 (0.8669) lr 1.4258e-03 eta 4:53:57
epoch [24/50] batch [320/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.6709 (0.8762) lr 1.4258e-03 eta 4:53:36
epoch [24/50] batch [340/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.7780 (0.8698) lr 1.4258e-03 eta 4:53:21
epoch [24/50] batch [360/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.9195 (0.8745) lr 1.4258e-03 eta 4:53:02
epoch [24/50] batch [380/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.8031 (0.8770) lr 1.4258e-03 eta 4:52:40
epoch [24/50] batch [400/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.0819 (0.8694) lr 1.4258e-03 eta 4:52:22
epoch [24/50] batch [420/796] time 0.809 (0.832) data 0.000 (0.001) loss 0.1847 (0.8828) lr 1.4258e-03 eta 4:52:03
epoch [24/50] batch [440/796] time 0.844 (0.832) data 0.000 (0.001) loss 0.9551 (0.8908) lr 1.4258e-03 eta 4:51:49
epoch [24/50] batch [460/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.3584 (0.8901) lr 1.4258e-03 eta 4:51:29
epoch [24/50] batch [480/796] time 0.819 (0.831) data 0.000 (0.001) loss 4.3416 (0.8927) lr 1.4258e-03 eta 4:51:11
epoch [24/50] batch [500/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.7342 (0.8972) lr 1.4258e-03 eta 4:50:52
epoch [24/50] batch [520/796] time 0.811 (0.831) data 0.000 (0.001) loss 0.2338 (0.8984) lr 1.4258e-03 eta 4:50:34
epoch [24/50] batch [540/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.3565 (0.8905) lr 1.4258e-03 eta 4:50:19
epoch [24/50] batch [560/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.4371 (0.8951) lr 1.4258e-03 eta 4:50:02
epoch [24/50] batch [580/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.6106 (0.8896) lr 1.4258e-03 eta 4:49:45
epoch [24/50] batch [600/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.4690 (0.8859) lr 1.4258e-03 eta 4:49:27
epoch [24/50] batch [620/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.2663 (0.8855) lr 1.4258e-03 eta 4:49:09
epoch [24/50] batch [640/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.0797 (0.8822) lr 1.4258e-03 eta 4:48:51
epoch [24/50] batch [660/796] time 0.837 (0.831) data 0.000 (0.001) loss 2.0238 (0.8896) lr 1.4258e-03 eta 4:48:35
epoch [24/50] batch [680/796] time 0.830 (0.831) data 0.000 (0.001) loss 2.3855 (0.8867) lr 1.4258e-03 eta 4:48:19
epoch [24/50] batch [700/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.1802 (0.8780) lr 1.4258e-03 eta 4:48:02
epoch [24/50] batch [720/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.0453 (0.8763) lr 1.4258e-03 eta 4:47:45
epoch [24/50] batch [740/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.6464 (0.8725) lr 1.4258e-03 eta 4:47:29
epoch [24/50] batch [760/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.8337 (0.8767) lr 1.4258e-03 eta 4:47:12
epoch [24/50] batch [780/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.5272 (0.8779) lr 1.4258e-03 eta 4:46:54
epoch [25/50] batch [20/796] time 0.829 (0.858) data 0.000 (0.030) loss 1.6477 (1.1484) lr 1.3681e-03 eta 4:55:29
epoch [25/50] batch [40/796] time 0.818 (0.844) data 0.000 (0.015) loss 0.3328 (0.9825) lr 1.3681e-03 eta 4:50:30
epoch [25/50] batch [60/796] time 0.830 (0.839) data 0.000 (0.010) loss 1.2468 (0.9000) lr 1.3681e-03 eta 4:48:33
epoch [25/50] batch [80/796] time 0.840 (0.836) data 0.000 (0.008) loss 1.0849 (0.8879) lr 1.3681e-03 eta 4:47:17
epoch [25/50] batch [100/796] time 0.820 (0.835) data 0.000 (0.006) loss 0.4690 (0.9122) lr 1.3681e-03 eta 4:46:38
epoch [25/50] batch [120/796] time 0.840 (0.835) data 0.000 (0.005) loss 0.8091 (0.9381) lr 1.3681e-03 eta 4:46:11
epoch [25/50] batch [140/796] time 0.838 (0.834) data 0.000 (0.004) loss 0.1732 (0.9270) lr 1.3681e-03 eta 4:45:48
epoch [25/50] batch [160/796] time 0.840 (0.834) data 0.000 (0.004) loss 0.4037 (0.9362) lr 1.3681e-03 eta 4:45:22
epoch [25/50] batch [180/796] time 0.830 (0.834) data 0.000 (0.003) loss 1.4350 (0.9401) lr 1.3681e-03 eta 4:45:00
epoch [25/50] batch [200/796] time 0.829 (0.833) data 0.000 (0.003) loss 0.3583 (0.9291) lr 1.3681e-03 eta 4:44:37
epoch [25/50] batch [220/796] time 0.838 (0.833) data 0.000 (0.003) loss 0.9396 (0.9284) lr 1.3681e-03 eta 4:44:15
epoch [25/50] batch [240/796] time 0.838 (0.833) data 0.000 (0.003) loss 1.5200 (0.9356) lr 1.3681e-03 eta 4:43:55
epoch [25/50] batch [260/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.2755 (0.9395) lr 1.3681e-03 eta 4:43:32
epoch [25/50] batch [280/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.4488 (0.9376) lr 1.3681e-03 eta 4:43:12
epoch [25/50] batch [300/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.9817 (0.9288) lr 1.3681e-03 eta 4:42:56
epoch [25/50] batch [320/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.3141 (0.9392) lr 1.3681e-03 eta 4:42:39
epoch [25/50] batch [340/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.4097 (0.9296) lr 1.3681e-03 eta 4:42:20
epoch [25/50] batch [360/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.2426 (0.9155) lr 1.3681e-03 eta 4:42:01
epoch [25/50] batch [380/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.6589 (0.9165) lr 1.3681e-03 eta 4:41:41
epoch [25/50] batch [400/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.9460 (0.9046) lr 1.3681e-03 eta 4:41:24
epoch [25/50] batch [420/796] time 0.818 (0.832) data 0.000 (0.002) loss 2.0554 (0.9197) lr 1.3681e-03 eta 4:41:05
epoch [25/50] batch [440/796] time 0.819 (0.832) data 0.000 (0.002) loss 0.1501 (0.9134) lr 1.3681e-03 eta 4:40:49
epoch [25/50] batch [460/796] time 0.830 (0.832) data 0.000 (0.001) loss 2.0448 (0.9079) lr 1.3681e-03 eta 4:40:30
epoch [25/50] batch [480/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.5191 (0.9048) lr 1.3681e-03 eta 4:40:12
epoch [25/50] batch [500/796] time 0.829 (0.832) data 0.000 (0.001) loss 1.2883 (0.9102) lr 1.3681e-03 eta 4:39:53
epoch [25/50] batch [520/796] time 0.836 (0.831) data 0.000 (0.001) loss 0.4782 (0.9131) lr 1.3681e-03 eta 4:39:35
epoch [25/50] batch [540/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.3333 (0.9044) lr 1.3681e-03 eta 4:39:17
epoch [25/50] batch [560/796] time 0.810 (0.831) data 0.000 (0.001) loss 0.5971 (0.9019) lr 1.3681e-03 eta 4:38:58
epoch [25/50] batch [580/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.7571 (0.8970) lr 1.3681e-03 eta 4:38:39
epoch [25/50] batch [600/796] time 0.820 (0.831) data 0.000 (0.001) loss 0.5843 (0.8946) lr 1.3681e-03 eta 4:38:23
epoch [25/50] batch [620/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.8055 (0.8937) lr 1.3681e-03 eta 4:38:05
epoch [25/50] batch [640/796] time 0.830 (0.831) data 0.000 (0.001) loss 2.4224 (0.8995) lr 1.3681e-03 eta 4:37:48
epoch [25/50] batch [660/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.7313 (0.8910) lr 1.3681e-03 eta 4:37:29
epoch [25/50] batch [680/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.6205 (0.8862) lr 1.3681e-03 eta 4:37:12
epoch [25/50] batch [700/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.0658 (0.8813) lr 1.3681e-03 eta 4:36:56
epoch [25/50] batch [720/796] time 0.841 (0.831) data 0.000 (0.001) loss 1.1278 (0.8854) lr 1.3681e-03 eta 4:36:42
epoch [25/50] batch [740/796] time 0.831 (0.831) data 0.000 (0.001) loss 2.5099 (0.8886) lr 1.3681e-03 eta 4:36:24
epoch [25/50] batch [760/796] time 0.841 (0.831) data 0.000 (0.001) loss 2.0651 (0.8926) lr 1.3681e-03 eta 4:36:08
epoch [25/50] batch [780/796] time 0.818 (0.831) data 0.000 (0.001) loss 2.6763 (0.8974) lr 1.3681e-03 eta 4:35:51
epoch [26/50] batch [20/796] time 0.808 (0.862) data 0.000 (0.030) loss 0.8267 (0.8348) lr 1.3090e-03 eta 4:45:45
epoch [26/50] batch [40/796] time 0.842 (0.847) data 0.000 (0.015) loss 2.2595 (0.8160) lr 1.3090e-03 eta 4:40:12
epoch [26/50] batch [60/796] time 0.839 (0.841) data 0.000 (0.010) loss 1.9870 (0.8039) lr 1.3090e-03 eta 4:37:58
epoch [26/50] batch [80/796] time 0.839 (0.838) data 0.000 (0.008) loss 1.0378 (0.8720) lr 1.3090e-03 eta 4:36:51
epoch [26/50] batch [100/796] time 0.818 (0.836) data 0.000 (0.006) loss 0.3129 (0.9083) lr 1.3090e-03 eta 4:35:52
epoch [26/50] batch [120/796] time 0.838 (0.835) data 0.000 (0.005) loss 0.1175 (0.8988) lr 1.3090e-03 eta 4:35:18
epoch [26/50] batch [140/796] time 0.831 (0.835) data 0.000 (0.004) loss 0.2948 (0.9218) lr 1.3090e-03 eta 4:34:50
epoch [26/50] batch [160/796] time 0.817 (0.834) data 0.000 (0.004) loss 1.7429 (0.9414) lr 1.3090e-03 eta 4:34:20
epoch [26/50] batch [180/796] time 0.819 (0.834) data 0.000 (0.003) loss 0.7299 (0.9134) lr 1.3090e-03 eta 4:33:58
epoch [26/50] batch [200/796] time 0.817 (0.833) data 0.000 (0.003) loss 1.1342 (0.9012) lr 1.3090e-03 eta 4:33:34
epoch [26/50] batch [220/796] time 0.830 (0.833) data 0.000 (0.003) loss 0.5746 (0.8838) lr 1.3090e-03 eta 4:33:11
epoch [26/50] batch [240/796] time 0.838 (0.833) data 0.000 (0.003) loss 1.1019 (0.8792) lr 1.3090e-03 eta 4:32:49
epoch [26/50] batch [260/796] time 0.808 (0.832) data 0.000 (0.002) loss 0.3428 (0.8820) lr 1.3090e-03 eta 4:32:27
epoch [26/50] batch [280/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.8959 (0.8742) lr 1.3090e-03 eta 4:32:07
epoch [26/50] batch [300/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.4027 (0.8767) lr 1.3090e-03 eta 4:31:42
epoch [26/50] batch [320/796] time 0.821 (0.832) data 0.000 (0.002) loss 0.8462 (0.8726) lr 1.3090e-03 eta 4:31:25
epoch [26/50] batch [340/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.8458 (0.8664) lr 1.3090e-03 eta 4:31:05
epoch [26/50] batch [360/796] time 0.828 (0.831) data 0.000 (0.002) loss 1.2242 (0.8584) lr 1.3090e-03 eta 4:30:46
epoch [26/50] batch [380/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.9931 (0.8566) lr 1.3090e-03 eta 4:30:27
epoch [26/50] batch [400/796] time 0.837 (0.831) data 0.000 (0.002) loss 1.5375 (0.8466) lr 1.3090e-03 eta 4:30:09
epoch [26/50] batch [420/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.4876 (0.8503) lr 1.3090e-03 eta 4:29:55
epoch [26/50] batch [440/796] time 0.829 (0.831) data 0.000 (0.002) loss 0.2233 (0.8623) lr 1.3090e-03 eta 4:29:36
epoch [26/50] batch [460/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.4341 (0.8541) lr 1.3090e-03 eta 4:29:15
epoch [26/50] batch [480/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.7325 (0.8476) lr 1.3090e-03 eta 4:28:57
epoch [26/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.1680 (0.8571) lr 1.3090e-03 eta 4:28:41
epoch [26/50] batch [520/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.9346 (0.8544) lr 1.3090e-03 eta 4:28:22
epoch [26/50] batch [540/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.6893 (0.8551) lr 1.3090e-03 eta 4:28:04
epoch [26/50] batch [560/796] time 0.837 (0.831) data 0.000 (0.001) loss 1.5275 (0.8589) lr 1.3090e-03 eta 4:27:46
epoch [26/50] batch [580/796] time 0.819 (0.831) data 0.000 (0.001) loss 1.6267 (0.8605) lr 1.3090e-03 eta 4:27:29
epoch [26/50] batch [600/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.0730 (0.8587) lr 1.3090e-03 eta 4:27:11
epoch [26/50] batch [620/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.8499 (0.8640) lr 1.3090e-03 eta 4:26:55
epoch [26/50] batch [640/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.5301 (0.8657) lr 1.3090e-03 eta 4:26:37
epoch [26/50] batch [660/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.1648 (0.8674) lr 1.3090e-03 eta 4:26:19
epoch [26/50] batch [680/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.0722 (0.8693) lr 1.3090e-03 eta 4:26:04
epoch [26/50] batch [700/796] time 0.842 (0.831) data 0.000 (0.001) loss 0.2852 (0.8596) lr 1.3090e-03 eta 4:25:47
epoch [26/50] batch [720/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.2240 (0.8534) lr 1.3090e-03 eta 4:25:32
epoch [26/50] batch [740/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.3883 (0.8623) lr 1.3090e-03 eta 4:25:15
epoch [26/50] batch [760/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.5060 (0.8612) lr 1.3090e-03 eta 4:24:57
epoch [26/50] batch [780/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.3060 (0.8571) lr 1.3090e-03 eta 4:24:40
epoch [27/50] batch [20/796] time 0.809 (0.861) data 0.000 (0.030) loss 0.9681 (0.8239) lr 1.2487e-03 eta 4:33:46
epoch [27/50] batch [40/796] time 0.818 (0.845) data 0.000 (0.015) loss 0.5896 (0.8043) lr 1.2487e-03 eta 4:28:31
epoch [27/50] batch [60/796] time 0.817 (0.840) data 0.000 (0.010) loss 0.9206 (0.8839) lr 1.2487e-03 eta 4:26:34
epoch [27/50] batch [80/796] time 0.838 (0.837) data 0.000 (0.008) loss 0.2589 (0.8919) lr 1.2487e-03 eta 4:25:16
epoch [27/50] batch [100/796] time 0.829 (0.835) data 0.000 (0.006) loss 0.2172 (0.8748) lr 1.2487e-03 eta 4:24:26
epoch [27/50] batch [120/796] time 0.830 (0.834) data 0.000 (0.005) loss 1.2718 (0.8610) lr 1.2487e-03 eta 4:23:57
epoch [27/50] batch [140/796] time 0.820 (0.834) data 0.000 (0.005) loss 1.7056 (0.8734) lr 1.2487e-03 eta 4:23:34
epoch [27/50] batch [160/796] time 0.831 (0.834) data 0.000 (0.004) loss 0.2809 (0.8737) lr 1.2487e-03 eta 4:23:17
epoch [27/50] batch [180/796] time 0.840 (0.834) data 0.000 (0.004) loss 0.2087 (0.8785) lr 1.2487e-03 eta 4:23:03
epoch [27/50] batch [200/796] time 0.812 (0.834) data 0.000 (0.003) loss 0.7620 (0.9033) lr 1.2487e-03 eta 4:22:42
epoch [27/50] batch [220/796] time 0.810 (0.834) data 0.000 (0.003) loss 0.5622 (0.9046) lr 1.2487e-03 eta 4:22:25
epoch [27/50] batch [240/796] time 0.838 (0.833) data 0.000 (0.003) loss 1.9791 (0.9064) lr 1.2487e-03 eta 4:22:00
epoch [27/50] batch [260/796] time 0.840 (0.833) data 0.000 (0.003) loss 2.3124 (0.9153) lr 1.2487e-03 eta 4:21:39
epoch [27/50] batch [280/796] time 0.839 (0.833) data 0.000 (0.002) loss 0.3176 (0.9445) lr 1.2487e-03 eta 4:21:21
epoch [27/50] batch [300/796] time 0.843 (0.833) data 0.000 (0.002) loss 3.6301 (0.9562) lr 1.2487e-03 eta 4:21:02
epoch [27/50] batch [320/796] time 0.818 (0.833) data 0.000 (0.002) loss 1.6797 (0.9638) lr 1.2487e-03 eta 4:20:41
epoch [27/50] batch [340/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.6551 (0.9574) lr 1.2487e-03 eta 4:20:20
epoch [27/50] batch [360/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.8893 (0.9587) lr 1.2487e-03 eta 4:20:00
epoch [27/50] batch [380/796] time 0.818 (0.832) data 0.000 (0.002) loss 3.0994 (0.9425) lr 1.2487e-03 eta 4:19:43
epoch [27/50] batch [400/796] time 0.817 (0.832) data 0.000 (0.002) loss 1.8132 (0.9574) lr 1.2487e-03 eta 4:19:26
epoch [27/50] batch [420/796] time 0.837 (0.832) data 0.000 (0.002) loss 1.1768 (0.9490) lr 1.2487e-03 eta 4:19:06
epoch [27/50] batch [440/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.8786 (0.9442) lr 1.2487e-03 eta 4:18:50
epoch [27/50] batch [460/796] time 0.808 (0.832) data 0.000 (0.002) loss 0.9788 (0.9383) lr 1.2487e-03 eta 4:18:31
epoch [27/50] batch [480/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.8153 (0.9374) lr 1.2487e-03 eta 4:18:10
epoch [27/50] batch [500/796] time 0.828 (0.832) data 0.000 (0.001) loss 1.0649 (0.9339) lr 1.2487e-03 eta 4:17:53
epoch [27/50] batch [520/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.3983 (0.9382) lr 1.2487e-03 eta 4:17:34
epoch [27/50] batch [540/796] time 0.840 (0.832) data 0.000 (0.001) loss 1.0963 (0.9386) lr 1.2487e-03 eta 4:17:17
epoch [27/50] batch [560/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.2376 (0.9355) lr 1.2487e-03 eta 4:17:00
epoch [27/50] batch [580/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.3495 (0.9330) lr 1.2487e-03 eta 4:16:41
epoch [27/50] batch [600/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.4811 (0.9304) lr 1.2487e-03 eta 4:16:22
epoch [27/50] batch [620/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.1882 (0.9212) lr 1.2487e-03 eta 4:16:04
epoch [27/50] batch [640/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.6224 (0.9173) lr 1.2487e-03 eta 4:15:46
epoch [27/50] batch [660/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.1077 (0.9131) lr 1.2487e-03 eta 4:15:28
epoch [27/50] batch [680/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.5155 (0.9021) lr 1.2487e-03 eta 4:15:11
epoch [27/50] batch [700/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.6542 (0.8985) lr 1.2487e-03 eta 4:14:54
epoch [27/50] batch [720/796] time 0.831 (0.831) data 0.000 (0.001) loss 0.7148 (0.8923) lr 1.2487e-03 eta 4:14:36
epoch [27/50] batch [740/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.1715 (0.8853) lr 1.2487e-03 eta 4:14:19
epoch [27/50] batch [760/796] time 0.809 (0.831) data 0.000 (0.001) loss 0.5102 (0.8848) lr 1.2487e-03 eta 4:14:02
epoch [27/50] batch [780/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.1537 (0.8786) lr 1.2487e-03 eta 4:13:45
epoch [28/50] batch [20/796] time 0.838 (0.862) data 0.000 (0.030) loss 1.1052 (0.8019) lr 1.1874e-03 eta 4:22:51
epoch [28/50] batch [40/796] time 0.831 (0.846) data 0.000 (0.015) loss 0.8383 (0.7539) lr 1.1874e-03 eta 4:17:35
epoch [28/50] batch [60/796] time 0.839 (0.841) data 0.000 (0.010) loss 0.8207 (0.7972) lr 1.1874e-03 eta 4:15:47
epoch [28/50] batch [80/796] time 0.838 (0.838) data 0.000 (0.008) loss 0.3677 (0.8174) lr 1.1874e-03 eta 4:14:38
epoch [28/50] batch [100/796] time 0.840 (0.836) data 0.000 (0.006) loss 0.3798 (0.7971) lr 1.1874e-03 eta 4:13:50
epoch [28/50] batch [120/796] time 0.837 (0.835) data 0.000 (0.005) loss 0.6232 (0.8288) lr 1.1874e-03 eta 4:13:14
epoch [28/50] batch [140/796] time 0.838 (0.835) data 0.000 (0.004) loss 0.0513 (0.8477) lr 1.1874e-03 eta 4:12:45
epoch [28/50] batch [160/796] time 0.808 (0.834) data 0.000 (0.004) loss 0.0524 (0.8635) lr 1.1874e-03 eta 4:12:16
epoch [28/50] batch [180/796] time 0.837 (0.833) data 0.000 (0.004) loss 0.6536 (0.8471) lr 1.1874e-03 eta 4:11:44
epoch [28/50] batch [200/796] time 0.838 (0.833) data 0.000 (0.003) loss 1.3777 (0.8499) lr 1.1874e-03 eta 4:11:16
epoch [28/50] batch [220/796] time 0.839 (0.832) data 0.000 (0.003) loss 0.0376 (0.8417) lr 1.1874e-03 eta 4:10:55
epoch [28/50] batch [240/796] time 0.845 (0.832) data 0.004 (0.003) loss 0.0559 (0.8331) lr 1.1874e-03 eta 4:10:37
epoch [28/50] batch [260/796] time 0.823 (0.832) data 0.000 (0.003) loss 0.4867 (0.8566) lr 1.1874e-03 eta 4:10:13
epoch [28/50] batch [280/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.3854 (0.8612) lr 1.1874e-03 eta 4:09:55
epoch [28/50] batch [300/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.7005 (0.8487) lr 1.1874e-03 eta 4:09:39
epoch [28/50] batch [320/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.9861 (0.8446) lr 1.1874e-03 eta 4:09:19
epoch [28/50] batch [340/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.7843 (0.8445) lr 1.1874e-03 eta 4:09:02
epoch [28/50] batch [360/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.9134 (0.8554) lr 1.1874e-03 eta 4:08:44
epoch [28/50] batch [380/796] time 0.818 (0.831) data 0.000 (0.002) loss 0.8173 (0.8594) lr 1.1874e-03 eta 4:08:25
epoch [28/50] batch [400/796] time 0.831 (0.831) data 0.000 (0.002) loss 0.2044 (0.8525) lr 1.1874e-03 eta 4:08:09
epoch [28/50] batch [420/796] time 0.817 (0.831) data 0.000 (0.002) loss 0.7136 (0.8508) lr 1.1874e-03 eta 4:07:51
epoch [28/50] batch [440/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.2377 (0.8527) lr 1.1874e-03 eta 4:07:37
epoch [28/50] batch [460/796] time 0.846 (0.832) data 0.000 (0.002) loss 0.3221 (0.8502) lr 1.1874e-03 eta 4:07:21
epoch [28/50] batch [480/796] time 0.843 (0.832) data 0.000 (0.001) loss 0.1356 (0.8511) lr 1.1874e-03 eta 4:07:04
epoch [28/50] batch [500/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.5005 (0.8522) lr 1.1874e-03 eta 4:06:46
epoch [28/50] batch [520/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.3440 (0.8539) lr 1.1874e-03 eta 4:06:29
epoch [28/50] batch [540/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.5390 (0.8541) lr 1.1874e-03 eta 4:06:09
epoch [28/50] batch [560/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.9384 (0.8543) lr 1.1874e-03 eta 4:05:52
epoch [28/50] batch [580/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.5551 (0.8495) lr 1.1874e-03 eta 4:05:34
epoch [28/50] batch [600/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.1230 (0.8509) lr 1.1874e-03 eta 4:05:18
epoch [28/50] batch [620/796] time 0.809 (0.831) data 0.000 (0.001) loss 0.4922 (0.8501) lr 1.1874e-03 eta 4:05:01
epoch [28/50] batch [640/796] time 0.808 (0.831) data 0.000 (0.001) loss 1.0306 (0.8489) lr 1.1874e-03 eta 4:04:44
epoch [28/50] batch [660/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.2463 (0.8449) lr 1.1874e-03 eta 4:04:26
epoch [28/50] batch [680/796] time 0.809 (0.831) data 0.000 (0.001) loss 0.0812 (0.8418) lr 1.1874e-03 eta 4:04:09
epoch [28/50] batch [700/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.8010 (0.8488) lr 1.1874e-03 eta 4:03:53
epoch [28/50] batch [720/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.9490 (0.8499) lr 1.1874e-03 eta 4:03:35
epoch [28/50] batch [740/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.3811 (0.8478) lr 1.1874e-03 eta 4:03:19
epoch [28/50] batch [760/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.3516 (0.8486) lr 1.1874e-03 eta 4:02:59
epoch [28/50] batch [780/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.2286 (0.8443) lr 1.1874e-03 eta 4:02:44
epoch [29/50] batch [20/796] time 0.811 (0.854) data 0.000 (0.028) loss 0.3914 (0.7008) lr 1.1253e-03 eta 4:09:03
epoch [29/50] batch [40/796] time 0.838 (0.843) data 0.000 (0.014) loss 0.6593 (0.9030) lr 1.1253e-03 eta 4:05:25
epoch [29/50] batch [60/796] time 0.839 (0.839) data 0.000 (0.009) loss 1.0205 (0.7821) lr 1.1253e-03 eta 4:03:54
epoch [29/50] batch [80/796] time 0.840 (0.837) data 0.000 (0.007) loss 0.0252 (0.8760) lr 1.1253e-03 eta 4:03:13
epoch [29/50] batch [100/796] time 0.841 (0.836) data 0.000 (0.006) loss 0.4083 (0.8753) lr 1.1253e-03 eta 4:02:38
epoch [29/50] batch [120/796] time 0.839 (0.835) data 0.000 (0.005) loss 0.2332 (0.8851) lr 1.1253e-03 eta 4:02:10
epoch [29/50] batch [140/796] time 0.839 (0.835) data 0.000 (0.004) loss 1.4549 (0.8302) lr 1.1253e-03 eta 4:01:44
epoch [29/50] batch [160/796] time 0.818 (0.834) data 0.000 (0.004) loss 0.3106 (0.8477) lr 1.1253e-03 eta 4:01:15
epoch [29/50] batch [180/796] time 0.838 (0.834) data 0.000 (0.003) loss 1.6730 (0.8531) lr 1.1253e-03 eta 4:00:48
epoch [29/50] batch [200/796] time 0.839 (0.833) data 0.000 (0.003) loss 1.2872 (0.8535) lr 1.1253e-03 eta 4:00:25
epoch [29/50] batch [220/796] time 0.830 (0.833) data 0.000 (0.003) loss 1.6294 (0.8572) lr 1.1253e-03 eta 4:00:04
epoch [29/50] batch [240/796] time 0.834 (0.833) data 0.000 (0.002) loss 2.0366 (0.8707) lr 1.1253e-03 eta 3:59:44
epoch [29/50] batch [260/796] time 0.839 (0.833) data 0.000 (0.002) loss 1.0901 (0.8569) lr 1.1253e-03 eta 3:59:24
epoch [29/50] batch [280/796] time 0.821 (0.833) data 0.000 (0.002) loss 0.7688 (0.8424) lr 1.1253e-03 eta 3:59:06
epoch [29/50] batch [300/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.4226 (0.8507) lr 1.1253e-03 eta 3:58:48
epoch [29/50] batch [320/796] time 0.810 (0.832) data 0.000 (0.002) loss 2.3623 (0.8452) lr 1.1253e-03 eta 3:58:27
epoch [29/50] batch [340/796] time 0.844 (0.832) data 0.000 (0.002) loss 0.2423 (0.8393) lr 1.1253e-03 eta 3:58:09
epoch [29/50] batch [360/796] time 0.817 (0.832) data 0.000 (0.002) loss 1.0442 (0.8458) lr 1.1253e-03 eta 3:57:49
epoch [29/50] batch [380/796] time 0.820 (0.832) data 0.000 (0.002) loss 0.0091 (0.8389) lr 1.1253e-03 eta 3:57:31
epoch [29/50] batch [400/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.6596 (0.8432) lr 1.1253e-03 eta 3:57:13
epoch [29/50] batch [420/796] time 0.831 (0.832) data 0.000 (0.001) loss 0.1549 (0.8294) lr 1.1253e-03 eta 3:56:55
epoch [29/50] batch [440/796] time 0.817 (0.832) data 0.000 (0.001) loss 0.6533 (0.8328) lr 1.1253e-03 eta 3:56:36
epoch [29/50] batch [460/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2006 (0.8396) lr 1.1253e-03 eta 3:56:18
epoch [29/50] batch [480/796] time 0.821 (0.831) data 0.000 (0.001) loss 0.2627 (0.8324) lr 1.1253e-03 eta 3:55:59
epoch [29/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.2597 (0.8312) lr 1.1253e-03 eta 3:55:42
epoch [29/50] batch [520/796] time 0.837 (0.831) data 0.000 (0.001) loss 1.0085 (0.8292) lr 1.1253e-03 eta 3:55:24
epoch [29/50] batch [540/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.5711 (0.8361) lr 1.1253e-03 eta 3:55:05
epoch [29/50] batch [560/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.1588 (0.8378) lr 1.1253e-03 eta 3:54:49
epoch [29/50] batch [580/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.1394 (0.8369) lr 1.1253e-03 eta 3:54:33
epoch [29/50] batch [600/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.6500 (0.8444) lr 1.1253e-03 eta 3:54:15
epoch [29/50] batch [620/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.2371 (0.8394) lr 1.1253e-03 eta 3:53:57
epoch [29/50] batch [640/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.0914 (0.8361) lr 1.1253e-03 eta 3:53:40
epoch [29/50] batch [660/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.8237 (0.8401) lr 1.1253e-03 eta 3:53:22
epoch [29/50] batch [680/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.7659 (0.8431) lr 1.1253e-03 eta 3:53:06
epoch [29/50] batch [700/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.2450 (0.8413) lr 1.1253e-03 eta 3:52:48
epoch [29/50] batch [720/796] time 0.843 (0.831) data 0.000 (0.001) loss 0.5839 (0.8378) lr 1.1253e-03 eta 3:52:32
epoch [29/50] batch [740/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2470 (0.8360) lr 1.1253e-03 eta 3:52:14
epoch [29/50] batch [760/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.1035 (0.8324) lr 1.1253e-03 eta 3:51:56
epoch [29/50] batch [780/796] time 0.840 (0.831) data 0.000 (0.001) loss 2.7825 (0.8419) lr 1.1253e-03 eta 3:51:40
epoch [30/50] batch [20/796] time 0.838 (0.860) data 0.000 (0.030) loss 2.4985 (0.5846) lr 1.0628e-03 eta 3:59:23
epoch [30/50] batch [40/796] time 0.817 (0.844) data 0.000 (0.015) loss 0.0943 (0.6635) lr 1.0628e-03 eta 3:54:41
epoch [30/50] batch [60/796] time 0.838 (0.840) data 0.000 (0.010) loss 0.2309 (0.7001) lr 1.0628e-03 eta 3:53:13
epoch [30/50] batch [80/796] time 0.843 (0.838) data 0.000 (0.008) loss 0.4980 (0.7234) lr 1.0628e-03 eta 3:52:17
epoch [30/50] batch [100/796] time 0.829 (0.836) data 0.000 (0.006) loss 0.3755 (0.7343) lr 1.0628e-03 eta 3:51:32
epoch [30/50] batch [120/796] time 0.837 (0.835) data 0.000 (0.005) loss 1.4674 (0.7620) lr 1.0628e-03 eta 3:51:01
epoch [30/50] batch [140/796] time 0.818 (0.835) data 0.000 (0.004) loss 0.3831 (0.8246) lr 1.0628e-03 eta 3:50:35
epoch [30/50] batch [160/796] time 0.839 (0.834) data 0.000 (0.004) loss 0.9710 (0.8405) lr 1.0628e-03 eta 3:50:11
epoch [30/50] batch [180/796] time 0.829 (0.834) data 0.000 (0.003) loss 1.7042 (0.8344) lr 1.0628e-03 eta 3:49:50
epoch [30/50] batch [200/796] time 0.843 (0.833) data 0.000 (0.003) loss 0.2886 (0.8559) lr 1.0628e-03 eta 3:49:24
epoch [30/50] batch [220/796] time 0.838 (0.833) data 0.000 (0.003) loss 0.1660 (0.8407) lr 1.0628e-03 eta 3:49:02
epoch [30/50] batch [240/796] time 0.829 (0.833) data 0.000 (0.003) loss 0.1081 (0.8420) lr 1.0628e-03 eta 3:48:39
epoch [30/50] batch [260/796] time 0.817 (0.833) data 0.000 (0.002) loss 2.1136 (0.8422) lr 1.0628e-03 eta 3:48:21
epoch [30/50] batch [280/796] time 0.837 (0.832) data 0.000 (0.002) loss 0.5930 (0.8488) lr 1.0628e-03 eta 3:48:01
epoch [30/50] batch [300/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.3961 (0.8527) lr 1.0628e-03 eta 3:47:40
epoch [30/50] batch [320/796] time 0.830 (0.832) data 0.000 (0.002) loss 1.5749 (0.8404) lr 1.0628e-03 eta 3:47:20
epoch [30/50] batch [340/796] time 0.829 (0.832) data 0.000 (0.002) loss 1.3966 (0.8364) lr 1.0628e-03 eta 3:47:00
epoch [30/50] batch [360/796] time 0.828 (0.832) data 0.000 (0.002) loss 1.7326 (0.8418) lr 1.0628e-03 eta 3:46:41
epoch [30/50] batch [380/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.1885 (0.8449) lr 1.0628e-03 eta 3:46:25
epoch [30/50] batch [400/796] time 0.817 (0.832) data 0.000 (0.002) loss 1.7462 (0.8483) lr 1.0628e-03 eta 3:46:08
epoch [30/50] batch [420/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.6973 (0.8465) lr 1.0628e-03 eta 3:45:50
epoch [30/50] batch [440/796] time 0.829 (0.831) data 0.000 (0.002) loss 1.9516 (0.8442) lr 1.0628e-03 eta 3:45:33
epoch [30/50] batch [460/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.7934 (0.8517) lr 1.0628e-03 eta 3:45:12
epoch [30/50] batch [480/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.8957 (0.8445) lr 1.0628e-03 eta 3:44:54
epoch [30/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.5300 (0.8385) lr 1.0628e-03 eta 3:44:38
epoch [30/50] batch [520/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.8917 (0.8394) lr 1.0628e-03 eta 3:44:23
epoch [30/50] batch [540/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.2969 (0.8300) lr 1.0628e-03 eta 3:44:04
epoch [30/50] batch [560/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.9860 (0.8302) lr 1.0628e-03 eta 3:43:46
epoch [30/50] batch [580/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.9630 (0.8362) lr 1.0628e-03 eta 3:43:29
epoch [30/50] batch [600/796] time 0.810 (0.831) data 0.000 (0.001) loss 0.6128 (0.8306) lr 1.0628e-03 eta 3:43:11
epoch [30/50] batch [620/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.0472 (0.8352) lr 1.0628e-03 eta 3:42:53
epoch [30/50] batch [640/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.5565 (0.8359) lr 1.0628e-03 eta 3:42:36
epoch [30/50] batch [660/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.7837 (0.8301) lr 1.0628e-03 eta 3:42:19
epoch [30/50] batch [680/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.5740 (0.8300) lr 1.0628e-03 eta 3:42:02
epoch [30/50] batch [700/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.9060 (0.8273) lr 1.0628e-03 eta 3:41:44
epoch [30/50] batch [720/796] time 0.820 (0.831) data 0.000 (0.001) loss 0.3631 (0.8277) lr 1.0628e-03 eta 3:41:27
epoch [30/50] batch [740/796] time 0.840 (0.831) data 0.000 (0.001) loss 3.5346 (0.8353) lr 1.0628e-03 eta 3:41:11
epoch [30/50] batch [760/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.2018 (0.8366) lr 1.0628e-03 eta 3:40:54
epoch [30/50] batch [780/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.6418 (0.8368) lr 1.0628e-03 eta 3:40:37
epoch [31/50] batch [20/796] time 0.828 (0.857) data 0.000 (0.027) loss 1.8489 (1.0272) lr 1.0000e-03 eta 3:47:07
epoch [31/50] batch [40/796] time 0.838 (0.844) data 0.000 (0.014) loss 0.2363 (1.0222) lr 1.0000e-03 eta 3:43:24
epoch [31/50] batch [60/796] time 0.840 (0.840) data 0.000 (0.009) loss 0.8115 (0.9327) lr 1.0000e-03 eta 3:42:07
epoch [31/50] batch [80/796] time 0.838 (0.838) data 0.000 (0.007) loss 0.9268 (0.8983) lr 1.0000e-03 eta 3:41:11
epoch [31/50] batch [100/796] time 0.839 (0.836) data 0.000 (0.006) loss 0.6945 (0.8951) lr 1.0000e-03 eta 3:40:26
epoch [31/50] batch [120/796] time 0.830 (0.835) data 0.000 (0.005) loss 1.5115 (0.9203) lr 1.0000e-03 eta 3:39:49
epoch [31/50] batch [140/796] time 0.818 (0.834) data 0.000 (0.004) loss 0.0219 (0.8828) lr 1.0000e-03 eta 3:39:21
epoch [31/50] batch [160/796] time 0.838 (0.833) data 0.000 (0.004) loss 0.5931 (0.8977) lr 1.0000e-03 eta 3:38:52
epoch [31/50] batch [180/796] time 0.838 (0.833) data 0.000 (0.003) loss 0.3094 (0.8787) lr 1.0000e-03 eta 3:38:29
epoch [31/50] batch [200/796] time 0.830 (0.832) data 0.000 (0.003) loss 0.9502 (0.8840) lr 1.0000e-03 eta 3:38:06
epoch [31/50] batch [220/796] time 0.838 (0.833) data 0.000 (0.003) loss 0.6734 (0.8909) lr 1.0000e-03 eta 3:37:51
epoch [31/50] batch [240/796] time 0.831 (0.832) data 0.000 (0.002) loss 0.4219 (0.8829) lr 1.0000e-03 eta 3:37:28
epoch [31/50] batch [260/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.0387 (0.8666) lr 1.0000e-03 eta 3:37:08
epoch [31/50] batch [280/796] time 0.823 (0.832) data 0.000 (0.002) loss 0.5362 (0.8665) lr 1.0000e-03 eta 3:36:50
epoch [31/50] batch [300/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.3328 (0.8678) lr 1.0000e-03 eta 3:36:31
epoch [31/50] batch [320/796] time 0.818 (0.832) data 0.000 (0.002) loss 3.2936 (0.8811) lr 1.0000e-03 eta 3:36:13
epoch [31/50] batch [340/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.8009 (0.8860) lr 1.0000e-03 eta 3:35:57
epoch [31/50] batch [360/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.2056 (0.8711) lr 1.0000e-03 eta 3:35:39
epoch [31/50] batch [380/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.5812 (0.8625) lr 1.0000e-03 eta 3:35:22
epoch [31/50] batch [400/796] time 0.839 (0.831) data 0.000 (0.002) loss 2.6694 (0.8613) lr 1.0000e-03 eta 3:35:04
epoch [31/50] batch [420/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.8461 (0.8550) lr 1.0000e-03 eta 3:34:47
epoch [31/50] batch [440/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.4179 (0.8600) lr 1.0000e-03 eta 3:34:30
epoch [31/50] batch [460/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.0936 (0.8593) lr 1.0000e-03 eta 3:34:11
epoch [31/50] batch [480/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.4320 (0.8512) lr 1.0000e-03 eta 3:33:52
epoch [31/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.0235 (0.8461) lr 1.0000e-03 eta 3:33:33
epoch [31/50] batch [520/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.5884 (0.8413) lr 1.0000e-03 eta 3:33:16
epoch [31/50] batch [540/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.2642 (0.8373) lr 1.0000e-03 eta 3:32:58
epoch [31/50] batch [560/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.1682 (0.8294) lr 1.0000e-03 eta 3:32:39
epoch [31/50] batch [580/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.3091 (0.8282) lr 1.0000e-03 eta 3:32:21
epoch [31/50] batch [600/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.5946 (0.8307) lr 1.0000e-03 eta 3:32:04
epoch [31/50] batch [620/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.0558 (0.8355) lr 1.0000e-03 eta 3:31:48
epoch [31/50] batch [640/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.0350 (0.8278) lr 1.0000e-03 eta 3:31:31
epoch [31/50] batch [660/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.9260 (0.8274) lr 1.0000e-03 eta 3:31:15
epoch [31/50] batch [680/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.1557 (0.8289) lr 1.0000e-03 eta 3:31:00
epoch [31/50] batch [700/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.1068 (0.8260) lr 1.0000e-03 eta 3:30:42
epoch [31/50] batch [720/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.2007 (0.8283) lr 1.0000e-03 eta 3:30:25
epoch [31/50] batch [740/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.9240 (0.8280) lr 1.0000e-03 eta 3:30:09
epoch [31/50] batch [760/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.7732 (0.8259) lr 1.0000e-03 eta 3:29:52
epoch [31/50] batch [780/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.7057 (0.8256) lr 1.0000e-03 eta 3:29:35
epoch [32/50] batch [20/796] time 0.838 (0.862) data 0.000 (0.030) loss 0.7213 (0.5728) lr 9.3721e-04 eta 3:37:04
epoch [32/50] batch [40/796] time 0.809 (0.846) data 0.000 (0.015) loss 0.4622 (0.6671) lr 9.3721e-04 eta 3:32:44
epoch [32/50] batch [60/796] time 0.837 (0.841) data 0.000 (0.010) loss 0.4754 (0.6457) lr 9.3721e-04 eta 3:31:13
epoch [32/50] batch [80/796] time 0.839 (0.839) data 0.000 (0.008) loss 0.3411 (0.6483) lr 9.3721e-04 eta 3:30:15
epoch [32/50] batch [100/796] time 0.818 (0.837) data 0.000 (0.006) loss 0.1985 (0.7404) lr 9.3721e-04 eta 3:29:33
epoch [32/50] batch [120/796] time 0.817 (0.836) data 0.000 (0.005) loss 0.2548 (0.7416) lr 9.3721e-04 eta 3:29:02
epoch [32/50] batch [140/796] time 0.838 (0.835) data 0.000 (0.005) loss 0.8001 (0.7412) lr 9.3721e-04 eta 3:28:32
epoch [32/50] batch [160/796] time 0.838 (0.834) data 0.000 (0.004) loss 3.2608 (0.7727) lr 9.3721e-04 eta 3:28:06
epoch [32/50] batch [180/796] time 0.840 (0.834) data 0.000 (0.004) loss 1.6043 (0.8062) lr 9.3721e-04 eta 3:27:43
epoch [32/50] batch [200/796] time 0.818 (0.834) data 0.000 (0.003) loss 0.7086 (0.8029) lr 9.3721e-04 eta 3:27:21
epoch [32/50] batch [220/796] time 0.817 (0.833) data 0.000 (0.003) loss 1.5775 (0.8133) lr 9.3721e-04 eta 3:26:59
epoch [32/50] batch [240/796] time 0.841 (0.833) data 0.000 (0.003) loss 1.1340 (0.8059) lr 9.3721e-04 eta 3:26:40
epoch [32/50] batch [260/796] time 0.817 (0.833) data 0.000 (0.002) loss 1.7772 (0.8207) lr 9.3721e-04 eta 3:26:20
epoch [32/50] batch [280/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.0991 (0.8163) lr 9.3721e-04 eta 3:26:00
epoch [32/50] batch [300/796] time 0.817 (0.833) data 0.000 (0.002) loss 0.6793 (0.8241) lr 9.3721e-04 eta 3:25:41
epoch [32/50] batch [320/796] time 0.834 (0.832) data 0.000 (0.002) loss 1.3292 (0.8261) lr 9.3721e-04 eta 3:25:19
epoch [32/50] batch [340/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.2608 (0.8178) lr 9.3721e-04 eta 3:25:00
epoch [32/50] batch [360/796] time 0.837 (0.832) data 0.000 (0.002) loss 0.2927 (0.8256) lr 9.3721e-04 eta 3:24:41
epoch [32/50] batch [380/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.4906 (0.8304) lr 9.3721e-04 eta 3:24:24
epoch [32/50] batch [400/796] time 0.829 (0.832) data 0.000 (0.002) loss 1.6671 (0.8346) lr 9.3721e-04 eta 3:24:05
epoch [32/50] batch [420/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.0829 (0.8309) lr 9.3721e-04 eta 3:23:47
epoch [32/50] batch [440/796] time 0.839 (0.831) data 0.000 (0.002) loss 1.0643 (0.8477) lr 9.3721e-04 eta 3:23:29
epoch [32/50] batch [460/796] time 0.817 (0.832) data 0.000 (0.001) loss 2.5580 (0.8402) lr 9.3721e-04 eta 3:23:13
epoch [32/50] batch [480/796] time 0.831 (0.831) data 0.000 (0.001) loss 1.5610 (0.8375) lr 9.3721e-04 eta 3:22:56
epoch [32/50] batch [500/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.1620 (0.8327) lr 9.3721e-04 eta 3:22:38
epoch [32/50] batch [520/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.4960 (0.8289) lr 9.3721e-04 eta 3:22:19
epoch [32/50] batch [540/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2855 (0.8299) lr 9.3721e-04 eta 3:22:05
epoch [32/50] batch [560/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.8293 (0.8254) lr 9.3721e-04 eta 3:21:47
epoch [32/50] batch [580/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.7284 (0.8232) lr 9.3721e-04 eta 3:21:30
epoch [32/50] batch [600/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.1931 (0.8300) lr 9.3721e-04 eta 3:21:12
epoch [32/50] batch [620/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.0939 (0.8189) lr 9.3721e-04 eta 3:20:54
epoch [32/50] batch [640/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.5370 (0.8192) lr 9.3721e-04 eta 3:20:36
epoch [32/50] batch [660/796] time 0.844 (0.831) data 0.000 (0.001) loss 0.9894 (0.8187) lr 9.3721e-04 eta 3:20:21
epoch [32/50] batch [680/796] time 0.809 (0.831) data 0.000 (0.001) loss 0.5391 (0.8151) lr 9.3721e-04 eta 3:20:05
epoch [32/50] batch [700/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2853 (0.8198) lr 9.3721e-04 eta 3:19:49
epoch [32/50] batch [720/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.0159 (0.8225) lr 9.3721e-04 eta 3:19:31
epoch [32/50] batch [740/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.1422 (0.8189) lr 9.3721e-04 eta 3:19:14
epoch [32/50] batch [760/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.5083 (0.8197) lr 9.3721e-04 eta 3:18:58
epoch [32/50] batch [780/796] time 0.809 (0.831) data 0.000 (0.001) loss 0.3470 (0.8223) lr 9.3721e-04 eta 3:18:40
epoch [33/50] batch [20/796] time 0.838 (0.861) data 0.000 (0.029) loss 0.4181 (0.9459) lr 8.7467e-04 eta 3:25:16
epoch [33/50] batch [40/796] time 0.829 (0.845) data 0.000 (0.014) loss 1.0854 (0.8952) lr 8.7467e-04 eta 3:21:17
epoch [33/50] batch [60/796] time 0.828 (0.840) data 0.000 (0.010) loss 0.3626 (0.8747) lr 8.7467e-04 eta 3:19:49
epoch [33/50] batch [80/796] time 0.839 (0.838) data 0.000 (0.007) loss 0.5592 (0.8254) lr 8.7467e-04 eta 3:18:54
epoch [33/50] batch [100/796] time 0.842 (0.836) data 0.000 (0.006) loss 0.6749 (0.7862) lr 8.7467e-04 eta 3:18:16
epoch [33/50] batch [120/796] time 0.831 (0.835) data 0.000 (0.005) loss 0.3633 (0.7722) lr 8.7467e-04 eta 3:17:42
epoch [33/50] batch [140/796] time 0.818 (0.834) data 0.000 (0.004) loss 0.4331 (0.8094) lr 8.7467e-04 eta 3:17:11
epoch [33/50] batch [160/796] time 0.839 (0.834) data 0.000 (0.004) loss 1.2282 (0.8357) lr 8.7467e-04 eta 3:16:50
epoch [33/50] batch [180/796] time 0.839 (0.833) data 0.000 (0.003) loss 0.1114 (0.8391) lr 8.7467e-04 eta 3:16:32
epoch [33/50] batch [200/796] time 0.830 (0.833) data 0.000 (0.003) loss 0.8101 (0.8543) lr 8.7467e-04 eta 3:16:15
epoch [33/50] batch [220/796] time 0.818 (0.833) data 0.000 (0.003) loss 0.4260 (0.8615) lr 8.7467e-04 eta 3:15:56
epoch [33/50] batch [240/796] time 0.818 (0.833) data 0.000 (0.003) loss 0.0220 (0.8639) lr 8.7467e-04 eta 3:15:39
epoch [33/50] batch [260/796] time 0.817 (0.833) data 0.000 (0.002) loss 1.9438 (0.8568) lr 8.7467e-04 eta 3:15:19
epoch [33/50] batch [280/796] time 0.830 (0.833) data 0.000 (0.002) loss 1.8955 (0.8417) lr 8.7467e-04 eta 3:14:57
epoch [33/50] batch [300/796] time 0.840 (0.833) data 0.000 (0.002) loss 0.0940 (0.8347) lr 8.7467e-04 eta 3:14:39
epoch [33/50] batch [320/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.8306 (0.8305) lr 8.7467e-04 eta 3:14:19
epoch [33/50] batch [340/796] time 0.840 (0.832) data 0.000 (0.002) loss 0.1559 (0.8222) lr 8.7467e-04 eta 3:14:00
epoch [33/50] batch [360/796] time 0.838 (0.832) data 0.000 (0.002) loss 3.0444 (0.8240) lr 8.7467e-04 eta 3:13:40
epoch [33/50] batch [380/796] time 0.828 (0.832) data 0.000 (0.002) loss 0.1192 (0.8156) lr 8.7467e-04 eta 3:13:22
epoch [33/50] batch [400/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.1240 (0.8206) lr 8.7467e-04 eta 3:13:04
epoch [33/50] batch [420/796] time 0.836 (0.832) data 0.000 (0.002) loss 1.8040 (0.8277) lr 8.7467e-04 eta 3:12:44
epoch [33/50] batch [440/796] time 0.831 (0.831) data 0.000 (0.001) loss 0.1579 (0.8355) lr 8.7467e-04 eta 3:12:25
epoch [33/50] batch [460/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.3458 (0.8405) lr 8.7467e-04 eta 3:12:07
epoch [33/50] batch [480/796] time 0.809 (0.831) data 0.000 (0.001) loss 0.5916 (0.8400) lr 8.7467e-04 eta 3:11:49
epoch [33/50] batch [500/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.3794 (0.8363) lr 8.7467e-04 eta 3:11:31
epoch [33/50] batch [520/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.8221 (0.8482) lr 8.7467e-04 eta 3:11:12
epoch [33/50] batch [540/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.0798 (0.8440) lr 8.7467e-04 eta 3:10:56
epoch [33/50] batch [560/796] time 0.841 (0.831) data 0.000 (0.001) loss 1.3144 (0.8445) lr 8.7467e-04 eta 3:10:38
epoch [33/50] batch [580/796] time 0.819 (0.831) data 0.000 (0.001) loss 2.1526 (0.8450) lr 8.7467e-04 eta 3:10:20
epoch [33/50] batch [600/796] time 0.834 (0.831) data 0.000 (0.001) loss 0.1811 (0.8478) lr 8.7467e-04 eta 3:10:03
epoch [33/50] batch [620/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.5200 (0.8479) lr 8.7467e-04 eta 3:09:46
epoch [33/50] batch [640/796] time 0.841 (0.831) data 0.000 (0.001) loss 0.0569 (0.8521) lr 8.7467e-04 eta 3:09:30
epoch [33/50] batch [660/796] time 0.836 (0.831) data 0.000 (0.001) loss 0.2427 (0.8443) lr 8.7467e-04 eta 3:09:13
epoch [33/50] batch [680/796] time 0.811 (0.831) data 0.000 (0.001) loss 1.3698 (0.8458) lr 8.7467e-04 eta 3:08:55
epoch [33/50] batch [700/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.0131 (0.8402) lr 8.7467e-04 eta 3:08:39
epoch [33/50] batch [720/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.0828 (0.8438) lr 8.7467e-04 eta 3:08:21
epoch [33/50] batch [740/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.1144 (0.8387) lr 8.7467e-04 eta 3:08:04
epoch [33/50] batch [760/796] time 0.837 (0.830) data 0.000 (0.001) loss 1.1903 (0.8490) lr 8.7467e-04 eta 3:07:47
epoch [33/50] batch [780/796] time 0.837 (0.830) data 0.000 (0.001) loss 0.2461 (0.8405) lr 8.7467e-04 eta 3:07:31
epoch [34/50] batch [20/796] time 0.817 (0.858) data 0.000 (0.027) loss 0.7461 (0.6704) lr 8.1262e-04 eta 3:13:16
epoch [34/50] batch [40/796] time 0.829 (0.843) data 0.000 (0.014) loss 0.5869 (0.7028) lr 8.1262e-04 eta 3:09:40
epoch [34/50] batch [60/796] time 0.829 (0.839) data 0.000 (0.009) loss 0.6136 (0.7347) lr 8.1262e-04 eta 3:08:23
epoch [34/50] batch [80/796] time 0.838 (0.837) data 0.000 (0.007) loss 0.4599 (0.7551) lr 8.1262e-04 eta 3:07:39
epoch [34/50] batch [100/796] time 0.838 (0.835) data 0.000 (0.006) loss 0.8754 (0.7050) lr 8.1262e-04 eta 3:06:57
epoch [34/50] batch [120/796] time 0.819 (0.834) data 0.000 (0.005) loss 0.8330 (0.7354) lr 8.1262e-04 eta 3:06:27
epoch [34/50] batch [140/796] time 0.818 (0.834) data 0.000 (0.004) loss 0.7114 (0.7294) lr 8.1262e-04 eta 3:06:06
epoch [34/50] batch [160/796] time 0.843 (0.833) data 0.000 (0.004) loss 0.2324 (0.7801) lr 8.1262e-04 eta 3:05:44
epoch [34/50] batch [180/796] time 0.837 (0.833) data 0.000 (0.003) loss 0.1110 (0.7642) lr 8.1262e-04 eta 3:05:21
epoch [34/50] batch [200/796] time 0.830 (0.833) data 0.000 (0.003) loss 0.5191 (0.7691) lr 8.1262e-04 eta 3:04:59
epoch [34/50] batch [220/796] time 0.829 (0.832) data 0.000 (0.003) loss 0.8093 (0.7712) lr 8.1262e-04 eta 3:04:38
epoch [34/50] batch [240/796] time 0.841 (0.833) data 0.000 (0.002) loss 1.3148 (0.8012) lr 8.1262e-04 eta 3:04:26
epoch [34/50] batch [260/796] time 0.842 (0.833) data 0.000 (0.002) loss 0.9847 (0.8123) lr 8.1262e-04 eta 3:04:12
epoch [34/50] batch [280/796] time 0.840 (0.833) data 0.000 (0.002) loss 0.7517 (0.8122) lr 8.1262e-04 eta 3:03:56
epoch [34/50] batch [300/796] time 0.842 (0.833) data 0.000 (0.002) loss 0.3761 (0.8049) lr 8.1262e-04 eta 3:03:39
epoch [34/50] batch [320/796] time 0.840 (0.833) data 0.000 (0.002) loss 1.1231 (0.8228) lr 8.1262e-04 eta 3:03:26
epoch [34/50] batch [340/796] time 0.839 (0.833) data 0.000 (0.002) loss 2.6085 (0.8170) lr 8.1262e-04 eta 3:03:09
epoch [34/50] batch [360/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.6536 (0.8221) lr 8.1262e-04 eta 3:02:51
epoch [34/50] batch [380/796] time 0.839 (0.833) data 0.000 (0.002) loss 0.0873 (0.8145) lr 8.1262e-04 eta 3:02:31
epoch [34/50] batch [400/796] time 0.817 (0.833) data 0.000 (0.002) loss 0.2573 (0.8172) lr 8.1262e-04 eta 3:02:13
epoch [34/50] batch [420/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.1257 (0.8265) lr 8.1262e-04 eta 3:01:55
epoch [34/50] batch [440/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.2865 (0.8200) lr 8.1262e-04 eta 3:01:38
epoch [34/50] batch [460/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.9068 (0.8244) lr 8.1262e-04 eta 3:01:20
epoch [34/50] batch [480/796] time 0.841 (0.832) data 0.000 (0.001) loss 0.7101 (0.8291) lr 8.1262e-04 eta 3:01:04
epoch [34/50] batch [500/796] time 0.841 (0.832) data 0.000 (0.001) loss 1.0181 (0.8262) lr 8.1262e-04 eta 3:00:48
epoch [34/50] batch [520/796] time 0.829 (0.832) data 0.000 (0.001) loss 1.0992 (0.8172) lr 8.1262e-04 eta 3:00:29
epoch [34/50] batch [540/796] time 0.839 (0.832) data 0.000 (0.001) loss 1.1990 (0.8207) lr 8.1262e-04 eta 3:00:11
epoch [34/50] batch [560/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.3307 (0.8155) lr 8.1262e-04 eta 2:59:53
epoch [34/50] batch [580/796] time 0.831 (0.832) data 0.000 (0.001) loss 0.1954 (0.8176) lr 8.1262e-04 eta 2:59:36
epoch [34/50] batch [600/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.7070 (0.8149) lr 8.1262e-04 eta 2:59:17
epoch [34/50] batch [620/796] time 0.839 (0.832) data 0.000 (0.001) loss 1.0509 (0.8124) lr 8.1262e-04 eta 2:59:00
epoch [34/50] batch [640/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.2353 (0.8135) lr 8.1262e-04 eta 2:58:44
epoch [34/50] batch [660/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.4633 (0.8086) lr 8.1262e-04 eta 2:58:26
epoch [34/50] batch [680/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.5916 (0.8166) lr 8.1262e-04 eta 2:58:09
epoch [34/50] batch [700/796] time 0.830 (0.832) data 0.000 (0.001) loss 1.2062 (0.8182) lr 8.1262e-04 eta 2:57:51
epoch [34/50] batch [720/796] time 0.839 (0.832) data 0.000 (0.001) loss 2.1878 (0.8177) lr 8.1262e-04 eta 2:57:35
epoch [34/50] batch [740/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.4773 (0.8232) lr 8.1262e-04 eta 2:57:17
epoch [34/50] batch [760/796] time 0.819 (0.832) data 0.000 (0.001) loss 0.2637 (0.8247) lr 8.1262e-04 eta 2:57:00
epoch [34/50] batch [780/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.4229 (0.8264) lr 8.1262e-04 eta 2:56:45
epoch [35/50] batch [20/796] time 0.809 (0.857) data 0.000 (0.029) loss 1.5303 (0.8823) lr 7.5131e-04 eta 3:01:34
epoch [35/50] batch [40/796] time 0.838 (0.843) data 0.000 (0.014) loss 1.7838 (0.8779) lr 7.5131e-04 eta 2:58:26
epoch [35/50] batch [60/796] time 0.818 (0.838) data 0.000 (0.010) loss 1.3009 (0.8693) lr 7.5131e-04 eta 2:57:08
epoch [35/50] batch [80/796] time 0.830 (0.836) data 0.000 (0.007) loss 1.0536 (0.8674) lr 7.5131e-04 eta 2:56:24
epoch [35/50] batch [100/796] time 0.840 (0.835) data 0.000 (0.006) loss 0.2593 (0.8627) lr 7.5131e-04 eta 2:55:47
epoch [35/50] batch [120/796] time 0.838 (0.834) data 0.000 (0.005) loss 0.5655 (0.8454) lr 7.5131e-04 eta 2:55:22
epoch [35/50] batch [140/796] time 0.838 (0.833) data 0.000 (0.004) loss 2.0137 (0.8164) lr 7.5131e-04 eta 2:54:58
epoch [35/50] batch [160/796] time 0.838 (0.833) data 0.000 (0.004) loss 0.7647 (0.7985) lr 7.5131e-04 eta 2:54:36
epoch [35/50] batch [180/796] time 0.842 (0.833) data 0.000 (0.003) loss 0.8128 (0.7950) lr 7.5131e-04 eta 2:54:15
epoch [35/50] batch [200/796] time 0.839 (0.832) data 0.000 (0.003) loss 2.2783 (0.8119) lr 7.5131e-04 eta 2:53:56
epoch [35/50] batch [220/796] time 0.830 (0.832) data 0.000 (0.003) loss 1.7087 (0.8413) lr 7.5131e-04 eta 2:53:38
epoch [35/50] batch [240/796] time 0.831 (0.832) data 0.000 (0.003) loss 0.1654 (0.8403) lr 7.5131e-04 eta 2:53:21
epoch [35/50] batch [260/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.2890 (0.8430) lr 7.5131e-04 eta 2:53:03
epoch [35/50] batch [280/796] time 0.840 (0.832) data 0.000 (0.002) loss 0.9236 (0.8492) lr 7.5131e-04 eta 2:52:45
epoch [35/50] batch [300/796] time 0.831 (0.832) data 0.000 (0.002) loss 0.6399 (0.8555) lr 7.5131e-04 eta 2:52:30
epoch [35/50] batch [320/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.6082 (0.8613) lr 7.5131e-04 eta 2:52:10
epoch [35/50] batch [340/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.3519 (0.8576) lr 7.5131e-04 eta 2:51:52
epoch [35/50] batch [360/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.3367 (0.8412) lr 7.5131e-04 eta 2:51:33
epoch [35/50] batch [380/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.7352 (0.8621) lr 7.5131e-04 eta 2:51:17
epoch [35/50] batch [400/796] time 0.831 (0.832) data 0.000 (0.002) loss 0.7628 (0.8594) lr 7.5131e-04 eta 2:51:01
epoch [35/50] batch [420/796] time 0.829 (0.832) data 0.000 (0.002) loss 1.2488 (0.8513) lr 7.5131e-04 eta 2:50:43
epoch [35/50] batch [440/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.7053 (0.8523) lr 7.5131e-04 eta 2:50:26
epoch [35/50] batch [460/796] time 0.840 (0.832) data 0.000 (0.001) loss 1.1391 (0.8501) lr 7.5131e-04 eta 2:50:07
epoch [35/50] batch [480/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.9953 (0.8391) lr 7.5131e-04 eta 2:49:50
epoch [35/50] batch [500/796] time 0.812 (0.831) data 0.000 (0.001) loss 0.9087 (0.8364) lr 7.5131e-04 eta 2:49:31
epoch [35/50] batch [520/796] time 0.832 (0.831) data 0.000 (0.001) loss 1.5892 (0.8333) lr 7.5131e-04 eta 2:49:14
epoch [35/50] batch [540/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.0117 (0.8475) lr 7.5131e-04 eta 2:48:56
epoch [35/50] batch [560/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.0710 (0.8410) lr 7.5131e-04 eta 2:48:39
epoch [35/50] batch [580/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.0432 (0.8411) lr 7.5131e-04 eta 2:48:22
epoch [35/50] batch [600/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.4701 (0.8341) lr 7.5131e-04 eta 2:48:06
epoch [35/50] batch [620/796] time 0.841 (0.831) data 0.000 (0.001) loss 0.5380 (0.8315) lr 7.5131e-04 eta 2:47:50
epoch [35/50] batch [640/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.0148 (0.8211) lr 7.5131e-04 eta 2:47:33
epoch [35/50] batch [660/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.9652 (0.8161) lr 7.5131e-04 eta 2:47:17
epoch [35/50] batch [680/796] time 0.842 (0.831) data 0.000 (0.001) loss 2.2025 (0.8196) lr 7.5131e-04 eta 2:47:00
epoch [35/50] batch [700/796] time 0.820 (0.831) data 0.000 (0.001) loss 0.4011 (0.8169) lr 7.5131e-04 eta 2:46:43
epoch [35/50] batch [720/796] time 0.841 (0.831) data 0.000 (0.001) loss 0.1610 (0.8210) lr 7.5131e-04 eta 2:46:29
epoch [35/50] batch [740/796] time 0.820 (0.831) data 0.000 (0.001) loss 1.3650 (0.8169) lr 7.5131e-04 eta 2:46:13
epoch [35/50] batch [760/796] time 0.820 (0.832) data 0.000 (0.001) loss 2.2094 (0.8210) lr 7.5131e-04 eta 2:45:58
epoch [35/50] batch [780/796] time 0.834 (0.832) data 0.000 (0.001) loss 0.8212 (0.8230) lr 7.5131e-04 eta 2:45:42
epoch [36/50] batch [20/796] time 0.835 (0.870) data 0.000 (0.032) loss 1.8183 (0.9666) lr 6.9098e-04 eta 2:52:46
epoch [36/50] batch [40/796] time 0.848 (0.853) data 0.000 (0.016) loss 1.1258 (0.7926) lr 6.9098e-04 eta 2:49:07
epoch [36/50] batch [60/796] time 0.842 (0.846) data 0.001 (0.011) loss 1.0682 (0.8243) lr 6.9098e-04 eta 2:47:32
epoch [36/50] batch [80/796] time 0.834 (0.843) data 0.000 (0.008) loss 0.8198 (0.8545) lr 6.9098e-04 eta 2:46:37
epoch [36/50] batch [100/796] time 0.842 (0.841) data 0.000 (0.007) loss 0.5832 (0.8088) lr 6.9098e-04 eta 2:45:59
epoch [36/50] batch [120/796] time 0.844 (0.840) data 0.000 (0.006) loss 0.4158 (0.8214) lr 6.9098e-04 eta 2:45:25
epoch [36/50] batch [140/796] time 0.844 (0.839) data 0.000 (0.005) loss 0.4989 (0.8161) lr 6.9098e-04 eta 2:44:55
epoch [36/50] batch [160/796] time 0.819 (0.838) data 0.000 (0.004) loss 0.0756 (0.7959) lr 6.9098e-04 eta 2:44:32
epoch [36/50] batch [180/796] time 0.833 (0.837) data 0.000 (0.004) loss 0.2634 (0.7782) lr 6.9098e-04 eta 2:44:08
epoch [36/50] batch [200/796] time 0.819 (0.837) data 0.000 (0.003) loss 3.1995 (0.7965) lr 6.9098e-04 eta 2:43:47
epoch [36/50] batch [220/796] time 0.830 (0.837) data 0.000 (0.003) loss 0.2445 (0.7900) lr 6.9098e-04 eta 2:43:26
epoch [36/50] batch [240/796] time 0.839 (0.836) data 0.000 (0.003) loss 0.4414 (0.7710) lr 6.9098e-04 eta 2:43:05
epoch [36/50] batch [260/796] time 0.838 (0.836) data 0.000 (0.003) loss 0.4598 (0.7888) lr 6.9098e-04 eta 2:42:43
epoch [36/50] batch [280/796] time 0.840 (0.835) data 0.000 (0.002) loss 0.1188 (0.7884) lr 6.9098e-04 eta 2:42:20
epoch [36/50] batch [300/796] time 0.831 (0.835) data 0.000 (0.002) loss 0.8655 (0.7907) lr 6.9098e-04 eta 2:41:59
epoch [36/50] batch [320/796] time 0.818 (0.835) data 0.000 (0.002) loss 1.4012 (0.7919) lr 6.9098e-04 eta 2:41:37
epoch [36/50] batch [340/796] time 0.839 (0.834) data 0.000 (0.002) loss 1.0871 (0.7922) lr 6.9098e-04 eta 2:41:14
epoch [36/50] batch [360/796] time 0.818 (0.834) data 0.000 (0.002) loss 0.0591 (0.7858) lr 6.9098e-04 eta 2:40:54
epoch [36/50] batch [380/796] time 0.840 (0.834) data 0.000 (0.002) loss 0.9174 (0.7892) lr 6.9098e-04 eta 2:40:35
epoch [36/50] batch [400/796] time 0.838 (0.833) data 0.000 (0.002) loss 1.5251 (0.7881) lr 6.9098e-04 eta 2:40:16
epoch [36/50] batch [420/796] time 0.829 (0.833) data 0.000 (0.002) loss 0.1871 (0.7872) lr 6.9098e-04 eta 2:39:58
epoch [36/50] batch [440/796] time 0.817 (0.833) data 0.000 (0.002) loss 0.3138 (0.7980) lr 6.9098e-04 eta 2:39:39
epoch [36/50] batch [460/796] time 0.818 (0.833) data 0.000 (0.002) loss 1.1440 (0.8009) lr 6.9098e-04 eta 2:39:22
epoch [36/50] batch [480/796] time 0.831 (0.833) data 0.000 (0.002) loss 0.7193 (0.7985) lr 6.9098e-04 eta 2:39:05
epoch [36/50] batch [500/796] time 0.830 (0.833) data 0.000 (0.001) loss 1.5737 (0.7981) lr 6.9098e-04 eta 2:38:47
epoch [36/50] batch [520/796] time 0.809 (0.833) data 0.000 (0.001) loss 0.3634 (0.7856) lr 6.9098e-04 eta 2:38:28
epoch [36/50] batch [540/796] time 0.837 (0.833) data 0.000 (0.001) loss 0.1313 (0.7903) lr 6.9098e-04 eta 2:38:11
epoch [36/50] batch [560/796] time 0.830 (0.832) data 0.000 (0.001) loss 0.0715 (0.7909) lr 6.9098e-04 eta 2:37:52
epoch [36/50] batch [580/796] time 0.819 (0.832) data 0.000 (0.001) loss 0.6533 (0.8023) lr 6.9098e-04 eta 2:37:34
epoch [36/50] batch [600/796] time 0.839 (0.832) data 0.001 (0.001) loss 1.1360 (0.8054) lr 6.9098e-04 eta 2:37:17
epoch [36/50] batch [620/796] time 0.830 (0.832) data 0.000 (0.001) loss 0.4039 (0.8034) lr 6.9098e-04 eta 2:36:58
epoch [36/50] batch [640/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.3786 (0.7974) lr 6.9098e-04 eta 2:36:41
epoch [36/50] batch [660/796] time 0.839 (0.832) data 0.000 (0.001) loss 1.5285 (0.7989) lr 6.9098e-04 eta 2:36:24
epoch [36/50] batch [680/796] time 0.818 (0.832) data 0.000 (0.001) loss 1.3271 (0.8001) lr 6.9098e-04 eta 2:36:06
epoch [36/50] batch [700/796] time 0.837 (0.832) data 0.000 (0.001) loss 0.3427 (0.7989) lr 6.9098e-04 eta 2:35:48
epoch [36/50] batch [720/796] time 0.830 (0.832) data 0.000 (0.001) loss 1.2273 (0.8019) lr 6.9098e-04 eta 2:35:30
epoch [36/50] batch [740/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.2971 (0.7951) lr 6.9098e-04 eta 2:35:14
epoch [36/50] batch [760/796] time 0.838 (0.832) data 0.000 (0.001) loss 1.2081 (0.7978) lr 6.9098e-04 eta 2:34:56
epoch [36/50] batch [780/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.2608 (0.7987) lr 6.9098e-04 eta 2:34:39
epoch [37/50] batch [20/796] time 0.841 (0.868) data 0.000 (0.037) loss 0.8332 (0.8488) lr 6.3188e-04 eta 2:40:56
epoch [37/50] batch [40/796] time 0.818 (0.848) data 0.000 (0.018) loss 0.8809 (0.7634) lr 6.3188e-04 eta 2:36:56
epoch [37/50] batch [60/796] time 0.819 (0.842) data 0.000 (0.012) loss 1.2264 (0.7832) lr 6.3188e-04 eta 2:35:32
epoch [37/50] batch [80/796] time 0.820 (0.839) data 0.000 (0.009) loss 0.6084 (0.8219) lr 6.3188e-04 eta 2:34:46
epoch [37/50] batch [100/796] time 0.823 (0.838) data 0.000 (0.007) loss 1.4943 (0.8136) lr 6.3188e-04 eta 2:34:14
epoch [37/50] batch [120/796] time 0.840 (0.837) data 0.000 (0.006) loss 0.0429 (0.7750) lr 6.3188e-04 eta 2:33:45
epoch [37/50] batch [140/796] time 0.811 (0.836) data 0.000 (0.005) loss 1.2694 (0.8054) lr 6.3188e-04 eta 2:33:19
epoch [37/50] batch [160/796] time 0.819 (0.835) data 0.000 (0.005) loss 1.2772 (0.7891) lr 6.3188e-04 eta 2:32:56
epoch [37/50] batch [180/796] time 0.841 (0.835) data 0.000 (0.004) loss 0.3502 (0.8038) lr 6.3188e-04 eta 2:32:36
epoch [37/50] batch [200/796] time 0.840 (0.835) data 0.000 (0.004) loss 1.3754 (0.8107) lr 6.3188e-04 eta 2:32:17
epoch [37/50] batch [220/796] time 0.819 (0.835) data 0.000 (0.004) loss 0.4144 (0.8212) lr 6.3188e-04 eta 2:31:56
epoch [37/50] batch [240/796] time 0.844 (0.834) data 0.000 (0.003) loss 1.2315 (0.8188) lr 6.3188e-04 eta 2:31:36
epoch [37/50] batch [260/796] time 0.820 (0.834) data 0.000 (0.003) loss 0.1685 (0.8042) lr 6.3188e-04 eta 2:31:18
epoch [37/50] batch [280/796] time 0.819 (0.834) data 0.000 (0.003) loss 0.8631 (0.8025) lr 6.3188e-04 eta 2:31:00
epoch [37/50] batch [300/796] time 0.834 (0.834) data 0.000 (0.003) loss 0.6948 (0.8090) lr 6.3188e-04 eta 2:30:43
epoch [37/50] batch [320/796] time 0.843 (0.834) data 0.000 (0.002) loss 1.4374 (0.8169) lr 6.3188e-04 eta 2:30:26
epoch [37/50] batch [340/796] time 0.835 (0.834) data 0.000 (0.002) loss 0.1749 (0.8170) lr 6.3188e-04 eta 2:30:08
epoch [37/50] batch [360/796] time 0.833 (0.834) data 0.000 (0.002) loss 1.4706 (0.8154) lr 6.3188e-04 eta 2:29:50
epoch [37/50] batch [380/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.0852 (0.8222) lr 6.3188e-04 eta 2:29:34
epoch [37/50] batch [400/796] time 0.820 (0.834) data 0.000 (0.002) loss 1.0831 (0.8177) lr 6.3188e-04 eta 2:29:16
epoch [37/50] batch [420/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.5025 (0.8081) lr 6.3188e-04 eta 2:28:59
epoch [37/50] batch [440/796] time 0.839 (0.834) data 0.000 (0.002) loss 1.9089 (0.8151) lr 6.3188e-04 eta 2:28:44
epoch [37/50] batch [460/796] time 0.835 (0.834) data 0.000 (0.002) loss 0.5148 (0.8131) lr 6.3188e-04 eta 2:28:26
epoch [37/50] batch [480/796] time 0.819 (0.834) data 0.000 (0.002) loss 0.4335 (0.8050) lr 6.3188e-04 eta 2:28:08
epoch [37/50] batch [500/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.1866 (0.8072) lr 6.3188e-04 eta 2:27:52
epoch [37/50] batch [520/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.3857 (0.8031) lr 6.3188e-04 eta 2:27:35
epoch [37/50] batch [540/796] time 0.843 (0.833) data 0.000 (0.002) loss 1.1573 (0.7973) lr 6.3188e-04 eta 2:27:18
epoch [37/50] batch [560/796] time 0.843 (0.833) data 0.000 (0.001) loss 0.0700 (0.8036) lr 6.3188e-04 eta 2:27:01
epoch [37/50] batch [580/796] time 0.811 (0.834) data 0.000 (0.001) loss 0.5966 (0.8135) lr 6.3188e-04 eta 2:26:45
epoch [37/50] batch [600/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.7406 (0.8110) lr 6.3188e-04 eta 2:26:28
epoch [37/50] batch [620/796] time 0.833 (0.833) data 0.000 (0.001) loss 0.3547 (0.8067) lr 6.3188e-04 eta 2:26:11
epoch [37/50] batch [640/796] time 0.834 (0.833) data 0.000 (0.001) loss 0.1821 (0.8029) lr 6.3188e-04 eta 2:25:54
epoch [37/50] batch [660/796] time 0.843 (0.833) data 0.000 (0.001) loss 0.6077 (0.8083) lr 6.3188e-04 eta 2:25:37
epoch [37/50] batch [680/796] time 0.819 (0.833) data 0.000 (0.001) loss 0.4552 (0.8120) lr 6.3188e-04 eta 2:25:20
epoch [37/50] batch [700/796] time 0.834 (0.833) data 0.000 (0.001) loss 0.1127 (0.8117) lr 6.3188e-04 eta 2:25:04
epoch [37/50] batch [720/796] time 0.820 (0.833) data 0.000 (0.001) loss 1.2122 (0.8156) lr 6.3188e-04 eta 2:24:46
epoch [37/50] batch [740/796] time 0.834 (0.833) data 0.000 (0.001) loss 0.8269 (0.8150) lr 6.3188e-04 eta 2:24:30
epoch [37/50] batch [760/796] time 0.844 (0.833) data 0.000 (0.001) loss 0.9169 (0.8130) lr 6.3188e-04 eta 2:24:13
epoch [37/50] batch [780/796] time 0.845 (0.833) data 0.000 (0.001) loss 0.1071 (0.8164) lr 6.3188e-04 eta 2:23:57
epoch [38/50] batch [20/796] time 0.843 (0.860) data 0.000 (0.027) loss 0.6516 (0.6787) lr 5.7422e-04 eta 2:28:03
epoch [38/50] batch [40/796] time 0.842 (0.846) data 0.000 (0.014) loss 1.8292 (0.7983) lr 5.7422e-04 eta 2:25:23
epoch [38/50] batch [60/796] time 0.832 (0.842) data 0.000 (0.009) loss 1.3288 (0.7997) lr 5.7422e-04 eta 2:24:18
epoch [38/50] batch [80/796] time 0.843 (0.839) data 0.000 (0.007) loss 0.0344 (0.7970) lr 5.7422e-04 eta 2:23:37
epoch [38/50] batch [100/796] time 0.820 (0.838) data 0.000 (0.006) loss 1.3124 (0.8324) lr 5.7422e-04 eta 2:23:12
epoch [38/50] batch [120/796] time 0.833 (0.837) data 0.000 (0.005) loss 0.1741 (0.8055) lr 5.7422e-04 eta 2:22:43
epoch [38/50] batch [140/796] time 0.820 (0.837) data 0.000 (0.004) loss 0.2990 (0.8305) lr 5.7422e-04 eta 2:22:20
epoch [38/50] batch [160/796] time 0.820 (0.836) data 0.000 (0.004) loss 0.6679 (0.8216) lr 5.7422e-04 eta 2:22:00
epoch [38/50] batch [180/796] time 0.843 (0.837) data 0.000 (0.003) loss 0.6103 (0.8344) lr 5.7422e-04 eta 2:21:45
epoch [38/50] batch [200/796] time 0.834 (0.836) data 0.000 (0.003) loss 0.1936 (0.8359) lr 5.7422e-04 eta 2:21:24
epoch [38/50] batch [220/796] time 0.834 (0.836) data 0.000 (0.003) loss 0.6990 (0.8293) lr 5.7422e-04 eta 2:21:03
epoch [38/50] batch [240/796] time 0.819 (0.836) data 0.000 (0.002) loss 1.2836 (0.8374) lr 5.7422e-04 eta 2:20:47
epoch [38/50] batch [260/796] time 0.844 (0.836) data 0.000 (0.002) loss 0.5117 (0.8349) lr 5.7422e-04 eta 2:20:29
epoch [38/50] batch [280/796] time 0.836 (0.836) data 0.000 (0.002) loss 0.7936 (0.8483) lr 5.7422e-04 eta 2:20:12
epoch [38/50] batch [300/796] time 0.834 (0.835) data 0.000 (0.002) loss 0.6939 (0.8323) lr 5.7422e-04 eta 2:19:52
epoch [38/50] batch [320/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.0346 (0.8377) lr 5.7422e-04 eta 2:19:35
epoch [38/50] batch [340/796] time 0.819 (0.835) data 0.000 (0.002) loss 0.8985 (0.8346) lr 5.7422e-04 eta 2:19:16
epoch [38/50] batch [360/796] time 0.835 (0.835) data 0.000 (0.002) loss 0.1530 (0.8382) lr 5.7422e-04 eta 2:18:57
epoch [38/50] batch [380/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.0190 (0.8371) lr 5.7422e-04 eta 2:18:39
epoch [38/50] batch [400/796] time 0.841 (0.835) data 0.000 (0.002) loss 1.6879 (0.8299) lr 5.7422e-04 eta 2:18:22
epoch [38/50] batch [420/796] time 0.839 (0.834) data 0.000 (0.001) loss 1.3559 (0.8242) lr 5.7422e-04 eta 2:18:03
epoch [38/50] batch [440/796] time 0.839 (0.834) data 0.000 (0.001) loss 1.4500 (0.8269) lr 5.7422e-04 eta 2:17:46
epoch [38/50] batch [460/796] time 0.838 (0.834) data 0.000 (0.001) loss 0.3302 (0.8223) lr 5.7422e-04 eta 2:17:28
epoch [38/50] batch [480/796] time 0.838 (0.834) data 0.000 (0.001) loss 0.7575 (0.8172) lr 5.7422e-04 eta 2:17:10
epoch [38/50] batch [500/796] time 0.837 (0.834) data 0.000 (0.001) loss 0.4088 (0.8125) lr 5.7422e-04 eta 2:16:52
epoch [38/50] batch [520/796] time 0.817 (0.834) data 0.000 (0.001) loss 0.8478 (0.8168) lr 5.7422e-04 eta 2:16:35
epoch [38/50] batch [540/796] time 0.817 (0.834) data 0.000 (0.001) loss 0.6621 (0.8161) lr 5.7422e-04 eta 2:16:16
epoch [38/50] batch [560/796] time 0.838 (0.834) data 0.000 (0.001) loss 0.3628 (0.8089) lr 5.7422e-04 eta 2:15:58
epoch [38/50] batch [580/796] time 0.839 (0.833) data 0.000 (0.001) loss 0.0993 (0.8090) lr 5.7422e-04 eta 2:15:41
epoch [38/50] batch [600/796] time 0.838 (0.833) data 0.000 (0.001) loss 0.8980 (0.8102) lr 5.7422e-04 eta 2:15:23
epoch [38/50] batch [620/796] time 0.810 (0.833) data 0.000 (0.001) loss 0.4555 (0.8107) lr 5.7422e-04 eta 2:15:06
epoch [38/50] batch [640/796] time 0.839 (0.833) data 0.000 (0.001) loss 3.0642 (0.8116) lr 5.7422e-04 eta 2:14:48
epoch [38/50] batch [660/796] time 0.837 (0.833) data 0.000 (0.001) loss 1.7228 (0.8065) lr 5.7422e-04 eta 2:14:31
epoch [38/50] batch [680/796] time 0.818 (0.833) data 0.000 (0.001) loss 0.0968 (0.8081) lr 5.7422e-04 eta 2:14:13
epoch [38/50] batch [700/796] time 0.829 (0.833) data 0.000 (0.001) loss 0.0629 (0.8055) lr 5.7422e-04 eta 2:13:56
epoch [38/50] batch [720/796] time 0.818 (0.833) data 0.000 (0.001) loss 0.4556 (0.8100) lr 5.7422e-04 eta 2:13:38
epoch [38/50] batch [740/796] time 0.830 (0.833) data 0.000 (0.001) loss 0.4137 (0.8043) lr 5.7422e-04 eta 2:13:21
epoch [38/50] batch [760/796] time 0.839 (0.833) data 0.000 (0.001) loss 1.1054 (0.8005) lr 5.7422e-04 eta 2:13:03
epoch [38/50] batch [780/796] time 0.838 (0.833) data 0.000 (0.001) loss 0.1318 (0.7980) lr 5.7422e-04 eta 2:12:46
epoch [39/50] batch [20/796] time 0.839 (0.862) data 0.000 (0.031) loss 0.5095 (0.6978) lr 5.1825e-04 eta 2:16:58
epoch [39/50] batch [40/796] time 0.820 (0.846) data 0.000 (0.016) loss 0.5738 (0.7508) lr 5.1825e-04 eta 2:14:02
epoch [39/50] batch [60/796] time 0.834 (0.841) data 0.000 (0.010) loss 1.0538 (0.8724) lr 5.1825e-04 eta 2:13:04
epoch [39/50] batch [80/796] time 0.821 (0.839) data 0.000 (0.008) loss 0.0466 (0.8589) lr 5.1825e-04 eta 2:12:25
epoch [39/50] batch [100/796] time 0.834 (0.838) data 0.000 (0.006) loss 0.3605 (0.8581) lr 5.1825e-04 eta 2:12:01
epoch [39/50] batch [120/796] time 0.820 (0.837) data 0.000 (0.005) loss 1.2782 (0.8758) lr 5.1825e-04 eta 2:11:37
epoch [39/50] batch [140/796] time 0.846 (0.837) data 0.000 (0.005) loss 1.0708 (0.8428) lr 5.1825e-04 eta 2:11:19
epoch [39/50] batch [160/796] time 0.842 (0.836) data 0.000 (0.004) loss 0.1090 (0.8501) lr 5.1825e-04 eta 2:10:55
epoch [39/50] batch [180/796] time 0.835 (0.836) data 0.000 (0.004) loss 0.4323 (0.8573) lr 5.1825e-04 eta 2:10:33
epoch [39/50] batch [200/796] time 0.834 (0.836) data 0.000 (0.003) loss 1.4055 (0.8800) lr 5.1825e-04 eta 2:10:16
epoch [39/50] batch [220/796] time 0.843 (0.836) data 0.000 (0.003) loss 0.2095 (0.8779) lr 5.1825e-04 eta 2:09:56
epoch [39/50] batch [240/796] time 0.834 (0.835) data 0.000 (0.003) loss 0.7622 (0.8802) lr 5.1825e-04 eta 2:09:39
epoch [39/50] batch [260/796] time 0.843 (0.835) data 0.000 (0.003) loss 2.5583 (0.8834) lr 5.1825e-04 eta 2:09:22
epoch [39/50] batch [280/796] time 0.820 (0.835) data 0.000 (0.002) loss 0.2792 (0.8769) lr 5.1825e-04 eta 2:09:02
epoch [39/50] batch [300/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.0666 (0.8739) lr 5.1825e-04 eta 2:08:47
epoch [39/50] batch [320/796] time 0.846 (0.835) data 0.000 (0.002) loss 0.6453 (0.8708) lr 5.1825e-04 eta 2:08:30
epoch [39/50] batch [340/796] time 0.820 (0.835) data 0.000 (0.002) loss 0.1293 (0.8682) lr 5.1825e-04 eta 2:08:13
epoch [39/50] batch [360/796] time 0.811 (0.835) data 0.000 (0.002) loss 0.6349 (0.8618) lr 5.1825e-04 eta 2:07:57
epoch [39/50] batch [380/796] time 0.820 (0.835) data 0.000 (0.002) loss 0.7230 (0.8564) lr 5.1825e-04 eta 2:07:40
epoch [39/50] batch [400/796] time 0.840 (0.835) data 0.000 (0.002) loss 1.6020 (0.8622) lr 5.1825e-04 eta 2:07:23
epoch [39/50] batch [420/796] time 0.820 (0.835) data 0.000 (0.002) loss 0.0339 (0.8656) lr 5.1825e-04 eta 2:07:05
epoch [39/50] batch [440/796] time 0.835 (0.835) data 0.000 (0.002) loss 0.7846 (0.8606) lr 5.1825e-04 eta 2:06:48
epoch [39/50] batch [460/796] time 0.843 (0.835) data 0.000 (0.002) loss 1.1141 (0.8696) lr 5.1825e-04 eta 2:06:31
epoch [39/50] batch [480/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.2077 (0.8647) lr 5.1825e-04 eta 2:06:14
epoch [39/50] batch [500/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.6955 (0.8568) lr 5.1825e-04 eta 2:05:56
epoch [39/50] batch [520/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.3764 (0.8474) lr 5.1825e-04 eta 2:05:41
epoch [39/50] batch [540/796] time 0.847 (0.835) data 0.000 (0.001) loss 1.4231 (0.8521) lr 5.1825e-04 eta 2:05:25
epoch [39/50] batch [560/796] time 0.822 (0.835) data 0.000 (0.001) loss 0.1335 (0.8502) lr 5.1825e-04 eta 2:05:08
epoch [39/50] batch [580/796] time 0.836 (0.835) data 0.000 (0.001) loss 0.3986 (0.8402) lr 5.1825e-04 eta 2:04:50
epoch [39/50] batch [600/796] time 0.844 (0.835) data 0.001 (0.001) loss 1.8136 (0.8411) lr 5.1825e-04 eta 2:04:33
epoch [39/50] batch [620/796] time 0.813 (0.835) data 0.000 (0.001) loss 0.8045 (0.8347) lr 5.1825e-04 eta 2:04:16
epoch [39/50] batch [640/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.0759 (0.8294) lr 5.1825e-04 eta 2:03:59
epoch [39/50] batch [660/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.6775 (0.8289) lr 5.1825e-04 eta 2:03:43
epoch [39/50] batch [680/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.3109 (0.8257) lr 5.1825e-04 eta 2:03:25
epoch [39/50] batch [700/796] time 0.819 (0.835) data 0.000 (0.001) loss 0.5130 (0.8258) lr 5.1825e-04 eta 2:03:08
epoch [39/50] batch [720/796] time 0.820 (0.835) data 0.000 (0.001) loss 1.8491 (0.8374) lr 5.1825e-04 eta 2:02:50
epoch [39/50] batch [740/796] time 0.849 (0.835) data 0.000 (0.001) loss 1.4178 (0.8388) lr 5.1825e-04 eta 2:02:33
epoch [39/50] batch [760/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.9446 (0.8376) lr 5.1825e-04 eta 2:02:17
epoch [39/50] batch [780/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.4416 (0.8367) lr 5.1825e-04 eta 2:02:01
epoch [40/50] batch [20/796] time 0.823 (0.861) data 0.000 (0.027) loss 0.2211 (0.7722) lr 4.6417e-04 eta 2:05:25
epoch [40/50] batch [40/796] time 0.845 (0.847) data 0.000 (0.014) loss 0.4475 (0.7658) lr 4.6417e-04 eta 2:03:02
epoch [40/50] batch [60/796] time 0.834 (0.842) data 0.000 (0.009) loss 0.2339 (0.7416) lr 4.6417e-04 eta 2:02:04
epoch [40/50] batch [80/796] time 0.811 (0.840) data 0.000 (0.007) loss 2.3587 (0.7508) lr 4.6417e-04 eta 2:01:29
epoch [40/50] batch [100/796] time 0.839 (0.839) data 0.000 (0.006) loss 0.2247 (0.7544) lr 4.6417e-04 eta 2:01:05
epoch [40/50] batch [120/796] time 0.839 (0.838) data 0.000 (0.005) loss 1.8336 (0.7566) lr 4.6417e-04 eta 2:00:36
epoch [40/50] batch [140/796] time 0.818 (0.837) data 0.000 (0.004) loss 1.2155 (0.7702) lr 4.6417e-04 eta 2:00:11
epoch [40/50] batch [160/796] time 0.842 (0.837) data 0.000 (0.004) loss 1.2160 (0.7832) lr 4.6417e-04 eta 1:59:50
epoch [40/50] batch [180/796] time 0.829 (0.836) data 0.000 (0.003) loss 0.2312 (0.7917) lr 4.6417e-04 eta 1:59:27
epoch [40/50] batch [200/796] time 0.818 (0.836) data 0.000 (0.003) loss 0.9855 (0.7853) lr 4.6417e-04 eta 1:59:09
epoch [40/50] batch [220/796] time 0.837 (0.835) data 0.000 (0.003) loss 0.6365 (0.7737) lr 4.6417e-04 eta 1:58:47
epoch [40/50] batch [240/796] time 0.818 (0.835) data 0.000 (0.002) loss 0.3563 (0.7636) lr 4.6417e-04 eta 1:58:27
epoch [40/50] batch [260/796] time 0.840 (0.834) data 0.000 (0.002) loss 0.9856 (0.7829) lr 4.6417e-04 eta 1:58:08
epoch [40/50] batch [280/796] time 0.831 (0.834) data 0.000 (0.002) loss 1.0586 (0.7984) lr 4.6417e-04 eta 1:57:48
epoch [40/50] batch [300/796] time 0.839 (0.834) data 0.000 (0.002) loss 1.8206 (0.7959) lr 4.6417e-04 eta 1:57:29
epoch [40/50] batch [320/796] time 0.833 (0.834) data 0.000 (0.002) loss 2.1466 (0.7980) lr 4.6417e-04 eta 1:57:12
epoch [40/50] batch [340/796] time 0.844 (0.834) data 0.000 (0.002) loss 1.6988 (0.7925) lr 4.6417e-04 eta 1:56:55
epoch [40/50] batch [360/796] time 0.839 (0.833) data 0.000 (0.002) loss 0.1424 (0.7814) lr 4.6417e-04 eta 1:56:37
epoch [40/50] batch [380/796] time 0.840 (0.833) data 0.000 (0.002) loss 0.0041 (0.7888) lr 4.6417e-04 eta 1:56:20
epoch [40/50] batch [400/796] time 0.841 (0.833) data 0.000 (0.002) loss 0.6312 (0.7871) lr 4.6417e-04 eta 1:56:03
epoch [40/50] batch [420/796] time 0.825 (0.833) data 0.000 (0.001) loss 0.1256 (0.7892) lr 4.6417e-04 eta 1:55:46
epoch [40/50] batch [440/796] time 0.844 (0.833) data 0.000 (0.001) loss 0.4693 (0.7937) lr 4.6417e-04 eta 1:55:31
epoch [40/50] batch [460/796] time 0.834 (0.833) data 0.000 (0.001) loss 0.3186 (0.7907) lr 4.6417e-04 eta 1:55:14
epoch [40/50] batch [480/796] time 0.831 (0.833) data 0.000 (0.001) loss 0.0990 (0.7833) lr 4.6417e-04 eta 1:54:57
epoch [40/50] batch [500/796] time 0.843 (0.833) data 0.000 (0.001) loss 0.2483 (0.7761) lr 4.6417e-04 eta 1:54:41
epoch [40/50] batch [520/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.2801 (0.7807) lr 4.6417e-04 eta 1:54:25
epoch [40/50] batch [540/796] time 0.818 (0.833) data 0.000 (0.001) loss 0.4954 (0.7857) lr 4.6417e-04 eta 1:54:07
epoch [40/50] batch [560/796] time 0.833 (0.833) data 0.000 (0.001) loss 0.8266 (0.7867) lr 4.6417e-04 eta 1:53:50
epoch [40/50] batch [580/796] time 0.839 (0.833) data 0.000 (0.001) loss 0.8231 (0.7813) lr 4.6417e-04 eta 1:53:33
epoch [40/50] batch [600/796] time 0.840 (0.833) data 0.000 (0.001) loss 0.1010 (0.7924) lr 4.6417e-04 eta 1:53:16
epoch [40/50] batch [620/796] time 0.840 (0.833) data 0.000 (0.001) loss 0.4618 (0.7838) lr 4.6417e-04 eta 1:52:59
epoch [40/50] batch [640/796] time 0.832 (0.833) data 0.000 (0.001) loss 0.0752 (0.7808) lr 4.6417e-04 eta 1:52:42
epoch [40/50] batch [660/796] time 0.848 (0.833) data 0.000 (0.001) loss 0.3393 (0.7805) lr 4.6417e-04 eta 1:52:25
epoch [40/50] batch [680/796] time 0.830 (0.833) data 0.000 (0.001) loss 1.7312 (0.7836) lr 4.6417e-04 eta 1:52:09
epoch [40/50] batch [700/796] time 0.830 (0.833) data 0.000 (0.001) loss 0.3373 (0.7840) lr 4.6417e-04 eta 1:51:52
epoch [40/50] batch [720/796] time 0.820 (0.833) data 0.000 (0.001) loss 1.2886 (0.7865) lr 4.6417e-04 eta 1:51:36
epoch [40/50] batch [740/796] time 0.832 (0.833) data 0.000 (0.001) loss 1.1943 (0.7880) lr 4.6417e-04 eta 1:51:19
epoch [40/50] batch [760/796] time 0.836 (0.833) data 0.000 (0.001) loss 0.6409 (0.7919) lr 4.6417e-04 eta 1:51:02
epoch [40/50] batch [780/796] time 0.823 (0.833) data 0.000 (0.001) loss 0.3913 (0.7888) lr 4.6417e-04 eta 1:50:45
epoch [41/50] batch [20/796] time 0.818 (0.860) data 0.000 (0.029) loss 1.0082 (0.7984) lr 4.1221e-04 eta 1:53:46
epoch [41/50] batch [40/796] time 0.839 (0.846) data 0.000 (0.014) loss 1.7252 (0.7915) lr 4.1221e-04 eta 1:51:38
epoch [41/50] batch [60/796] time 0.840 (0.841) data 0.000 (0.010) loss 0.1768 (0.6907) lr 4.1221e-04 eta 1:50:45
epoch [41/50] batch [80/796] time 0.839 (0.839) data 0.000 (0.007) loss 2.7098 (0.7458) lr 4.1221e-04 eta 1:50:07
epoch [41/50] batch [100/796] time 0.839 (0.837) data 0.000 (0.006) loss 0.8982 (0.7318) lr 4.1221e-04 eta 1:49:36
epoch [41/50] batch [120/796] time 0.818 (0.836) data 0.000 (0.005) loss 1.1404 (0.7917) lr 4.1221e-04 eta 1:49:11
epoch [41/50] batch [140/796] time 0.817 (0.834) data 0.000 (0.004) loss 0.4871 (0.7702) lr 4.1221e-04 eta 1:48:45
epoch [41/50] batch [160/796] time 0.838 (0.834) data 0.000 (0.004) loss 0.0492 (0.7445) lr 4.1221e-04 eta 1:48:23
epoch [41/50] batch [180/796] time 0.829 (0.834) data 0.000 (0.003) loss 0.4107 (0.7563) lr 4.1221e-04 eta 1:48:05
epoch [41/50] batch [200/796] time 0.818 (0.833) data 0.000 (0.003) loss 0.3852 (0.7523) lr 4.1221e-04 eta 1:47:46
epoch [41/50] batch [220/796] time 0.830 (0.833) data 0.000 (0.003) loss 0.8509 (0.7605) lr 4.1221e-04 eta 1:47:27
epoch [41/50] batch [240/796] time 0.818 (0.833) data 0.000 (0.003) loss 0.0832 (0.7640) lr 4.1221e-04 eta 1:47:08
epoch [41/50] batch [260/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.9923 (0.7594) lr 4.1221e-04 eta 1:46:48
epoch [41/50] batch [280/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.2337 (0.7549) lr 4.1221e-04 eta 1:46:32
epoch [41/50] batch [300/796] time 0.840 (0.832) data 0.000 (0.002) loss 0.4574 (0.7537) lr 4.1221e-04 eta 1:46:15
epoch [41/50] batch [320/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.0775 (0.7487) lr 4.1221e-04 eta 1:45:58
epoch [41/50] batch [340/796] time 0.839 (0.832) data 0.000 (0.002) loss 2.2700 (0.7592) lr 4.1221e-04 eta 1:45:40
epoch [41/50] batch [360/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.5194 (0.7563) lr 4.1221e-04 eta 1:45:23
epoch [41/50] batch [380/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.8088 (0.7544) lr 4.1221e-04 eta 1:45:06
epoch [41/50] batch [400/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.1390 (0.7484) lr 4.1221e-04 eta 1:44:47
epoch [41/50] batch [420/796] time 0.832 (0.832) data 0.000 (0.002) loss 0.1110 (0.7469) lr 4.1221e-04 eta 1:44:30
epoch [41/50] batch [440/796] time 0.829 (0.832) data 0.000 (0.001) loss 1.2634 (0.7497) lr 4.1221e-04 eta 1:44:14
epoch [41/50] batch [460/796] time 0.808 (0.832) data 0.000 (0.001) loss 0.8290 (0.7558) lr 4.1221e-04 eta 1:43:56
epoch [41/50] batch [480/796] time 0.838 (0.832) data 0.000 (0.001) loss 1.8602 (0.7548) lr 4.1221e-04 eta 1:43:39
epoch [41/50] batch [500/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.1977 (0.7545) lr 4.1221e-04 eta 1:43:22
epoch [41/50] batch [520/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.7765 (0.7627) lr 4.1221e-04 eta 1:43:05
epoch [41/50] batch [540/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.1046 (0.7540) lr 4.1221e-04 eta 1:42:48
epoch [41/50] batch [560/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.2774 (0.7604) lr 4.1221e-04 eta 1:42:31
epoch [41/50] batch [580/796] time 0.833 (0.831) data 0.000 (0.001) loss 0.1428 (0.7586) lr 4.1221e-04 eta 1:42:14
epoch [41/50] batch [600/796] time 0.821 (0.831) data 0.000 (0.001) loss 1.3649 (0.7686) lr 4.1221e-04 eta 1:41:57
epoch [41/50] batch [620/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.5816 (0.7686) lr 4.1221e-04 eta 1:41:40
epoch [41/50] batch [640/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.5440 (0.7701) lr 4.1221e-04 eta 1:41:24
epoch [41/50] batch [660/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.0491 (0.7778) lr 4.1221e-04 eta 1:41:07
epoch [41/50] batch [680/796] time 0.842 (0.831) data 0.000 (0.001) loss 0.2070 (0.7777) lr 4.1221e-04 eta 1:40:50
epoch [41/50] batch [700/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.1724 (0.7772) lr 4.1221e-04 eta 1:40:34
epoch [41/50] batch [720/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.3267 (0.7750) lr 4.1221e-04 eta 1:40:17
epoch [41/50] batch [740/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.5093 (0.7771) lr 4.1221e-04 eta 1:39:59
epoch [41/50] batch [760/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.4892 (0.7727) lr 4.1221e-04 eta 1:39:43
epoch [41/50] batch [780/796] time 0.810 (0.831) data 0.000 (0.001) loss 0.8297 (0.7772) lr 4.1221e-04 eta 1:39:26
epoch [42/50] batch [20/796] time 0.818 (0.861) data 0.000 (0.031) loss 0.7774 (1.1529) lr 3.6258e-04 eta 1:42:28
epoch [42/50] batch [40/796] time 0.809 (0.844) data 0.000 (0.015) loss 0.2264 (0.8880) lr 3.6258e-04 eta 1:40:14
epoch [42/50] batch [60/796] time 0.838 (0.840) data 0.000 (0.010) loss 1.8587 (0.9253) lr 3.6258e-04 eta 1:39:27
epoch [42/50] batch [80/796] time 0.830 (0.838) data 0.000 (0.008) loss 1.3069 (0.8711) lr 3.6258e-04 eta 1:38:53
epoch [42/50] batch [100/796] time 0.817 (0.836) data 0.000 (0.006) loss 0.4260 (0.7838) lr 3.6258e-04 eta 1:38:25
epoch [42/50] batch [120/796] time 0.829 (0.835) data 0.000 (0.005) loss 0.2388 (0.8237) lr 3.6258e-04 eta 1:38:01
epoch [42/50] batch [140/796] time 0.819 (0.835) data 0.000 (0.005) loss 1.5704 (0.8219) lr 3.6258e-04 eta 1:37:42
epoch [42/50] batch [160/796] time 0.821 (0.834) data 0.000 (0.004) loss 0.1261 (0.8177) lr 3.6258e-04 eta 1:37:21
epoch [42/50] batch [180/796] time 0.842 (0.834) data 0.000 (0.004) loss 1.7658 (0.8257) lr 3.6258e-04 eta 1:37:02
epoch [42/50] batch [200/796] time 0.838 (0.833) data 0.000 (0.003) loss 0.2504 (0.8182) lr 3.6258e-04 eta 1:36:43
epoch [42/50] batch [220/796] time 0.831 (0.833) data 0.000 (0.003) loss 0.2865 (0.8104) lr 3.6258e-04 eta 1:36:24
epoch [42/50] batch [240/796] time 0.818 (0.833) data 0.000 (0.003) loss 0.3508 (0.7841) lr 3.6258e-04 eta 1:36:04
epoch [42/50] batch [260/796] time 0.838 (0.832) data 0.000 (0.003) loss 0.0280 (0.7883) lr 3.6258e-04 eta 1:35:45
epoch [42/50] batch [280/796] time 0.840 (0.832) data 0.000 (0.002) loss 0.3034 (0.7886) lr 3.6258e-04 eta 1:35:27
epoch [42/50] batch [300/796] time 0.841 (0.832) data 0.000 (0.002) loss 0.8957 (0.7742) lr 3.6258e-04 eta 1:35:11
epoch [42/50] batch [320/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.4376 (0.7789) lr 3.6258e-04 eta 1:34:55
epoch [42/50] batch [340/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.7347 (0.7934) lr 3.6258e-04 eta 1:34:38
epoch [42/50] batch [360/796] time 0.812 (0.832) data 0.000 (0.002) loss 0.4142 (0.7856) lr 3.6258e-04 eta 1:34:19
epoch [42/50] batch [380/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.6056 (0.7949) lr 3.6258e-04 eta 1:34:02
epoch [42/50] batch [400/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.0722 (0.7831) lr 3.6258e-04 eta 1:33:45
epoch [42/50] batch [420/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.0801 (0.7912) lr 3.6258e-04 eta 1:33:27
epoch [42/50] batch [440/796] time 0.838 (0.831) data 0.000 (0.002) loss 1.2590 (0.7915) lr 3.6258e-04 eta 1:33:10
epoch [42/50] batch [460/796] time 0.818 (0.831) data 0.000 (0.002) loss 0.0603 (0.7944) lr 3.6258e-04 eta 1:32:53
epoch [42/50] batch [480/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.5948 (0.8040) lr 3.6258e-04 eta 1:32:36
epoch [42/50] batch [500/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.2993 (0.8061) lr 3.6258e-04 eta 1:32:19
epoch [42/50] batch [520/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.3668 (0.8011) lr 3.6258e-04 eta 1:32:03
epoch [42/50] batch [540/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.2657 (0.7978) lr 3.6258e-04 eta 1:31:45
epoch [42/50] batch [560/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.3528 (0.7964) lr 3.6258e-04 eta 1:31:29
epoch [42/50] batch [580/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.2936 (0.7902) lr 3.6258e-04 eta 1:31:12
epoch [42/50] batch [600/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.2945 (0.7914) lr 3.6258e-04 eta 1:30:55
epoch [42/50] batch [620/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.2282 (0.7882) lr 3.6258e-04 eta 1:30:38
epoch [42/50] batch [640/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.8157 (0.7889) lr 3.6258e-04 eta 1:30:21
epoch [42/50] batch [660/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.2890 (0.7883) lr 3.6258e-04 eta 1:30:04
epoch [42/50] batch [680/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.6581 (0.7864) lr 3.6258e-04 eta 1:29:47
epoch [42/50] batch [700/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.7506 (0.7875) lr 3.6258e-04 eta 1:29:31
epoch [42/50] batch [720/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.3823 (0.7890) lr 3.6258e-04 eta 1:29:14
epoch [42/50] batch [740/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.8831 (0.7946) lr 3.6258e-04 eta 1:28:57
epoch [42/50] batch [760/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.9060 (0.7990) lr 3.6258e-04 eta 1:28:40
epoch [42/50] batch [780/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.1409 (0.7965) lr 3.6258e-04 eta 1:28:24
epoch [43/50] batch [20/796] time 0.818 (0.861) data 0.000 (0.027) loss 0.0962 (0.6698) lr 3.1545e-04 eta 1:31:06
epoch [43/50] batch [40/796] time 0.840 (0.846) data 0.000 (0.013) loss 1.3190 (0.8542) lr 3.1545e-04 eta 1:29:13
epoch [43/50] batch [60/796] time 0.810 (0.840) data 0.000 (0.009) loss 0.9220 (0.8083) lr 3.1545e-04 eta 1:28:19
epoch [43/50] batch [80/796] time 0.818 (0.838) data 0.000 (0.007) loss 0.6775 (0.7860) lr 3.1545e-04 eta 1:27:48
epoch [43/50] batch [100/796] time 0.817 (0.836) data 0.000 (0.005) loss 0.2785 (0.7456) lr 3.1545e-04 eta 1:27:23
epoch [43/50] batch [120/796] time 0.818 (0.836) data 0.000 (0.005) loss 1.1724 (0.7847) lr 3.1545e-04 eta 1:27:00
epoch [43/50] batch [140/796] time 0.838 (0.835) data 0.000 (0.004) loss 0.5254 (0.7807) lr 3.1545e-04 eta 1:26:37
epoch [43/50] batch [160/796] time 0.829 (0.834) data 0.000 (0.004) loss 0.2482 (0.7955) lr 3.1545e-04 eta 1:26:18
epoch [43/50] batch [180/796] time 0.838 (0.834) data 0.000 (0.003) loss 0.0289 (0.7822) lr 3.1545e-04 eta 1:25:59
epoch [43/50] batch [200/796] time 0.829 (0.833) data 0.000 (0.003) loss 2.1643 (0.7844) lr 3.1545e-04 eta 1:25:40
epoch [43/50] batch [220/796] time 0.808 (0.833) data 0.000 (0.003) loss 0.0460 (0.7689) lr 3.1545e-04 eta 1:25:21
epoch [43/50] batch [240/796] time 0.830 (0.833) data 0.000 (0.002) loss 1.3865 (0.7640) lr 3.1545e-04 eta 1:25:02
epoch [43/50] batch [260/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.7152 (0.7696) lr 3.1545e-04 eta 1:24:43
epoch [43/50] batch [280/796] time 0.809 (0.832) data 0.000 (0.002) loss 0.3723 (0.7676) lr 3.1545e-04 eta 1:24:26
epoch [43/50] batch [300/796] time 0.845 (0.832) data 0.000 (0.002) loss 0.6457 (0.7565) lr 3.1545e-04 eta 1:24:07
epoch [43/50] batch [320/796] time 0.809 (0.832) data 0.000 (0.002) loss 0.4207 (0.7550) lr 3.1545e-04 eta 1:23:50
epoch [43/50] batch [340/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.1981 (0.7653) lr 3.1545e-04 eta 1:23:34
epoch [43/50] batch [360/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.5459 (0.7575) lr 3.1545e-04 eta 1:23:17
epoch [43/50] batch [380/796] time 0.837 (0.832) data 0.000 (0.002) loss 2.1920 (0.7576) lr 3.1545e-04 eta 1:22:59
epoch [43/50] batch [400/796] time 0.832 (0.831) data 0.000 (0.002) loss 0.7744 (0.7572) lr 3.1545e-04 eta 1:22:42
epoch [43/50] batch [420/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.1449 (0.7551) lr 3.1545e-04 eta 1:22:24
epoch [43/50] batch [440/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.8373 (0.7717) lr 3.1545e-04 eta 1:22:08
epoch [43/50] batch [460/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2857 (0.7708) lr 3.1545e-04 eta 1:21:51
epoch [43/50] batch [480/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.0716 (0.7694) lr 3.1545e-04 eta 1:21:35
epoch [43/50] batch [500/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.1330 (0.7618) lr 3.1545e-04 eta 1:21:17
epoch [43/50] batch [520/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.7934 (0.7656) lr 3.1545e-04 eta 1:21:00
epoch [43/50] batch [540/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.2050 (0.7636) lr 3.1545e-04 eta 1:20:44
epoch [43/50] batch [560/796] time 0.839 (0.831) data 0.000 (0.001) loss 2.2207 (0.7610) lr 3.1545e-04 eta 1:20:27
epoch [43/50] batch [580/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.0416 (0.7511) lr 3.1545e-04 eta 1:20:10
epoch [43/50] batch [600/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.5451 (0.7543) lr 3.1545e-04 eta 1:19:53
epoch [43/50] batch [620/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.5163 (0.7525) lr 3.1545e-04 eta 1:19:36
epoch [43/50] batch [640/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.3543 (0.7539) lr 3.1545e-04 eta 1:19:19
epoch [43/50] batch [660/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.1231 (0.7519) lr 3.1545e-04 eta 1:19:03
epoch [43/50] batch [680/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.2669 (0.7467) lr 3.1545e-04 eta 1:18:46
epoch [43/50] batch [700/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.8053 (0.7451) lr 3.1545e-04 eta 1:18:29
epoch [43/50] batch [720/796] time 0.809 (0.831) data 0.000 (0.001) loss 1.1737 (0.7406) lr 3.1545e-04 eta 1:18:12
epoch [43/50] batch [740/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.8746 (0.7453) lr 3.1545e-04 eta 1:17:55
epoch [43/50] batch [760/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.4248 (0.7422) lr 3.1545e-04 eta 1:17:38
epoch [43/50] batch [780/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.1719 (0.7506) lr 3.1545e-04 eta 1:17:22
epoch [44/50] batch [20/796] time 0.838 (0.862) data 0.000 (0.029) loss 0.1609 (0.8389) lr 2.7103e-04 eta 1:19:44
epoch [44/50] batch [40/796] time 0.838 (0.846) data 0.000 (0.015) loss 0.2319 (0.7750) lr 2.7103e-04 eta 1:17:57
epoch [44/50] batch [60/796] time 0.829 (0.841) data 0.000 (0.010) loss 0.1810 (0.7556) lr 2.7103e-04 eta 1:17:13
epoch [44/50] batch [80/796] time 0.832 (0.839) data 0.000 (0.007) loss 1.1236 (0.7207) lr 2.7103e-04 eta 1:16:48
epoch [44/50] batch [100/796] time 0.830 (0.838) data 0.000 (0.006) loss 0.2906 (0.7327) lr 2.7103e-04 eta 1:16:25
epoch [44/50] batch [120/796] time 0.838 (0.837) data 0.000 (0.005) loss 0.5548 (0.7340) lr 2.7103e-04 eta 1:16:00
epoch [44/50] batch [140/796] time 0.830 (0.836) data 0.000 (0.004) loss 0.1107 (0.7609) lr 2.7103e-04 eta 1:15:38
epoch [44/50] batch [160/796] time 0.819 (0.835) data 0.000 (0.004) loss 0.2184 (0.7394) lr 2.7103e-04 eta 1:15:18
epoch [44/50] batch [180/796] time 0.843 (0.834) data 0.000 (0.003) loss 0.4024 (0.7789) lr 2.7103e-04 eta 1:14:59
epoch [44/50] batch [200/796] time 0.830 (0.834) data 0.000 (0.003) loss 2.0572 (0.7699) lr 2.7103e-04 eta 1:14:40
epoch [44/50] batch [220/796] time 0.832 (0.834) data 0.000 (0.003) loss 0.2650 (0.7638) lr 2.7103e-04 eta 1:14:21
epoch [44/50] batch [240/796] time 0.809 (0.834) data 0.000 (0.003) loss 1.5893 (0.7756) lr 2.7103e-04 eta 1:14:04
epoch [44/50] batch [260/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.5606 (0.7793) lr 2.7103e-04 eta 1:13:45
epoch [44/50] batch [280/796] time 0.839 (0.833) data 0.000 (0.002) loss 1.8785 (0.7860) lr 2.7103e-04 eta 1:13:27
epoch [44/50] batch [300/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.8996 (0.7850) lr 2.7103e-04 eta 1:13:08
epoch [44/50] batch [320/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.4947 (0.7690) lr 2.7103e-04 eta 1:12:51
epoch [44/50] batch [340/796] time 0.837 (0.832) data 0.000 (0.002) loss 0.2049 (0.7899) lr 2.7103e-04 eta 1:12:34
epoch [44/50] batch [360/796] time 0.821 (0.832) data 0.000 (0.002) loss 0.5431 (0.7818) lr 2.7103e-04 eta 1:12:17
epoch [44/50] batch [380/796] time 0.808 (0.832) data 0.000 (0.002) loss 1.1446 (0.7865) lr 2.7103e-04 eta 1:12:00
epoch [44/50] batch [400/796] time 0.840 (0.832) data 0.000 (0.002) loss 0.1100 (0.7827) lr 2.7103e-04 eta 1:11:43
epoch [44/50] batch [420/796] time 0.837 (0.832) data 0.000 (0.002) loss 1.4439 (0.7917) lr 2.7103e-04 eta 1:11:26
epoch [44/50] batch [440/796] time 0.817 (0.832) data 0.000 (0.002) loss 1.4793 (0.8135) lr 2.7103e-04 eta 1:11:10
epoch [44/50] batch [460/796] time 0.833 (0.832) data 0.000 (0.001) loss 2.4147 (0.8078) lr 2.7103e-04 eta 1:10:53
epoch [44/50] batch [480/796] time 0.828 (0.832) data 0.000 (0.001) loss 0.5467 (0.8090) lr 2.7103e-04 eta 1:10:36
epoch [44/50] batch [500/796] time 0.819 (0.832) data 0.000 (0.001) loss 0.2752 (0.8094) lr 2.7103e-04 eta 1:10:19
epoch [44/50] batch [520/796] time 0.829 (0.832) data 0.000 (0.001) loss 2.3803 (0.8155) lr 2.7103e-04 eta 1:10:03
epoch [44/50] batch [540/796] time 0.839 (0.832) data 0.000 (0.001) loss 1.6153 (0.8169) lr 2.7103e-04 eta 1:09:45
epoch [44/50] batch [560/796] time 0.842 (0.832) data 0.000 (0.001) loss 1.0440 (0.8162) lr 2.7103e-04 eta 1:09:28
epoch [44/50] batch [580/796] time 0.840 (0.832) data 0.000 (0.001) loss 2.4597 (0.8135) lr 2.7103e-04 eta 1:09:12
epoch [44/50] batch [600/796] time 0.830 (0.832) data 0.000 (0.001) loss 1.2910 (0.8177) lr 2.7103e-04 eta 1:08:55
epoch [44/50] batch [620/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.4613 (0.8204) lr 2.7103e-04 eta 1:08:38
epoch [44/50] batch [640/796] time 0.809 (0.832) data 0.000 (0.001) loss 0.1340 (0.8233) lr 2.7103e-04 eta 1:08:21
epoch [44/50] batch [660/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.3494 (0.8155) lr 2.7103e-04 eta 1:08:04
epoch [44/50] batch [680/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.6294 (0.8105) lr 2.7103e-04 eta 1:07:47
epoch [44/50] batch [700/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.0398 (0.8145) lr 2.7103e-04 eta 1:07:30
epoch [44/50] batch [720/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.3555 (0.8170) lr 2.7103e-04 eta 1:07:13
epoch [44/50] batch [740/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.2790 (0.8111) lr 2.7103e-04 eta 1:06:57
epoch [44/50] batch [760/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.4708 (0.8107) lr 2.7103e-04 eta 1:06:40
epoch [44/50] batch [780/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.0503 (0.8145) lr 2.7103e-04 eta 1:06:23
epoch [45/50] batch [20/796] time 0.818 (0.859) data 0.000 (0.029) loss 0.7440 (0.5130) lr 2.2949e-04 eta 1:08:03
epoch [45/50] batch [40/796] time 0.821 (0.845) data 0.000 (0.014) loss 0.4165 (0.6118) lr 2.2949e-04 eta 1:06:40
epoch [45/50] batch [60/796] time 0.838 (0.840) data 0.000 (0.010) loss 0.9008 (0.6168) lr 2.2949e-04 eta 1:06:00
epoch [45/50] batch [80/796] time 0.839 (0.837) data 0.000 (0.007) loss 0.1772 (0.6235) lr 2.2949e-04 eta 1:05:31
epoch [45/50] batch [100/796] time 0.838 (0.836) data 0.000 (0.006) loss 0.9196 (0.6663) lr 2.2949e-04 eta 1:05:10
epoch [45/50] batch [120/796] time 0.818 (0.836) data 0.000 (0.005) loss 0.2544 (0.6985) lr 2.2949e-04 eta 1:04:50
epoch [45/50] batch [140/796] time 0.818 (0.835) data 0.000 (0.004) loss 2.3034 (0.7115) lr 2.2949e-04 eta 1:04:32
epoch [45/50] batch [160/796] time 0.832 (0.835) data 0.000 (0.004) loss 1.1205 (0.7716) lr 2.2949e-04 eta 1:04:12
epoch [45/50] batch [180/796] time 0.820 (0.834) data 0.000 (0.003) loss 0.5225 (0.7713) lr 2.2949e-04 eta 1:03:53
epoch [45/50] batch [200/796] time 0.810 (0.834) data 0.000 (0.003) loss 0.1390 (0.7916) lr 2.2949e-04 eta 1:03:35
epoch [45/50] batch [220/796] time 0.818 (0.834) data 0.000 (0.003) loss 0.3683 (0.7981) lr 2.2949e-04 eta 1:03:18
epoch [45/50] batch [240/796] time 0.818 (0.834) data 0.000 (0.003) loss 0.1649 (0.8082) lr 2.2949e-04 eta 1:03:01
epoch [45/50] batch [260/796] time 0.840 (0.833) data 0.000 (0.002) loss 0.1469 (0.7961) lr 2.2949e-04 eta 1:02:43
epoch [45/50] batch [280/796] time 0.829 (0.833) data 0.000 (0.002) loss 0.6888 (0.8030) lr 2.2949e-04 eta 1:02:25
epoch [45/50] batch [300/796] time 0.841 (0.833) data 0.000 (0.002) loss 1.3382 (0.8095) lr 2.2949e-04 eta 1:02:07
epoch [45/50] batch [320/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.0532 (0.8200) lr 2.2949e-04 eta 1:01:50
epoch [45/50] batch [340/796] time 0.831 (0.832) data 0.000 (0.002) loss 2.0390 (0.8104) lr 2.2949e-04 eta 1:01:32
epoch [45/50] batch [360/796] time 0.844 (0.832) data 0.000 (0.002) loss 1.1660 (0.8400) lr 2.2949e-04 eta 1:01:15
epoch [45/50] batch [380/796] time 0.821 (0.832) data 0.000 (0.002) loss 0.7883 (0.8341) lr 2.2949e-04 eta 1:00:58
epoch [45/50] batch [400/796] time 0.829 (0.832) data 0.000 (0.002) loss 1.1169 (0.8350) lr 2.2949e-04 eta 1:00:40
epoch [45/50] batch [420/796] time 0.842 (0.832) data 0.000 (0.002) loss 0.0729 (0.8350) lr 2.2949e-04 eta 1:00:24
epoch [45/50] batch [440/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.5877 (0.8191) lr 2.2949e-04 eta 1:00:08
epoch [45/50] batch [460/796] time 0.830 (0.832) data 0.000 (0.001) loss 1.0017 (0.8118) lr 2.2949e-04 eta 0:59:51
epoch [45/50] batch [480/796] time 0.829 (0.832) data 0.000 (0.001) loss 0.0739 (0.8042) lr 2.2949e-04 eta 0:59:34
epoch [45/50] batch [500/796] time 0.847 (0.832) data 0.000 (0.001) loss 0.6345 (0.7983) lr 2.2949e-04 eta 0:59:17
epoch [45/50] batch [520/796] time 0.817 (0.832) data 0.000 (0.001) loss 0.4847 (0.7968) lr 2.2949e-04 eta 0:59:00
epoch [45/50] batch [540/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.0332 (0.7906) lr 2.2949e-04 eta 0:58:43
epoch [45/50] batch [560/796] time 0.830 (0.832) data 0.000 (0.001) loss 0.6659 (0.7964) lr 2.2949e-04 eta 0:58:27
epoch [45/50] batch [580/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.9535 (0.7942) lr 2.2949e-04 eta 0:58:10
epoch [45/50] batch [600/796] time 0.817 (0.832) data 0.000 (0.001) loss 0.4335 (0.7979) lr 2.2949e-04 eta 0:57:53
epoch [45/50] batch [620/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.2850 (0.7928) lr 2.2949e-04 eta 0:57:36
epoch [45/50] batch [640/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.0217 (0.7917) lr 2.2949e-04 eta 0:57:19
epoch [45/50] batch [660/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.8751 (0.7923) lr 2.2949e-04 eta 0:57:02
epoch [45/50] batch [680/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.9445 (0.7874) lr 2.2949e-04 eta 0:56:46
epoch [45/50] batch [700/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.8807 (0.7850) lr 2.2949e-04 eta 0:56:29
epoch [45/50] batch [720/796] time 0.830 (0.832) data 0.000 (0.001) loss 1.1448 (0.7870) lr 2.2949e-04 eta 0:56:12
epoch [45/50] batch [740/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.6456 (0.7909) lr 2.2949e-04 eta 0:55:56
epoch [45/50] batch [760/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.4618 (0.7938) lr 2.2949e-04 eta 0:55:39
epoch [45/50] batch [780/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.9102 (0.7944) lr 2.2949e-04 eta 0:55:22
epoch [46/50] batch [20/796] time 0.819 (0.859) data 0.000 (0.028) loss 0.0368 (1.0393) lr 1.9098e-04 eta 0:56:40
epoch [46/50] batch [40/796] time 0.834 (0.847) data 0.000 (0.014) loss 0.0345 (0.8514) lr 1.9098e-04 eta 0:55:36
epoch [46/50] batch [60/796] time 0.818 (0.840) data 0.000 (0.009) loss 0.5473 (0.8383) lr 1.9098e-04 eta 0:54:54
epoch [46/50] batch [80/796] time 0.818 (0.839) data 0.000 (0.007) loss 0.1640 (0.8033) lr 1.9098e-04 eta 0:54:30
epoch [46/50] batch [100/796] time 0.829 (0.837) data 0.000 (0.006) loss 1.4058 (0.7566) lr 1.9098e-04 eta 0:54:06
epoch [46/50] batch [120/796] time 0.830 (0.836) data 0.000 (0.005) loss 0.7291 (0.8026) lr 1.9098e-04 eta 0:53:47
epoch [46/50] batch [140/796] time 0.838 (0.835) data 0.000 (0.004) loss 0.4472 (0.7782) lr 1.9098e-04 eta 0:53:26
epoch [46/50] batch [160/796] time 0.839 (0.835) data 0.000 (0.004) loss 0.0573 (0.7633) lr 1.9098e-04 eta 0:53:08
epoch [46/50] batch [180/796] time 0.832 (0.834) data 0.000 (0.003) loss 2.3327 (0.7638) lr 1.9098e-04 eta 0:52:50
epoch [46/50] batch [200/796] time 0.818 (0.834) data 0.000 (0.003) loss 1.0147 (0.7546) lr 1.9098e-04 eta 0:52:32
epoch [46/50] batch [220/796] time 1.753 (0.849) data 0.000 (0.003) loss 0.1882 (0.7329) lr 1.9098e-04 eta 0:53:10
epoch [46/50] batch [240/796] time 0.911 (0.857) data 0.000 (0.002) loss 0.7187 (0.7277) lr 1.9098e-04 eta 0:53:24
epoch [46/50] batch [260/796] time 0.835 (0.861) data 0.000 (0.002) loss 0.6179 (0.7460) lr 1.9098e-04 eta 0:53:21
epoch [46/50] batch [280/796] time 0.817 (0.868) data 0.000 (0.002) loss 0.2024 (0.7454) lr 1.9098e-04 eta 0:53:31
epoch [46/50] batch [300/796] time 1.768 (0.874) data 0.001 (0.002) loss 1.0612 (0.7567) lr 1.9098e-04 eta 0:53:36
epoch [46/50] batch [320/796] time 0.938 (0.874) data 0.000 (0.002) loss 1.5520 (0.7440) lr 1.9098e-04 eta 0:53:20
epoch [46/50] batch [340/796] time 1.839 (0.911) data 0.000 (0.002) loss 0.6234 (0.7333) lr 1.9098e-04 eta 0:55:17
epoch [46/50] batch [360/796] time 1.863 (0.964) data 0.000 (0.002) loss 1.4501 (0.7394) lr 1.9098e-04 eta 0:58:08
epoch [46/50] batch [380/796] time 1.830 (1.011) data 0.000 (0.002) loss 0.0954 (0.7399) lr 1.9098e-04 eta 1:00:38
epoch [46/50] batch [400/796] time 1.859 (1.053) data 0.000 (0.002) loss 1.1897 (0.7453) lr 1.9098e-04 eta 1:02:49
epoch [46/50] batch [420/796] time 0.920 (1.050) data 0.000 (0.002) loss 1.5366 (0.7453) lr 1.9098e-04 eta 1:02:19
epoch [46/50] batch [440/796] time 0.837 (1.048) data 0.000 (0.002) loss 0.4259 (0.7457) lr 1.9098e-04 eta 1:01:48
epoch [46/50] batch [460/796] time 0.825 (1.044) data 0.000 (0.001) loss 0.3490 (0.7450) lr 1.9098e-04 eta 1:01:13
epoch [46/50] batch [480/796] time 0.920 (1.037) data 0.000 (0.001) loss 0.0836 (0.7516) lr 1.9098e-04 eta 1:00:29
epoch [46/50] batch [500/796] time 1.756 (1.062) data 0.000 (0.001) loss 0.1063 (0.7412) lr 1.9098e-04 eta 1:01:34
epoch [46/50] batch [520/796] time 0.923 (1.058) data 0.000 (0.001) loss 1.1766 (0.7443) lr 1.9098e-04 eta 1:00:59
epoch [46/50] batch [540/796] time 0.833 (1.061) data 0.000 (0.001) loss 2.0618 (0.7409) lr 1.9098e-04 eta 1:00:49
epoch [46/50] batch [560/796] time 0.950 (1.055) data 0.000 (0.001) loss 0.4542 (0.7380) lr 1.9098e-04 eta 1:00:06
epoch [46/50] batch [580/796] time 1.368 (1.063) data 0.000 (0.001) loss 0.1227 (0.7436) lr 1.9098e-04 eta 1:00:12
epoch [46/50] batch [600/796] time 1.607 (1.070) data 0.001 (0.001) loss 2.1312 (0.7567) lr 1.9098e-04 eta 1:00:15
epoch [46/50] batch [620/796] time 0.911 (1.063) data 0.000 (0.001) loss 0.3085 (0.7580) lr 1.9098e-04 eta 0:59:33
epoch [46/50] batch [640/796] time 0.840 (1.060) data 0.000 (0.001) loss 1.2802 (0.7555) lr 1.9098e-04 eta 0:59:00
epoch [46/50] batch [660/796] time 0.841 (1.061) data 0.000 (0.001) loss 0.7346 (0.7574) lr 1.9098e-04 eta 0:58:41
epoch [46/50] batch [680/796] time 0.902 (1.055) data 0.000 (0.001) loss 1.0049 (0.7623) lr 1.9098e-04 eta 0:58:02
epoch [46/50] batch [700/796] time 0.973 (1.051) data 0.000 (0.001) loss 0.7876 (0.7650) lr 1.9098e-04 eta 0:57:27
epoch [46/50] batch [720/796] time 0.819 (1.048) data 0.000 (0.001) loss 0.7146 (0.7683) lr 1.9098e-04 eta 0:56:55
epoch [46/50] batch [740/796] time 0.819 (1.046) data 0.000 (0.001) loss 0.0308 (0.7648) lr 1.9098e-04 eta 0:56:28
epoch [46/50] batch [760/796] time 0.898 (1.041) data 0.000 (0.001) loss 0.1653 (0.7668) lr 1.9098e-04 eta 0:55:51
epoch [46/50] batch [780/796] time 1.827 (1.056) data 0.000 (0.001) loss 1.1689 (0.7652) lr 1.9098e-04 eta 0:56:20
epoch [47/50] batch [20/796] time 1.840 (1.871) data 0.000 (0.030) loss 0.6837 (0.6506) lr 1.5567e-04 eta 1:38:39
epoch [47/50] batch [40/796] time 1.743 (1.854) data 0.000 (0.015) loss 2.7625 (0.6630) lr 1.5567e-04 eta 1:37:09
epoch [47/50] batch [60/796] time 0.898 (1.524) data 0.001 (0.010) loss 0.3112 (0.7356) lr 1.5567e-04 eta 1:19:20
epoch [47/50] batch [80/796] time 0.859 (1.384) data 0.000 (0.008) loss 0.9594 (0.7206) lr 1.5567e-04 eta 1:11:37
epoch [47/50] batch [100/796] time 0.831 (1.299) data 0.000 (0.006) loss 0.8825 (0.7191) lr 1.5567e-04 eta 1:06:45
epoch [47/50] batch [120/796] time 1.752 (1.239) data 0.001 (0.005) loss 1.4604 (0.7381) lr 1.5567e-04 eta 1:03:16
epoch [47/50] batch [140/796] time 0.813 (1.302) data 0.000 (0.005) loss 0.6416 (0.7910) lr 1.5567e-04 eta 1:06:02
epoch [47/50] batch [160/796] time 0.918 (1.248) data 0.000 (0.004) loss 0.6650 (0.8017) lr 1.5567e-04 eta 1:02:52
epoch [47/50] batch [180/796] time 0.842 (1.235) data 0.000 (0.004) loss 1.8396 (0.7972) lr 1.5567e-04 eta 1:01:49
epoch [47/50] batch [200/796] time 1.308 (1.215) data 0.000 (0.003) loss 0.4950 (0.8039) lr 1.5567e-04 eta 1:00:24
epoch [47/50] batch [220/796] time 1.586 (1.215) data 0.000 (0.003) loss 1.8529 (0.8043) lr 1.5567e-04 eta 1:00:00
epoch [47/50] batch [240/796] time 0.840 (1.215) data 0.000 (0.003) loss 1.7203 (0.8123) lr 1.5567e-04 eta 0:59:35
epoch [47/50] batch [260/796] time 0.919 (1.189) data 0.000 (0.003) loss 1.3723 (0.8262) lr 1.5567e-04 eta 0:57:56
epoch [47/50] batch [280/796] time 0.967 (1.172) data 0.000 (0.002) loss 0.8694 (0.8247) lr 1.5567e-04 eta 0:56:43
epoch [47/50] batch [300/796] time 0.830 (1.165) data 0.000 (0.002) loss 0.2351 (0.8293) lr 1.5567e-04 eta 0:56:00
epoch [47/50] batch [320/796] time 0.840 (1.150) data 0.000 (0.002) loss 3.2736 (0.8336) lr 1.5567e-04 eta 0:54:53
epoch [47/50] batch [340/796] time 0.912 (1.133) data 0.000 (0.002) loss 0.1215 (0.8280) lr 1.5567e-04 eta 0:53:43
epoch [47/50] batch [360/796] time 0.929 (1.122) data 0.000 (0.002) loss 0.2061 (0.8255) lr 1.5567e-04 eta 0:52:48
epoch [47/50] batch [380/796] time 0.831 (1.114) data 0.000 (0.002) loss 0.8933 (0.8287) lr 1.5567e-04 eta 0:52:03
epoch [47/50] batch [400/796] time 1.206 (1.103) data 0.000 (0.002) loss 0.6128 (0.8277) lr 1.5567e-04 eta 0:51:11
epoch [47/50] batch [420/796] time 1.852 (1.139) data 0.000 (0.002) loss 1.0524 (0.8311) lr 1.5567e-04 eta 0:52:27
epoch [47/50] batch [440/796] time 1.850 (1.171) data 0.000 (0.002) loss 0.2499 (0.8266) lr 1.5567e-04 eta 0:53:33
epoch [47/50] batch [460/796] time 1.832 (1.201) data 0.000 (0.002) loss 0.1963 (0.8241) lr 1.5567e-04 eta 0:54:30
epoch [47/50] batch [480/796] time 0.845 (1.218) data 0.000 (0.002) loss 0.6035 (0.8119) lr 1.5567e-04 eta 0:54:52
epoch [47/50] batch [500/796] time 1.297 (1.205) data 0.000 (0.001) loss 1.6462 (0.8152) lr 1.5567e-04 eta 0:53:55
epoch [47/50] batch [520/796] time 0.948 (1.196) data 0.000 (0.001) loss 1.1449 (0.8155) lr 1.5567e-04 eta 0:53:05
epoch [47/50] batch [540/796] time 0.834 (1.186) data 0.000 (0.001) loss 1.0781 (0.8083) lr 1.5567e-04 eta 0:52:15
epoch [47/50] batch [560/796] time 1.755 (1.186) data 0.000 (0.001) loss 0.1463 (0.8010) lr 1.5567e-04 eta 0:51:50
epoch [47/50] batch [580/796] time 0.843 (1.195) data 0.000 (0.001) loss 1.4117 (0.8082) lr 1.5567e-04 eta 0:51:51
epoch [47/50] batch [600/796] time 1.718 (1.186) data 0.000 (0.001) loss 1.2696 (0.8041) lr 1.5567e-04 eta 0:51:03
epoch [47/50] batch [620/796] time 0.841 (1.182) data 0.000 (0.001) loss 1.3719 (0.8077) lr 1.5567e-04 eta 0:50:31
epoch [47/50] batch [640/796] time 1.151 (1.180) data 0.000 (0.001) loss 0.5776 (0.8078) lr 1.5567e-04 eta 0:50:01
epoch [47/50] batch [660/796] time 0.905 (1.182) data 0.000 (0.001) loss 0.4187 (0.8069) lr 1.5567e-04 eta 0:49:42
epoch [47/50] batch [680/796] time 0.842 (1.179) data 0.000 (0.001) loss 0.2184 (0.8029) lr 1.5567e-04 eta 0:49:13
epoch [47/50] batch [700/796] time 0.839 (1.174) data 0.000 (0.001) loss 0.2288 (0.8001) lr 1.5567e-04 eta 0:48:35
epoch [47/50] batch [720/796] time 0.819 (1.164) data 0.000 (0.001) loss 0.0635 (0.7965) lr 1.5567e-04 eta 0:47:48
epoch [47/50] batch [740/796] time 0.843 (1.155) data 0.000 (0.001) loss 0.1309 (0.7947) lr 1.5567e-04 eta 0:47:03
epoch [47/50] batch [760/796] time 0.841 (1.147) data 0.000 (0.001) loss 1.3394 (0.7917) lr 1.5567e-04 eta 0:46:20
epoch [47/50] batch [780/796] time 0.820 (1.139) data 0.000 (0.001) loss 0.4083 (0.7915) lr 1.5567e-04 eta 0:45:38
epoch [48/50] batch [20/796] time 0.830 (0.866) data 0.000 (0.030) loss 1.2354 (1.1116) lr 1.2369e-04 eta 0:34:09
epoch [48/50] batch [40/796] time 0.838 (0.849) data 0.000 (0.015) loss 0.7727 (0.8701) lr 1.2369e-04 eta 0:33:12
epoch [48/50] batch [60/796] time 0.841 (0.843) data 0.000 (0.010) loss 3.0942 (0.8588) lr 1.2369e-04 eta 0:32:42
epoch [48/50] batch [80/796] time 0.839 (0.839) data 0.000 (0.008) loss 1.4908 (0.8766) lr 1.2369e-04 eta 0:32:16
epoch [48/50] batch [100/796] time 0.819 (0.837) data 0.000 (0.006) loss 1.7250 (0.8489) lr 1.2369e-04 eta 0:31:56
epoch [48/50] batch [120/796] time 0.839 (0.836) data 0.000 (0.005) loss 0.3121 (0.8385) lr 1.2369e-04 eta 0:31:36
epoch [48/50] batch [140/796] time 0.837 (0.835) data 0.000 (0.005) loss 0.1400 (0.8047) lr 1.2369e-04 eta 0:31:17
epoch [48/50] batch [160/796] time 0.838 (0.835) data 0.000 (0.004) loss 0.1356 (0.7881) lr 1.2369e-04 eta 0:30:59
epoch [48/50] batch [180/796] time 0.829 (0.834) data 0.000 (0.004) loss 1.0346 (0.8032) lr 1.2369e-04 eta 0:30:41
epoch [48/50] batch [200/796] time 0.817 (0.834) data 0.000 (0.003) loss 0.0491 (0.8065) lr 1.2369e-04 eta 0:30:23
epoch [48/50] batch [220/796] time 0.829 (0.833) data 0.000 (0.003) loss 0.8021 (0.7889) lr 1.2369e-04 eta 0:30:06
epoch [48/50] batch [240/796] time 0.829 (0.833) data 0.000 (0.003) loss 0.4058 (0.7852) lr 1.2369e-04 eta 0:29:48
epoch [48/50] batch [260/796] time 0.829 (0.832) data 0.000 (0.003) loss 0.2762 (0.7845) lr 1.2369e-04 eta 0:29:31
epoch [48/50] batch [280/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.2867 (0.7764) lr 1.2369e-04 eta 0:29:14
epoch [48/50] batch [300/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.2908 (0.7815) lr 1.2369e-04 eta 0:28:57
epoch [48/50] batch [320/796] time 0.828 (0.832) data 0.000 (0.002) loss 0.0561 (0.7828) lr 1.2369e-04 eta 0:28:40
epoch [48/50] batch [340/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.6400 (0.7799) lr 1.2369e-04 eta 0:28:23
epoch [48/50] batch [360/796] time 0.829 (0.831) data 0.000 (0.002) loss 1.4975 (0.7794) lr 1.2369e-04 eta 0:28:06
epoch [48/50] batch [380/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.5378 (0.7885) lr 1.2369e-04 eta 0:27:49
epoch [48/50] batch [400/796] time 0.818 (0.831) data 0.000 (0.002) loss 0.2290 (0.7983) lr 1.2369e-04 eta 0:27:32
epoch [48/50] batch [420/796] time 0.817 (0.831) data 0.000 (0.002) loss 0.3742 (0.7938) lr 1.2369e-04 eta 0:27:16
epoch [48/50] batch [440/796] time 0.838 (0.831) data 0.000 (0.002) loss 0.0879 (0.7821) lr 1.2369e-04 eta 0:26:59
epoch [48/50] batch [460/796] time 0.809 (0.831) data 0.000 (0.002) loss 0.2597 (0.7730) lr 1.2369e-04 eta 0:26:43
epoch [48/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.001) loss 2.4803 (0.7722) lr 1.2369e-04 eta 0:26:26
epoch [48/50] batch [500/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.6624 (0.7709) lr 1.2369e-04 eta 0:26:09
epoch [48/50] batch [520/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.0338 (0.7722) lr 1.2369e-04 eta 0:25:53
epoch [48/50] batch [540/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.8460 (0.7710) lr 1.2369e-04 eta 0:25:36
epoch [48/50] batch [560/796] time 0.837 (0.831) data 0.000 (0.001) loss 1.3198 (0.7773) lr 1.2369e-04 eta 0:25:19
epoch [48/50] batch [580/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.3815 (0.7763) lr 1.2369e-04 eta 0:25:03
epoch [48/50] batch [600/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.3710 (0.7692) lr 1.2369e-04 eta 0:24:46
epoch [48/50] batch [620/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.5214 (0.7682) lr 1.2369e-04 eta 0:24:29
epoch [48/50] batch [640/796] time 0.821 (0.831) data 0.000 (0.001) loss 0.6358 (0.7759) lr 1.2369e-04 eta 0:24:13
epoch [48/50] batch [660/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.4467 (0.7802) lr 1.2369e-04 eta 0:23:56
epoch [48/50] batch [680/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.8351 (0.7810) lr 1.2369e-04 eta 0:23:39
epoch [48/50] batch [700/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.2314 (0.7868) lr 1.2369e-04 eta 0:23:22
epoch [48/50] batch [720/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.0153 (0.7864) lr 1.2369e-04 eta 0:23:06
epoch [48/50] batch [740/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.0600 (0.7861) lr 1.2369e-04 eta 0:22:49
epoch [48/50] batch [760/796] time 0.830 (0.831) data 0.000 (0.001) loss 3.4296 (0.7916) lr 1.2369e-04 eta 0:22:33
epoch [48/50] batch [780/796] time 0.841 (0.831) data 0.000 (0.001) loss 0.0944 (0.7914) lr 1.2369e-04 eta 0:22:16
epoch [49/50] batch [20/796] time 0.840 (0.861) data 0.000 (0.030) loss 0.0729 (0.6912) lr 9.5173e-05 eta 0:22:33
epoch [49/50] batch [40/796] time 0.818 (0.846) data 0.000 (0.015) loss 0.8807 (0.7288) lr 9.5173e-05 eta 0:21:52
epoch [49/50] batch [60/796] time 0.818 (0.839) data 0.000 (0.010) loss 0.2348 (0.7445) lr 9.5173e-05 eta 0:21:25
epoch [49/50] batch [80/796] time 0.838 (0.837) data 0.000 (0.008) loss 0.8760 (0.8005) lr 9.5173e-05 eta 0:21:06
epoch [49/50] batch [100/796] time 0.829 (0.836) data 0.000 (0.006) loss 0.7486 (0.7831) lr 9.5173e-05 eta 0:20:47
epoch [49/50] batch [120/796] time 0.829 (0.835) data 0.000 (0.005) loss 1.2353 (0.7916) lr 9.5173e-05 eta 0:20:29
epoch [49/50] batch [140/796] time 0.839 (0.835) data 0.000 (0.004) loss 1.5705 (0.8053) lr 9.5173e-05 eta 0:20:12
epoch [49/50] batch [160/796] time 0.829 (0.834) data 0.000 (0.004) loss 1.2970 (0.8156) lr 9.5173e-05 eta 0:19:54
epoch [49/50] batch [180/796] time 0.818 (0.834) data 0.000 (0.003) loss 0.1705 (0.8120) lr 9.5173e-05 eta 0:19:37
epoch [49/50] batch [200/796] time 0.818 (0.833) data 0.000 (0.003) loss 1.1192 (0.7936) lr 9.5173e-05 eta 0:19:19
epoch [49/50] batch [220/796] time 0.818 (0.833) data 0.000 (0.003) loss 1.5047 (0.7852) lr 9.5173e-05 eta 0:19:02
epoch [49/50] batch [240/796] time 0.829 (0.833) data 0.000 (0.003) loss 0.0839 (0.7681) lr 9.5173e-05 eta 0:18:45
epoch [49/50] batch [260/796] time 0.839 (0.833) data 0.000 (0.002) loss 0.0944 (0.7537) lr 9.5173e-05 eta 0:18:28
epoch [49/50] batch [280/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.1471 (0.7480) lr 9.5173e-05 eta 0:18:11
epoch [49/50] batch [300/796] time 0.839 (0.832) data 0.000 (0.002) loss 2.4716 (0.7537) lr 9.5173e-05 eta 0:17:55
epoch [49/50] batch [320/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.3907 (0.7488) lr 9.5173e-05 eta 0:17:38
epoch [49/50] batch [340/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.1834 (0.7435) lr 9.5173e-05 eta 0:17:21
epoch [49/50] batch [360/796] time 0.809 (0.832) data 0.000 (0.002) loss 0.8112 (0.7450) lr 9.5173e-05 eta 0:17:04
epoch [49/50] batch [380/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.2752 (0.7528) lr 9.5173e-05 eta 0:16:47
epoch [49/50] batch [400/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.8302 (0.7404) lr 9.5173e-05 eta 0:16:31
epoch [49/50] batch [420/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.6513 (0.7472) lr 9.5173e-05 eta 0:16:14
epoch [49/50] batch [440/796] time 0.829 (0.832) data 0.000 (0.002) loss 1.4299 (0.7488) lr 9.5173e-05 eta 0:15:57
epoch [49/50] batch [460/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.3593 (0.7593) lr 9.5173e-05 eta 0:15:41
epoch [49/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.9626 (0.7682) lr 9.5173e-05 eta 0:15:24
epoch [49/50] batch [500/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.2105 (0.7684) lr 9.5173e-05 eta 0:15:07
epoch [49/50] batch [520/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2818 (0.7605) lr 9.5173e-05 eta 0:14:51
epoch [49/50] batch [540/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2456 (0.7604) lr 9.5173e-05 eta 0:14:34
epoch [49/50] batch [560/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.4990 (0.7594) lr 9.5173e-05 eta 0:14:17
epoch [49/50] batch [580/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.5435 (0.7663) lr 9.5173e-05 eta 0:14:01
epoch [49/50] batch [600/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.0997 (0.7707) lr 9.5173e-05 eta 0:13:44
epoch [49/50] batch [620/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.1675 (0.7741) lr 9.5173e-05 eta 0:13:27
epoch [49/50] batch [640/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.7946 (0.7794) lr 9.5173e-05 eta 0:13:11
epoch [49/50] batch [660/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.4106 (0.7725) lr 9.5173e-05 eta 0:12:54
epoch [49/50] batch [680/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.1383 (0.7687) lr 9.5173e-05 eta 0:12:37
epoch [49/50] batch [700/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2921 (0.7694) lr 9.5173e-05 eta 0:12:21
epoch [49/50] batch [720/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.0212 (0.7706) lr 9.5173e-05 eta 0:12:04
epoch [49/50] batch [740/796] time 0.839 (0.831) data 0.000 (0.001) loss 2.0783 (0.7833) lr 9.5173e-05 eta 0:11:48
epoch [49/50] batch [760/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.7010 (0.7866) lr 9.5173e-05 eta 0:11:31
epoch [49/50] batch [780/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.6204 (0.7827) lr 9.5173e-05 eta 0:11:14
epoch [50/50] batch [20/796] time 0.829 (0.859) data 0.000 (0.027) loss 0.3717 (0.6594) lr 7.0224e-05 eta 0:11:06
epoch [50/50] batch [40/796] time 0.838 (0.844) data 0.000 (0.013) loss 0.5166 (0.7941) lr 7.0224e-05 eta 0:10:37
epoch [50/50] batch [60/796] time 0.809 (0.839) data 0.002 (0.009) loss 1.4674 (0.8193) lr 7.0224e-05 eta 0:10:17
epoch [50/50] batch [80/796] time 0.839 (0.837) data 0.000 (0.007) loss 0.8437 (0.8358) lr 7.0224e-05 eta 0:09:59
epoch [50/50] batch [100/796] time 0.839 (0.836) data 0.000 (0.005) loss 1.0788 (0.8323) lr 7.0224e-05 eta 0:09:41
epoch [50/50] batch [120/796] time 0.818 (0.835) data 0.000 (0.005) loss 0.0612 (0.7826) lr 7.0224e-05 eta 0:09:24
epoch [50/50] batch [140/796] time 0.839 (0.835) data 0.000 (0.004) loss 0.1160 (0.7690) lr 7.0224e-05 eta 0:09:07
epoch [50/50] batch [160/796] time 0.842 (0.835) data 0.000 (0.003) loss 1.5313 (0.7791) lr 7.0224e-05 eta 0:08:50
epoch [50/50] batch [180/796] time 0.829 (0.835) data 0.000 (0.003) loss 0.3670 (0.7763) lr 7.0224e-05 eta 0:08:34
epoch [50/50] batch [200/796] time 0.838 (0.834) data 0.000 (0.003) loss 0.4807 (0.7647) lr 7.0224e-05 eta 0:08:17
epoch [50/50] batch [220/796] time 0.838 (0.834) data 0.000 (0.003) loss 0.2201 (0.7695) lr 7.0224e-05 eta 0:08:00
epoch [50/50] batch [240/796] time 0.830 (0.834) data 0.000 (0.002) loss 0.2363 (0.7523) lr 7.0224e-05 eta 0:07:43
epoch [50/50] batch [260/796] time 0.818 (0.833) data 0.000 (0.002) loss 0.3538 (0.7463) lr 7.0224e-05 eta 0:07:26
epoch [50/50] batch [280/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.0995 (0.7452) lr 7.0224e-05 eta 0:07:09
epoch [50/50] batch [300/796] time 0.851 (0.833) data 0.000 (0.002) loss 1.4650 (0.7480) lr 7.0224e-05 eta 0:06:53
epoch [50/50] batch [320/796] time 0.818 (0.833) data 0.000 (0.002) loss 0.4387 (0.7425) lr 7.0224e-05 eta 0:06:36
epoch [50/50] batch [340/796] time 0.828 (0.833) data 0.000 (0.002) loss 0.5209 (0.7498) lr 7.0224e-05 eta 0:06:19
epoch [50/50] batch [360/796] time 0.830 (0.833) data 0.000 (0.002) loss 2.6158 (0.7565) lr 7.0224e-05 eta 0:06:03
epoch [50/50] batch [380/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.5879 (0.7793) lr 7.0224e-05 eta 0:05:46
epoch [50/50] batch [400/796] time 0.830 (0.832) data 0.000 (0.001) loss 0.4484 (0.7779) lr 7.0224e-05 eta 0:05:29
epoch [50/50] batch [420/796] time 0.830 (0.832) data 0.000 (0.001) loss 1.0262 (0.7921) lr 7.0224e-05 eta 0:05:12
epoch [50/50] batch [440/796] time 0.808 (0.832) data 0.000 (0.001) loss 0.0070 (0.7863) lr 7.0224e-05 eta 0:04:56
epoch [50/50] batch [460/796] time 0.818 (0.832) data 0.000 (0.001) loss 1.6941 (0.8073) lr 7.0224e-05 eta 0:04:39
epoch [50/50] batch [480/796] time 0.829 (0.832) data 0.000 (0.001) loss 0.0206 (0.7976) lr 7.0224e-05 eta 0:04:23
epoch [50/50] batch [500/796] time 0.839 (0.832) data 0.000 (0.001) loss 1.2816 (0.7928) lr 7.0224e-05 eta 0:04:06
epoch [50/50] batch [520/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.4715 (0.7855) lr 7.0224e-05 eta 0:03:49
epoch [50/50] batch [540/796] time 0.840 (0.832) data 0.000 (0.001) loss 0.1510 (0.7966) lr 7.0224e-05 eta 0:03:33
epoch [50/50] batch [560/796] time 0.817 (0.832) data 0.000 (0.001) loss 1.0912 (0.7994) lr 7.0224e-05 eta 0:03:16
epoch [50/50] batch [580/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.9279 (0.8000) lr 7.0224e-05 eta 0:02:59
epoch [50/50] batch [600/796] time 0.818 (0.832) data 0.000 (0.001) loss 1.0103 (0.8032) lr 7.0224e-05 eta 0:02:43
epoch [50/50] batch [620/796] time 0.829 (0.832) data 0.000 (0.001) loss 0.0421 (0.8019) lr 7.0224e-05 eta 0:02:26
epoch [50/50] batch [640/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.6366 (0.8099) lr 7.0224e-05 eta 0:02:09
epoch [50/50] batch [660/796] time 0.848 (0.832) data 0.000 (0.001) loss 0.3081 (0.8004) lr 7.0224e-05 eta 0:01:53
epoch [50/50] batch [680/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.3819 (0.7995) lr 7.0224e-05 eta 0:01:36
epoch [50/50] batch [700/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.7371 (0.8074) lr 7.0224e-05 eta 0:01:19
epoch [50/50] batch [720/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.7606 (0.8036) lr 7.0224e-05 eta 0:01:03
epoch [50/50] batch [740/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.5748 (0.7988) lr 7.0224e-05 eta 0:00:46
epoch [50/50] batch [760/796] time 0.835 (0.832) data 0.000 (0.001) loss 0.6223 (0.7977) lr 7.0224e-05 eta 0:00:29
epoch [50/50] batch [780/796] time 0.829 (0.832) data 0.000 (0.001) loss 0.3832 (0.8034) lr 7.0224e-05 eta 0:00:13
Checkpoint saved to output/base2new/train_base/sun397/vit_b16_ep50_c4_BZ4_ProDA/seed1/prompt_learner/model.pth.tar-50
Finish training
Deploy the last-epoch model
Evaluate on the *test* set
=> result
* total: 9,950
* correct: 8,370
* accuracy: 84.12%
* error: 15.88%
* macro_f1: 83.91%
Elapsed: 9:36:41
