***************
** Arguments **
***************
backbone: 
config_file: configs/trainers/ProDA/vit_b16_ep50_c4_BZ4_ProDA.yaml
dataset_config_file: configs/datasets/imagenet.yaml
eval_only: False
head: 
load_epoch: None
model_dir: 
no_train: False
opts: ['DATASET.NUM_SHOTS', '16', 'DATASET.SUBSAMPLE_CLASSES', 'base']
output_dir: output/base2new/train_base/imagenet/vit_b16_ep50_c4_BZ4_ProDA/seed3
resume: 
root: /mnt/hdd/DATA
seed: 3
source_domains: None
target_domains: None
trainer: ProDA
transforms: None
************
** Config **
************
DATALOADER:
  K_TRANSFORMS: 1
  NUM_WORKERS: 8
  RETURN_IMG0: False
  TEST:
    BATCH_SIZE: 100
    SAMPLER: SequentialSampler
  TRAIN_U:
    BATCH_SIZE: 32
    N_DOMAIN: 0
    N_INS: 16
    SAME_AS_X: True
    SAMPLER: RandomSampler
  TRAIN_X:
    BATCH_SIZE: 4
    N_DOMAIN: 0
    N_INS: 16
    SAMPLER: RandomSampler
DATASET:
  ALL_AS_UNLABELED: False
  CIFAR_C_LEVEL: 1
  CIFAR_C_TYPE: 
  NAME: ImageNet
  NUM_LABELED: -1
  NUM_SHOTS: 16
  ROOT: /mnt/hdd/DATA
  SOURCE_DOMAINS: ()
  STL10_FOLD: -1
  SUBSAMPLE_CLASSES: base
  TARGET_DOMAINS: ()
  VAL_PERCENT: 0.1
INPUT:
  COLORJITTER_B: 0.4
  COLORJITTER_C: 0.4
  COLORJITTER_H: 0.1
  COLORJITTER_S: 0.4
  CROP_PADDING: 4
  CUTOUT_LEN: 16
  CUTOUT_N: 1
  GB_K: 21
  GB_P: 0.5
  GN_MEAN: 0.0
  GN_STD: 0.15
  INTERPOLATION: bicubic
  NO_TRANSFORM: False
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  RANDAUGMENT_M: 10
  RANDAUGMENT_N: 2
  RGS_P: 0.2
  RRCROP_SCALE: (0.08, 1.0)
  SIZE: (224, 224)
  TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize')
MODEL:
  BACKBONE:
    NAME: ViT-B/16
    PRETRAINED: True
  HEAD:
    ACTIVATION: relu
    BN: True
    DROPOUT: 0.0
    HIDDEN_LAYERS: ()
    NAME: 
  INIT_WEIGHTS: 
OPTIM:
  ADAM_BETA1: 0.9
  ADAM_BETA2: 0.999
  BASE_LR_MULT: 0.1
  GAMMA: 0.1
  LR: 0.002
  LR_SCHEDULER: cosine
  MAX_EPOCH: 50
  MOMENTUM: 0.9
  NAME: sgd
  NEW_LAYERS: ()
  RMSPROP_ALPHA: 0.99
  SGD_DAMPNING: 0
  SGD_NESTEROV: False
  STAGED_LR: False
  STEPSIZE: (-1,)
  WARMUP_CONS_LR: 1e-05
  WARMUP_EPOCH: 5
  WARMUP_MIN_LR: 1e-05
  WARMUP_RECOUNT: True
  WARMUP_TYPE: constant
  WEIGHT_DECAY: 0.0005
OUTPUT_DIR: output/base2new/train_base/imagenet/vit_b16_ep50_c4_BZ4_ProDA/seed3
RESUME: 
SEED: 3
TEST:
  COMPUTE_CMAT: False
  EVALUATOR: Classification
  FINAL_MODEL: last_step
  NO_TEST: False
  PER_CLASS_RESULT: False
  SPLIT: test
TRAIN:
  CHECKPOINT_FREQ: 0
  COUNT_ITER: train_x
  PRINT_FREQ: 20
TRAINER:
  CDAC:
    CLASS_LR_MULTI: 10
    P_THRESH: 0.95
    RAMPUP_COEF: 30
    RAMPUP_ITRS: 1000
    STRONG_TRANSFORMS: ()
    TOPK_MATCH: 5
  COCOOP:
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  COOP:
    CLASS_TOKEN_POSITION: end
    CSC: False
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  CROSSGRAD:
    ALPHA_D: 0.5
    ALPHA_F: 0.5
    EPS_D: 1.0
    EPS_F: 1.0
  DAEL:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DAELDG:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DDAIG:
    ALPHA: 0.5
    CLAMP: False
    CLAMP_MAX: 1.0
    CLAMP_MIN: -1.0
    G_ARCH: 
    LMDA: 0.3
    WARMUP: 0
  DOMAINMIX:
    ALPHA: 1.0
    BETA: 1.0
    TYPE: crossdomain
  ENTMIN:
    LMDA: 0.001
  FIXMATCH:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 1.0
  IVLP:
    CTX_INIT: a photo of a
    N_CTX_TEXT: 2
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_TEXT: 9
    PROMPT_DEPTH_VISION: 9
  M3SDA:
    LMDA: 0.5
    N_STEP_F: 4
  MAPLE:
    CTX_INIT: a photo of a
    N_CTX: 4
    PREC: fp16
    PROMPT_DEPTH: 9
  MCD:
    N_STEP_F: 4
  MEANTEACHER:
    EMA_ALPHA: 0.999
    RAMPUP: 5
    WEIGHT_U: 1.0
  MIXMATCH:
    MIXUP_BETA: 0.75
    RAMPUP: 20000
    TEMP: 2.0
    WEIGHT_U: 100.0
  MME:
    LMDA: 0.1
  NAME: ProDA
  ProDA:
    N_CTX: 4
    N_PROMPT: 32
    PREC: fp16
  SE:
    CONF_THRE: 0.95
    EMA_ALPHA: 0.999
    RAMPUP: 300
  VPT:
    CTX_INIT: a photo of a
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_VISION: 1
USE_CUDA: True
VERBOSE: True
VERSION: 1
Collecting env info ...
** System info **
PyTorch version: 2.2.1+cu121
Is debug build: False
CUDA used to build PyTorch: 12.1
ROCM used to build PyTorch: N/A

OS: Debian GNU/Linux 12 (bookworm) (x86_64)
GCC version: (Debian 12.2.0-14) 12.2.0
Clang version: Could not collect
CMake version: Could not collect
Libc version: glibc-2.36

Python version: 3.11.2 (main, Mar 13 2023, 12:18:29) [GCC 12.2.0] (64-bit runtime)
Python platform: Linux-6.5.13-3-pve-x86_64-with-glibc2.36
Is CUDA available: True
CUDA runtime version: 11.8.89
CUDA_MODULE_LOADING set to: LAZY
GPU models and configuration: 
GPU 0: NVIDIA A800 80GB PCIe
GPU 1: NVIDIA A800 80GB PCIe

Nvidia driver version: 525.147.05
cuDNN version: Could not collect
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True

CPU:
Architecture:                       x86_64
CPU op-mode(s):                     32-bit, 64-bit
Address sizes:                      46 bits physical, 57 bits virtual
Byte Order:                         Little Endian
CPU(s):                             64
On-line CPU(s) list:                18,20,22,23,25-27,29,31,32,34,37,46-49
Off-line CPU(s) list:               0-17,19,21,24,28,30,33,35,36,38-45,50-63
Vendor ID:                          GenuineIntel
Model name:                         Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz
CPU family:                         6
Model:                              106
Thread(s) per core:                 2
Core(s) per socket:                 16
Socket(s):                          2
Stepping:                           6
CPU(s) scaling MHz:                 98%
CPU max MHz:                        3500.0000
CPU min MHz:                        800.0000
BogoMIPS:                           5800.00
Flags:                              fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect wbnoinvd dtherm ida arat pln pts vnmi avx512vbmi umip pku ospke avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid fsrm md_clear pconfig flush_l1d arch_capabilities
Virtualization:                     VT-x
L1d cache:                          1.5 MiB (32 instances)
L1i cache:                          1 MiB (32 instances)
L2 cache:                           40 MiB (32 instances)
L3 cache:                           48 MiB (2 instances)
NUMA node(s):                       2
NUMA node0 CPU(s):                  0-15,32-47
NUMA node1 CPU(s):                  16-31,48-63
Vulnerability Gather data sampling: Vulnerable: No microcode
Vulnerability Itlb multihit:        Not affected
Vulnerability L1tf:                 Not affected
Vulnerability Mds:                  Not affected
Vulnerability Meltdown:             Not affected
Vulnerability Mmio stale data:      Mitigation; Clear CPU buffers; SMT vulnerable
Vulnerability Retbleed:             Not affected
Vulnerability Spec rstack overflow: Not affected
Vulnerability Spec store bypass:    Mitigation; Speculative Store Bypass disabled via prctl
Vulnerability Spectre v1:           Mitigation; usercopy/swapgs barriers and __user pointer sanitization
Vulnerability Spectre v2:           Mitigation; Enhanced / Automatic IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
Vulnerability Srbds:                Not affected
Vulnerability Tsx async abort:      Not affected

Versions of relevant libraries:
[pip3] flake8==3.7.9
[pip3] flake8==3.7.9
[pip3] numpy==1.26.4
[pip3] torch==2.2.1
[pip3] torchaudio==2.2.1
[pip3] torchvision==0.17.1
[pip3] triton==2.2.0
[conda] Could not collect
        Pillow (10.2.0)

Loading trainer: ProDA
Loading dataset: ImageNet
Loading preprocessed few-shot data from /mnt/hdd/DATA/imagenet/split_fewshot/shot_16_shuffled-seed_3.pkl
SUBSAMPLE BASE CLASSES!
Building transform_train
+ random resized crop (size=(224, 224), scale=(0.08, 1.0))
+ random flip
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
Building transform_test
+ resize the smaller edge to 224
+ 224x224 center crop
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
---------  --------
Dataset    ImageNet
# classes  500
# train_x  8,000
# val      25,000
# test     25,000
---------  --------
Loading CLIP (backbone: ViT-B/16)
Building custom CLIP
Turning off gradients in both the image and the text encoder
Parameters to be updated: {'prompt_learner.ctx'}
Loading evaluator: Classification
No checkpoint found, train from scratch
Initialize tensorboard (log_dir=output/base2new/train_base/imagenet/vit_b16_ep50_c4_BZ4_ProDA/seed3/tensorboard)
epoch [1/50] batch [20/2000] time 2.028 (2.171) data 0.000 (0.039) loss 0.8349 (2.9473) lr 1.0000e-05 eta 2 days, 12:17:59
epoch [1/50] batch [40/2000] time 2.054 (2.104) data 0.000 (0.019) loss 1.8248 (2.6865) lr 1.0000e-05 eta 2 days, 10:24:49
epoch [1/50] batch [60/2000] time 2.054 (2.079) data 0.001 (0.013) loss 1.0773 (2.4050) lr 1.0000e-05 eta 2 days, 9:42:39
epoch [1/50] batch [80/2000] time 1.999 (2.066) data 0.000 (0.010) loss 2.0057 (2.3833) lr 1.0000e-05 eta 2 days, 9:20:23
epoch [1/50] batch [100/2000] time 2.051 (2.059) data 0.000 (0.008) loss 2.1979 (2.2769) lr 1.0000e-05 eta 2 days, 9:08:39
epoch [1/50] batch [120/2000] time 1.998 (2.055) data 0.000 (0.007) loss 0.6683 (2.3181) lr 1.0000e-05 eta 2 days, 9:00:31
epoch [1/50] batch [140/2000] time 2.000 (2.051) data 0.000 (0.006) loss 0.7790 (2.2493) lr 1.0000e-05 eta 2 days, 8:53:20
epoch [1/50] batch [160/2000] time 1.999 (2.049) data 0.000 (0.005) loss 2.5242 (2.2688) lr 1.0000e-05 eta 2 days, 8:48:43
epoch [1/50] batch [180/2000] time 1.998 (2.046) data 0.000 (0.004) loss 1.9480 (2.2458) lr 1.0000e-05 eta 2 days, 8:43:43
epoch [1/50] batch [200/2000] time 2.051 (2.045) data 0.000 (0.004) loss 1.6309 (2.2047) lr 1.0000e-05 eta 2 days, 8:41:22
epoch [1/50] batch [220/2000] time 2.034 (2.044) data 0.000 (0.004) loss 3.6836 (2.2097) lr 1.0000e-05 eta 2 days, 8:38:41
epoch [1/50] batch [240/2000] time 2.036 (2.043) data 0.000 (0.003) loss 2.0737 (2.2190) lr 1.0000e-05 eta 2 days, 8:36:23
epoch [1/50] batch [260/2000] time 2.034 (2.042) data 0.000 (0.003) loss 2.3099 (2.2045) lr 1.0000e-05 eta 2 days, 8:34:30
epoch [1/50] batch [280/2000] time 1.997 (2.041) data 0.000 (0.003) loss 2.0567 (2.1675) lr 1.0000e-05 eta 2 days, 8:32:01
epoch [1/50] batch [300/2000] time 2.050 (2.040) data 0.000 (0.003) loss 2.8901 (2.1409) lr 1.0000e-05 eta 2 days, 8:29:51
epoch [1/50] batch [320/2000] time 2.050 (2.040) data 0.000 (0.003) loss 1.8357 (2.1366) lr 1.0000e-05 eta 2 days, 8:28:37
epoch [1/50] batch [340/2000] time 1.997 (2.039) data 0.000 (0.002) loss 1.1096 (2.1231) lr 1.0000e-05 eta 2 days, 8:26:36
epoch [1/50] batch [360/2000] time 2.051 (2.038) data 0.000 (0.002) loss 1.4199 (2.1465) lr 1.0000e-05 eta 2 days, 8:24:26
epoch [1/50] batch [380/2000] time 2.049 (2.038) data 0.000 (0.002) loss 2.1388 (2.1281) lr 1.0000e-05 eta 2 days, 8:23:00
epoch [1/50] batch [400/2000] time 2.051 (2.037) data 0.000 (0.002) loss 0.9475 (2.1111) lr 1.0000e-05 eta 2 days, 8:21:10
epoch [1/50] batch [420/2000] time 2.050 (2.037) data 0.000 (0.002) loss 4.0680 (2.1313) lr 1.0000e-05 eta 2 days, 8:19:55
epoch [1/50] batch [440/2000] time 2.050 (2.036) data 0.000 (0.002) loss 3.3326 (2.1271) lr 1.0000e-05 eta 2 days, 8:18:53
epoch [1/50] batch [460/2000] time 2.025 (2.036) data 0.000 (0.002) loss 2.2766 (2.1241) lr 1.0000e-05 eta 2 days, 8:17:23
epoch [1/50] batch [480/2000] time 2.056 (2.036) data 0.000 (0.002) loss 2.8072 (2.1189) lr 1.0000e-05 eta 2 days, 8:16:14
epoch [1/50] batch [500/2000] time 2.058 (2.035) data 0.000 (0.002) loss 1.2793 (2.1083) lr 1.0000e-05 eta 2 days, 8:14:37
epoch [1/50] batch [520/2000] time 2.054 (2.035) data 0.000 (0.002) loss 1.7451 (2.0908) lr 1.0000e-05 eta 2 days, 8:13:40
epoch [1/50] batch [540/2000] time 2.051 (2.035) data 0.000 (0.002) loss 1.3572 (2.0893) lr 1.0000e-05 eta 2 days, 8:13:03
epoch [1/50] batch [560/2000] time 2.051 (2.035) data 0.000 (0.002) loss 0.9201 (2.0906) lr 1.0000e-05 eta 2 days, 8:12:24
epoch [1/50] batch [580/2000] time 2.049 (2.035) data 0.000 (0.002) loss 1.2825 (2.0931) lr 1.0000e-05 eta 2 days, 8:11:15
epoch [1/50] batch [600/2000] time 2.001 (2.035) data 0.001 (0.001) loss 1.7551 (2.0810) lr 1.0000e-05 eta 2 days, 8:10:37
epoch [1/50] batch [620/2000] time 1.997 (2.035) data 0.000 (0.001) loss 3.5990 (2.0695) lr 1.0000e-05 eta 2 days, 8:09:50
epoch [1/50] batch [640/2000] time 2.025 (2.034) data 0.000 (0.001) loss 2.9665 (2.0653) lr 1.0000e-05 eta 2 days, 8:08:57
epoch [1/50] batch [660/2000] time 2.049 (2.034) data 0.000 (0.001) loss 1.0922 (2.0627) lr 1.0000e-05 eta 2 days, 8:07:55
epoch [1/50] batch [680/2000] time 2.027 (2.034) data 0.000 (0.001) loss 1.0942 (2.0578) lr 1.0000e-05 eta 2 days, 8:06:46
epoch [1/50] batch [700/2000] time 2.030 (2.034) data 0.000 (0.001) loss 0.3896 (2.0529) lr 1.0000e-05 eta 2 days, 8:05:50
epoch [1/50] batch [720/2000] time 2.029 (2.033) data 0.000 (0.001) loss 1.3938 (2.0418) lr 1.0000e-05 eta 2 days, 8:04:31
epoch [1/50] batch [740/2000] time 1.996 (2.033) data 0.000 (0.001) loss 1.9040 (2.0371) lr 1.0000e-05 eta 2 days, 8:03:24
epoch [1/50] batch [760/2000] time 1.995 (2.033) data 0.000 (0.001) loss 0.7861 (2.0320) lr 1.0000e-05 eta 2 days, 8:02:22
epoch [1/50] batch [780/2000] time 1.998 (2.033) data 0.000 (0.001) loss 2.1962 (2.0260) lr 1.0000e-05 eta 2 days, 8:01:18
epoch [1/50] batch [800/2000] time 2.027 (2.033) data 0.000 (0.001) loss 1.1293 (2.0297) lr 1.0000e-05 eta 2 days, 8:00:28
epoch [1/50] batch [820/2000] time 1.973 (2.032) data 0.000 (0.001) loss 2.0513 (2.0289) lr 1.0000e-05 eta 2 days, 7:59:32
epoch [1/50] batch [840/2000] time 1.996 (2.032) data 0.000 (0.001) loss 1.3006 (2.0267) lr 1.0000e-05 eta 2 days, 7:58:41
epoch [1/50] batch [860/2000] time 2.029 (2.032) data 0.000 (0.001) loss 2.6412 (2.0232) lr 1.0000e-05 eta 2 days, 7:57:43
epoch [1/50] batch [880/2000] time 1.997 (2.032) data 0.000 (0.001) loss 0.1701 (2.0130) lr 1.0000e-05 eta 2 days, 7:56:40
epoch [1/50] batch [900/2000] time 2.050 (2.032) data 0.000 (0.001) loss 2.1737 (2.0058) lr 1.0000e-05 eta 2 days, 7:56:00
epoch [1/50] batch [920/2000] time 2.050 (2.032) data 0.000 (0.001) loss 0.6718 (1.9989) lr 1.0000e-05 eta 2 days, 7:55:04
epoch [1/50] batch [940/2000] time 2.051 (2.032) data 0.000 (0.001) loss 2.2068 (2.0032) lr 1.0000e-05 eta 2 days, 7:54:11
epoch [1/50] batch [960/2000] time 2.028 (2.032) data 0.000 (0.001) loss 1.0639 (1.9899) lr 1.0000e-05 eta 2 days, 7:53:26
epoch [1/50] batch [980/2000] time 2.050 (2.032) data 0.000 (0.001) loss 0.8357 (1.9925) lr 1.0000e-05 eta 2 days, 7:52:52
epoch [1/50] batch [1000/2000] time 2.048 (2.032) data 0.000 (0.001) loss 2.0570 (1.9883) lr 1.0000e-05 eta 2 days, 7:52:08
epoch [1/50] batch [1020/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.1042 (1.9784) lr 1.0000e-05 eta 2 days, 7:51:14
epoch [1/50] batch [1040/2000] time 2.001 (2.031) data 0.000 (0.001) loss 1.3899 (1.9699) lr 1.0000e-05 eta 2 days, 7:50:36
epoch [1/50] batch [1060/2000] time 1.997 (2.032) data 0.000 (0.001) loss 2.6428 (1.9616) lr 1.0000e-05 eta 2 days, 7:49:57
epoch [1/50] batch [1080/2000] time 2.049 (2.031) data 0.000 (0.001) loss 3.7599 (1.9574) lr 1.0000e-05 eta 2 days, 7:49:06
epoch [1/50] batch [1100/2000] time 2.026 (2.031) data 0.000 (0.001) loss 3.2614 (1.9588) lr 1.0000e-05 eta 2 days, 7:48:22
epoch [1/50] batch [1120/2000] time 2.001 (2.031) data 0.000 (0.001) loss 2.1586 (1.9537) lr 1.0000e-05 eta 2 days, 7:47:39
epoch [1/50] batch [1140/2000] time 2.054 (2.031) data 0.001 (0.001) loss 1.2248 (1.9477) lr 1.0000e-05 eta 2 days, 7:46:56
epoch [1/50] batch [1160/2000] time 2.001 (2.031) data 0.000 (0.001) loss 2.6986 (1.9534) lr 1.0000e-05 eta 2 days, 7:46:02
epoch [1/50] batch [1180/2000] time 2.032 (2.031) data 0.000 (0.001) loss 2.0098 (1.9529) lr 1.0000e-05 eta 2 days, 7:45:12
epoch [1/50] batch [1200/2000] time 1.997 (2.031) data 0.000 (0.001) loss 1.8232 (1.9505) lr 1.0000e-05 eta 2 days, 7:44:29
epoch [1/50] batch [1220/2000] time 2.027 (2.031) data 0.000 (0.001) loss 3.9265 (1.9492) lr 1.0000e-05 eta 2 days, 7:43:44
epoch [1/50] batch [1240/2000] time 2.028 (2.031) data 0.000 (0.001) loss 3.4449 (1.9522) lr 1.0000e-05 eta 2 days, 7:42:52
epoch [1/50] batch [1260/2000] time 2.030 (2.031) data 0.000 (0.001) loss 4.6867 (1.9530) lr 1.0000e-05 eta 2 days, 7:42:10
epoch [1/50] batch [1280/2000] time 2.053 (2.031) data 0.000 (0.001) loss 2.5994 (1.9539) lr 1.0000e-05 eta 2 days, 7:41:32
epoch [1/50] batch [1300/2000] time 2.031 (2.031) data 0.000 (0.001) loss 0.1970 (1.9523) lr 1.0000e-05 eta 2 days, 7:40:49
epoch [1/50] batch [1320/2000] time 2.029 (2.031) data 0.000 (0.001) loss 2.8268 (1.9492) lr 1.0000e-05 eta 2 days, 7:39:59
epoch [1/50] batch [1340/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.6184 (1.9467) lr 1.0000e-05 eta 2 days, 7:39:10
epoch [1/50] batch [1360/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.9921 (1.9461) lr 1.0000e-05 eta 2 days, 7:38:35
epoch [1/50] batch [1380/2000] time 1.996 (2.031) data 0.000 (0.001) loss 3.2338 (1.9480) lr 1.0000e-05 eta 2 days, 7:37:52
epoch [1/50] batch [1400/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.6223 (1.9480) lr 1.0000e-05 eta 2 days, 7:37:04
epoch [1/50] batch [1420/2000] time 2.055 (2.031) data 0.000 (0.001) loss 0.5070 (1.9407) lr 1.0000e-05 eta 2 days, 7:36:26
epoch [1/50] batch [1440/2000] time 2.055 (2.031) data 0.000 (0.001) loss 2.1563 (1.9330) lr 1.0000e-05 eta 2 days, 7:35:46
epoch [1/50] batch [1460/2000] time 2.000 (2.031) data 0.000 (0.001) loss 3.6304 (1.9358) lr 1.0000e-05 eta 2 days, 7:34:57
epoch [1/50] batch [1480/2000] time 2.053 (2.031) data 0.000 (0.001) loss 2.9257 (1.9309) lr 1.0000e-05 eta 2 days, 7:34:26
epoch [1/50] batch [1500/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.1246 (1.9228) lr 1.0000e-05 eta 2 days, 7:33:50
epoch [1/50] batch [1520/2000] time 2.054 (2.031) data 0.000 (0.001) loss 1.0367 (1.9285) lr 1.0000e-05 eta 2 days, 7:33:03
epoch [1/50] batch [1540/2000] time 1.981 (2.031) data 0.000 (0.001) loss 3.1005 (1.9201) lr 1.0000e-05 eta 2 days, 7:32:21
epoch [1/50] batch [1560/2000] time 2.058 (2.031) data 0.000 (0.001) loss 0.3951 (1.9135) lr 1.0000e-05 eta 2 days, 7:32:01
epoch [1/50] batch [1580/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.6352 (1.9090) lr 1.0000e-05 eta 2 days, 7:31:20
epoch [1/50] batch [1600/2000] time 2.028 (2.031) data 0.000 (0.001) loss 2.2420 (1.9049) lr 1.0000e-05 eta 2 days, 7:30:39
epoch [1/50] batch [1620/2000] time 2.054 (2.031) data 0.000 (0.001) loss 3.5515 (1.9018) lr 1.0000e-05 eta 2 days, 7:29:58
epoch [1/50] batch [1640/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.4756 (1.8977) lr 1.0000e-05 eta 2 days, 7:29:11
epoch [1/50] batch [1660/2000] time 2.028 (2.031) data 0.000 (0.001) loss 1.9265 (1.8954) lr 1.0000e-05 eta 2 days, 7:28:22
epoch [1/50] batch [1680/2000] time 2.054 (2.031) data 0.001 (0.001) loss 2.3433 (1.8918) lr 1.0000e-05 eta 2 days, 7:27:47
epoch [1/50] batch [1700/2000] time 2.030 (2.031) data 0.000 (0.001) loss 2.7455 (1.8909) lr 1.0000e-05 eta 2 days, 7:27:11
epoch [1/50] batch [1720/2000] time 2.001 (2.031) data 0.000 (0.001) loss 1.9559 (1.8880) lr 1.0000e-05 eta 2 days, 7:26:27
epoch [1/50] batch [1740/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.0668 (1.8812) lr 1.0000e-05 eta 2 days, 7:25:57
epoch [1/50] batch [1760/2000] time 1.998 (2.031) data 0.000 (0.001) loss 1.3062 (1.8782) lr 1.0000e-05 eta 2 days, 7:25:14
epoch [1/50] batch [1780/2000] time 2.053 (2.031) data 0.000 (0.001) loss 3.6245 (1.8750) lr 1.0000e-05 eta 2 days, 7:24:27
epoch [1/50] batch [1800/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.3889 (1.8721) lr 1.0000e-05 eta 2 days, 7:23:49
epoch [1/50] batch [1820/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.8823 (1.8685) lr 1.0000e-05 eta 2 days, 7:23:15
epoch [1/50] batch [1840/2000] time 2.032 (2.031) data 0.000 (0.001) loss 1.4520 (1.8658) lr 1.0000e-05 eta 2 days, 7:22:28
epoch [1/50] batch [1860/2000] time 2.001 (2.031) data 0.000 (0.001) loss 1.7238 (1.8643) lr 1.0000e-05 eta 2 days, 7:21:46
epoch [1/50] batch [1880/2000] time 1.999 (2.031) data 0.000 (0.001) loss 2.7424 (1.8608) lr 1.0000e-05 eta 2 days, 7:21:04
epoch [1/50] batch [1900/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.8213 (1.8623) lr 1.0000e-05 eta 2 days, 7:20:19
epoch [1/50] batch [1920/2000] time 1.974 (2.031) data 0.000 (0.001) loss 1.3309 (1.8587) lr 1.0000e-05 eta 2 days, 7:19:32
epoch [1/50] batch [1940/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.5273 (1.8562) lr 1.0000e-05 eta 2 days, 7:18:48
epoch [1/50] batch [1960/2000] time 1.999 (2.031) data 0.000 (0.001) loss 1.2919 (1.8571) lr 1.0000e-05 eta 2 days, 7:18:03
epoch [1/50] batch [1980/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.9896 (1.8527) lr 1.0000e-05 eta 2 days, 7:17:21
epoch [1/50] batch [2000/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.9852 (1.8522) lr 1.0000e-05 eta 2 days, 7:16:33
epoch [2/50] batch [20/2000] time 2.025 (2.057) data 0.000 (0.027) loss 1.4658 (1.9588) lr 1.0000e-05 eta 2 days, 7:58:22
epoch [2/50] batch [40/2000] time 1.996 (2.040) data 0.000 (0.014) loss 1.4736 (1.7163) lr 1.0000e-05 eta 2 days, 7:29:50
epoch [2/50] batch [60/2000] time 2.032 (2.037) data 0.001 (0.009) loss 0.4859 (1.5585) lr 1.0000e-05 eta 2 days, 7:24:16
epoch [2/50] batch [80/2000] time 2.052 (2.034) data 0.000 (0.007) loss 0.5886 (1.6499) lr 1.0000e-05 eta 2 days, 7:19:12
epoch [2/50] batch [100/2000] time 2.031 (2.032) data 0.000 (0.006) loss 1.6045 (1.6969) lr 1.0000e-05 eta 2 days, 7:15:09
epoch [2/50] batch [120/2000] time 2.029 (2.032) data 0.000 (0.005) loss 2.9131 (1.7178) lr 1.0000e-05 eta 2 days, 7:15:05
epoch [2/50] batch [140/2000] time 1.997 (2.032) data 0.000 (0.004) loss 1.8262 (1.6762) lr 1.0000e-05 eta 2 days, 7:14:15
epoch [2/50] batch [160/2000] time 2.053 (2.032) data 0.000 (0.004) loss 1.3542 (1.6914) lr 1.0000e-05 eta 2 days, 7:13:40
epoch [2/50] batch [180/2000] time 2.053 (2.032) data 0.000 (0.003) loss 2.8287 (1.7163) lr 1.0000e-05 eta 2 days, 7:13:26
epoch [2/50] batch [200/2000] time 2.004 (2.032) data 0.000 (0.003) loss 0.7840 (1.7211) lr 1.0000e-05 eta 2 days, 7:11:56
epoch [2/50] batch [220/2000] time 2.057 (2.031) data 0.000 (0.003) loss 2.3780 (1.6637) lr 1.0000e-05 eta 2 days, 7:10:27
epoch [2/50] batch [240/2000] time 1.999 (2.031) data 0.000 (0.002) loss 0.1486 (1.6751) lr 1.0000e-05 eta 2 days, 7:09:11
epoch [2/50] batch [260/2000] time 2.053 (2.031) data 0.000 (0.002) loss 1.1990 (1.6842) lr 1.0000e-05 eta 2 days, 7:08:17
epoch [2/50] batch [280/2000] time 2.036 (2.031) data 0.000 (0.002) loss 1.6661 (1.6737) lr 1.0000e-05 eta 2 days, 7:07:12
epoch [2/50] batch [300/2000] time 1.999 (2.031) data 0.000 (0.002) loss 1.1352 (1.6831) lr 1.0000e-05 eta 2 days, 7:06:53
epoch [2/50] batch [320/2000] time 2.049 (2.031) data 0.000 (0.002) loss 3.6301 (1.6989) lr 1.0000e-05 eta 2 days, 7:06:41
epoch [2/50] batch [340/2000] time 2.031 (2.031) data 0.000 (0.002) loss 2.6553 (1.6899) lr 1.0000e-05 eta 2 days, 7:05:52
epoch [2/50] batch [360/2000] time 1.996 (2.031) data 0.000 (0.002) loss 0.9356 (1.7157) lr 1.0000e-05 eta 2 days, 7:04:31
epoch [2/50] batch [380/2000] time 1.999 (2.031) data 0.000 (0.002) loss 2.5288 (1.7150) lr 1.0000e-05 eta 2 days, 7:03:55
epoch [2/50] batch [400/2000] time 1.974 (2.031) data 0.000 (0.002) loss 4.6142 (1.7310) lr 1.0000e-05 eta 2 days, 7:03:10
epoch [2/50] batch [420/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.5089 (1.7229) lr 1.0000e-05 eta 2 days, 7:02:36
epoch [2/50] batch [440/2000] time 2.050 (2.030) data 0.000 (0.001) loss 3.1760 (1.7396) lr 1.0000e-05 eta 2 days, 7:01:19
epoch [2/50] batch [460/2000] time 2.049 (2.030) data 0.000 (0.001) loss 2.9144 (1.7336) lr 1.0000e-05 eta 2 days, 7:00:41
epoch [2/50] batch [480/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.2403 (1.7317) lr 1.0000e-05 eta 2 days, 6:59:47
epoch [2/50] batch [500/2000] time 1.972 (2.030) data 0.000 (0.001) loss 2.7635 (1.7237) lr 1.0000e-05 eta 2 days, 6:59:00
epoch [2/50] batch [520/2000] time 1.974 (2.030) data 0.000 (0.001) loss 0.2732 (1.7133) lr 1.0000e-05 eta 2 days, 6:58:04
epoch [2/50] batch [540/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.4337 (1.7173) lr 1.0000e-05 eta 2 days, 6:57:16
epoch [2/50] batch [560/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.7247 (1.7217) lr 1.0000e-05 eta 2 days, 6:56:52
epoch [2/50] batch [580/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.7702 (1.7070) lr 1.0000e-05 eta 2 days, 6:56:06
epoch [2/50] batch [600/2000] time 2.003 (2.030) data 0.001 (0.001) loss 0.4617 (1.7129) lr 1.0000e-05 eta 2 days, 6:55:18
epoch [2/50] batch [620/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.4191 (1.7042) lr 1.0000e-05 eta 2 days, 6:54:34
epoch [2/50] batch [640/2000] time 2.001 (2.030) data 0.000 (0.001) loss 1.9367 (1.7075) lr 1.0000e-05 eta 2 days, 6:53:50
epoch [2/50] batch [660/2000] time 2.055 (2.030) data 0.000 (0.001) loss 3.8364 (1.7132) lr 1.0000e-05 eta 2 days, 6:53:40
epoch [2/50] batch [680/2000] time 2.004 (2.030) data 0.000 (0.001) loss 1.8495 (1.7109) lr 1.0000e-05 eta 2 days, 6:53:08
epoch [2/50] batch [700/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.6255 (1.7047) lr 1.0000e-05 eta 2 days, 6:52:47
epoch [2/50] batch [720/2000] time 2.050 (2.031) data 0.000 (0.001) loss 2.6506 (1.6997) lr 1.0000e-05 eta 2 days, 6:52:35
epoch [2/50] batch [740/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.9017 (1.6921) lr 1.0000e-05 eta 2 days, 6:52:01
epoch [2/50] batch [760/2000] time 2.047 (2.031) data 0.000 (0.001) loss 0.3866 (1.6863) lr 1.0000e-05 eta 2 days, 6:51:22
epoch [2/50] batch [780/2000] time 2.001 (2.031) data 0.000 (0.001) loss 0.6735 (1.6856) lr 1.0000e-05 eta 2 days, 6:50:41
epoch [2/50] batch [800/2000] time 2.000 (2.031) data 0.000 (0.001) loss 0.6668 (1.6740) lr 1.0000e-05 eta 2 days, 6:50:15
epoch [2/50] batch [820/2000] time 2.049 (2.031) data 0.000 (0.001) loss 2.4390 (1.6782) lr 1.0000e-05 eta 2 days, 6:49:33
epoch [2/50] batch [840/2000] time 2.024 (2.031) data 0.000 (0.001) loss 1.0935 (1.6735) lr 1.0000e-05 eta 2 days, 6:48:40
epoch [2/50] batch [860/2000] time 1.996 (2.031) data 0.000 (0.001) loss 2.4798 (1.6856) lr 1.0000e-05 eta 2 days, 6:47:49
epoch [2/50] batch [880/2000] time 2.051 (2.031) data 0.000 (0.001) loss 2.1071 (1.6878) lr 1.0000e-05 eta 2 days, 6:46:56
epoch [2/50] batch [900/2000] time 2.056 (2.031) data 0.000 (0.001) loss 3.8628 (1.6859) lr 1.0000e-05 eta 2 days, 6:46:19
epoch [2/50] batch [920/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.7569 (1.6862) lr 1.0000e-05 eta 2 days, 6:45:26
epoch [2/50] batch [940/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.9604 (1.6849) lr 1.0000e-05 eta 2 days, 6:44:50
epoch [2/50] batch [960/2000] time 2.050 (2.031) data 0.000 (0.001) loss 2.6956 (1.6818) lr 1.0000e-05 eta 2 days, 6:44:17
epoch [2/50] batch [980/2000] time 2.050 (2.031) data 0.000 (0.001) loss 1.9146 (1.6820) lr 1.0000e-05 eta 2 days, 6:43:41
epoch [2/50] batch [1000/2000] time 2.052 (2.031) data 0.000 (0.001) loss 2.6671 (1.6842) lr 1.0000e-05 eta 2 days, 6:42:57
epoch [2/50] batch [1020/2000] time 2.052 (2.031) data 0.000 (0.001) loss 2.9853 (1.6842) lr 1.0000e-05 eta 2 days, 6:42:06
epoch [2/50] batch [1040/2000] time 1.999 (2.030) data 0.000 (0.001) loss 5.1095 (1.6810) lr 1.0000e-05 eta 2 days, 6:41:12
epoch [2/50] batch [1060/2000] time 2.026 (2.030) data 0.000 (0.001) loss 0.0637 (1.6781) lr 1.0000e-05 eta 2 days, 6:40:31
epoch [2/50] batch [1080/2000] time 1.998 (2.031) data 0.000 (0.001) loss 1.4416 (1.6800) lr 1.0000e-05 eta 2 days, 6:39:58
epoch [2/50] batch [1100/2000] time 2.049 (2.030) data 0.000 (0.001) loss 2.6026 (1.6803) lr 1.0000e-05 eta 2 days, 6:39:08
epoch [2/50] batch [1120/2000] time 1.975 (2.030) data 0.000 (0.001) loss 1.9779 (1.6758) lr 1.0000e-05 eta 2 days, 6:38:19
epoch [2/50] batch [1140/2000] time 2.051 (2.030) data 0.001 (0.001) loss 0.5932 (1.6779) lr 1.0000e-05 eta 2 days, 6:37:30
epoch [2/50] batch [1160/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.6031 (1.6805) lr 1.0000e-05 eta 2 days, 6:36:41
epoch [2/50] batch [1180/2000] time 1.977 (2.030) data 0.000 (0.001) loss 1.3449 (1.6815) lr 1.0000e-05 eta 2 days, 6:36:05
epoch [2/50] batch [1200/2000] time 2.027 (2.030) data 0.000 (0.001) loss 2.4469 (1.6787) lr 1.0000e-05 eta 2 days, 6:35:24
epoch [2/50] batch [1220/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.8973 (1.6701) lr 1.0000e-05 eta 2 days, 6:34:37
epoch [2/50] batch [1240/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.4862 (1.6652) lr 1.0000e-05 eta 2 days, 6:33:46
epoch [2/50] batch [1260/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.3142 (1.6637) lr 1.0000e-05 eta 2 days, 6:32:57
epoch [2/50] batch [1280/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.0210 (1.6629) lr 1.0000e-05 eta 2 days, 6:32:08
epoch [2/50] batch [1300/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.2821 (1.6562) lr 1.0000e-05 eta 2 days, 6:31:31
epoch [2/50] batch [1320/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.6446 (1.6561) lr 1.0000e-05 eta 2 days, 6:30:30
epoch [2/50] batch [1340/2000] time 2.001 (2.030) data 0.000 (0.001) loss 1.6826 (1.6482) lr 1.0000e-05 eta 2 days, 6:29:43
epoch [2/50] batch [1360/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.6067 (1.6474) lr 1.0000e-05 eta 2 days, 6:29:08
epoch [2/50] batch [1380/2000] time 1.998 (2.030) data 0.000 (0.001) loss 2.4135 (1.6429) lr 1.0000e-05 eta 2 days, 6:28:32
epoch [2/50] batch [1400/2000] time 2.002 (2.030) data 0.000 (0.001) loss 0.7420 (1.6476) lr 1.0000e-05 eta 2 days, 6:27:52
epoch [2/50] batch [1420/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.7306 (1.6421) lr 1.0000e-05 eta 2 days, 6:27:06
epoch [2/50] batch [1440/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.5258 (1.6401) lr 1.0000e-05 eta 2 days, 6:26:17
epoch [2/50] batch [1460/2000] time 1.998 (2.030) data 0.000 (0.001) loss 4.0116 (1.6409) lr 1.0000e-05 eta 2 days, 6:25:29
epoch [2/50] batch [1480/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.2198 (1.6393) lr 1.0000e-05 eta 2 days, 6:24:48
epoch [2/50] batch [1500/2000] time 2.052 (2.029) data 0.000 (0.001) loss 2.2678 (1.6412) lr 1.0000e-05 eta 2 days, 6:24:04
epoch [2/50] batch [1520/2000] time 2.026 (2.030) data 0.000 (0.001) loss 1.5146 (1.6427) lr 1.0000e-05 eta 2 days, 6:23:27
epoch [2/50] batch [1540/2000] time 2.051 (2.030) data 0.000 (0.001) loss 2.9890 (1.6429) lr 1.0000e-05 eta 2 days, 6:22:47
epoch [2/50] batch [1560/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.5634 (1.6426) lr 1.0000e-05 eta 2 days, 6:22:14
epoch [2/50] batch [1580/2000] time 2.000 (2.030) data 0.000 (0.001) loss 2.0621 (1.6415) lr 1.0000e-05 eta 2 days, 6:21:33
epoch [2/50] batch [1600/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.7586 (1.6441) lr 1.0000e-05 eta 2 days, 6:20:45
epoch [2/50] batch [1620/2000] time 2.000 (2.029) data 0.000 (0.001) loss 3.5817 (1.6436) lr 1.0000e-05 eta 2 days, 6:19:58
epoch [2/50] batch [1640/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.8847 (1.6454) lr 1.0000e-05 eta 2 days, 6:19:16
epoch [2/50] batch [1660/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.3905 (1.6427) lr 1.0000e-05 eta 2 days, 6:18:30
epoch [2/50] batch [1680/2000] time 2.050 (2.029) data 0.001 (0.001) loss 2.4555 (1.6462) lr 1.0000e-05 eta 2 days, 6:17:52
epoch [2/50] batch [1700/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.1617 (1.6455) lr 1.0000e-05 eta 2 days, 6:17:09
epoch [2/50] batch [1720/2000] time 2.030 (2.029) data 0.000 (0.001) loss 1.6450 (1.6443) lr 1.0000e-05 eta 2 days, 6:16:26
epoch [2/50] batch [1740/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.7237 (1.6437) lr 1.0000e-05 eta 2 days, 6:15:48
epoch [2/50] batch [1760/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.3685 (1.6380) lr 1.0000e-05 eta 2 days, 6:15:05
epoch [2/50] batch [1780/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.3987 (1.6403) lr 1.0000e-05 eta 2 days, 6:14:22
epoch [2/50] batch [1800/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.9131 (1.6419) lr 1.0000e-05 eta 2 days, 6:13:33
epoch [2/50] batch [1820/2000] time 1.994 (2.029) data 0.000 (0.001) loss 1.1857 (1.6445) lr 1.0000e-05 eta 2 days, 6:12:48
epoch [2/50] batch [1840/2000] time 2.049 (2.029) data 0.000 (0.000) loss 1.7189 (1.6452) lr 1.0000e-05 eta 2 days, 6:12:04
epoch [2/50] batch [1860/2000] time 2.028 (2.029) data 0.000 (0.000) loss 1.5371 (1.6455) lr 1.0000e-05 eta 2 days, 6:11:16
epoch [2/50] batch [1880/2000] time 2.050 (2.029) data 0.000 (0.000) loss 2.7568 (1.6457) lr 1.0000e-05 eta 2 days, 6:10:39
epoch [2/50] batch [1900/2000] time 1.998 (2.029) data 0.000 (0.000) loss 0.2996 (1.6431) lr 1.0000e-05 eta 2 days, 6:09:51
epoch [2/50] batch [1920/2000] time 2.000 (2.029) data 0.000 (0.000) loss 0.7867 (1.6461) lr 1.0000e-05 eta 2 days, 6:09:13
epoch [2/50] batch [1940/2000] time 2.049 (2.029) data 0.000 (0.000) loss 1.0533 (1.6500) lr 1.0000e-05 eta 2 days, 6:08:32
epoch [2/50] batch [1960/2000] time 2.049 (2.029) data 0.000 (0.000) loss 0.8506 (1.6495) lr 1.0000e-05 eta 2 days, 6:07:45
epoch [2/50] batch [1980/2000] time 2.025 (2.029) data 0.000 (0.000) loss 0.7245 (1.6510) lr 1.0000e-05 eta 2 days, 6:07:03
epoch [2/50] batch [2000/2000] time 2.052 (2.029) data 0.000 (0.000) loss 1.6570 (1.6518) lr 1.0000e-05 eta 2 days, 6:06:20
epoch [3/50] batch [20/2000] time 2.031 (2.057) data 0.000 (0.027) loss 3.9692 (1.4739) lr 1.0000e-05 eta 2 days, 6:50:17
epoch [3/50] batch [40/2000] time 2.030 (2.046) data 0.000 (0.014) loss 0.4189 (1.5117) lr 1.0000e-05 eta 2 days, 6:31:58
epoch [3/50] batch [60/2000] time 2.029 (2.040) data 0.001 (0.009) loss 1.0320 (1.5689) lr 1.0000e-05 eta 2 days, 6:22:28
epoch [3/50] batch [80/2000] time 2.031 (2.037) data 0.000 (0.007) loss 2.7611 (1.5420) lr 1.0000e-05 eta 2 days, 6:15:48
epoch [3/50] batch [100/2000] time 2.055 (2.037) data 0.000 (0.006) loss 1.6412 (1.5734) lr 1.0000e-05 eta 2 days, 6:15:12
epoch [3/50] batch [120/2000] time 2.034 (2.036) data 0.000 (0.005) loss 1.8790 (1.5990) lr 1.0000e-05 eta 2 days, 6:12:48
epoch [3/50] batch [140/2000] time 2.033 (2.034) data 0.000 (0.004) loss 0.9263 (1.5967) lr 1.0000e-05 eta 2 days, 6:10:22
epoch [3/50] batch [160/2000] time 2.054 (2.035) data 0.000 (0.004) loss 0.3293 (1.6123) lr 1.0000e-05 eta 2 days, 6:10:21
epoch [3/50] batch [180/2000] time 2.003 (2.035) data 0.000 (0.003) loss 2.3946 (1.6295) lr 1.0000e-05 eta 2 days, 6:09:45
epoch [3/50] batch [200/2000] time 2.050 (2.034) data 0.000 (0.003) loss 2.5670 (1.6557) lr 1.0000e-05 eta 2 days, 6:08:03
epoch [3/50] batch [220/2000] time 2.051 (2.034) data 0.000 (0.003) loss 2.0521 (1.6067) lr 1.0000e-05 eta 2 days, 6:07:36
epoch [3/50] batch [240/2000] time 1.999 (2.034) data 0.000 (0.002) loss 2.8456 (1.5969) lr 1.0000e-05 eta 2 days, 6:06:07
epoch [3/50] batch [260/2000] time 2.028 (2.033) data 0.000 (0.002) loss 1.1566 (1.5979) lr 1.0000e-05 eta 2 days, 6:04:33
epoch [3/50] batch [280/2000] time 1.996 (2.033) data 0.000 (0.002) loss 2.8344 (1.6016) lr 1.0000e-05 eta 2 days, 6:03:08
epoch [3/50] batch [300/2000] time 2.031 (2.032) data 0.000 (0.002) loss 1.9222 (1.6008) lr 1.0000e-05 eta 2 days, 6:01:20
epoch [3/50] batch [320/2000] time 1.974 (2.032) data 0.000 (0.002) loss 0.3327 (1.5858) lr 1.0000e-05 eta 2 days, 6:00:39
epoch [3/50] batch [340/2000] time 2.028 (2.032) data 0.000 (0.002) loss 0.9889 (1.5985) lr 1.0000e-05 eta 2 days, 5:59:41
epoch [3/50] batch [360/2000] time 1.975 (2.032) data 0.000 (0.002) loss 1.6530 (1.5902) lr 1.0000e-05 eta 2 days, 5:59:02
epoch [3/50] batch [380/2000] time 2.054 (2.032) data 0.000 (0.002) loss 0.8016 (1.5777) lr 1.0000e-05 eta 2 days, 5:58:05
epoch [3/50] batch [400/2000] time 1.998 (2.032) data 0.000 (0.002) loss 1.7882 (1.5712) lr 1.0000e-05 eta 2 days, 5:56:56
epoch [3/50] batch [420/2000] time 2.050 (2.032) data 0.000 (0.001) loss 3.9149 (1.5619) lr 1.0000e-05 eta 2 days, 5:56:13
epoch [3/50] batch [440/2000] time 2.049 (2.031) data 0.000 (0.001) loss 1.8989 (1.5641) lr 1.0000e-05 eta 2 days, 5:55:04
epoch [3/50] batch [460/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.7282 (1.5780) lr 1.0000e-05 eta 2 days, 5:54:28
epoch [3/50] batch [480/2000] time 2.026 (2.031) data 0.000 (0.001) loss 1.2862 (1.5875) lr 1.0000e-05 eta 2 days, 5:53:13
epoch [3/50] batch [500/2000] time 1.995 (2.030) data 0.000 (0.001) loss 1.5123 (1.5823) lr 1.0000e-05 eta 2 days, 5:51:49
epoch [3/50] batch [520/2000] time 1.973 (2.030) data 0.000 (0.001) loss 1.9488 (1.5721) lr 1.0000e-05 eta 2 days, 5:50:58
epoch [3/50] batch [540/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.6737 (1.5655) lr 1.0000e-05 eta 2 days, 5:49:53
epoch [3/50] batch [560/2000] time 2.028 (2.030) data 0.000 (0.001) loss 2.3994 (1.5763) lr 1.0000e-05 eta 2 days, 5:49:01
epoch [3/50] batch [580/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.7603 (1.5830) lr 1.0000e-05 eta 2 days, 5:47:58
epoch [3/50] batch [600/2000] time 2.059 (2.030) data 0.000 (0.001) loss 2.0408 (1.5799) lr 1.0000e-05 eta 2 days, 5:47:39
epoch [3/50] batch [620/2000] time 2.027 (2.030) data 0.000 (0.001) loss 3.4969 (1.5933) lr 1.0000e-05 eta 2 days, 5:46:56
epoch [3/50] batch [640/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.3652 (1.5884) lr 1.0000e-05 eta 2 days, 5:46:39
epoch [3/50] batch [660/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.2107 (1.5875) lr 1.0000e-05 eta 2 days, 5:46:08
epoch [3/50] batch [680/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.3445 (1.5852) lr 1.0000e-05 eta 2 days, 5:45:01
epoch [3/50] batch [700/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.5632 (1.5909) lr 1.0000e-05 eta 2 days, 5:44:25
epoch [3/50] batch [720/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.1804 (1.5969) lr 1.0000e-05 eta 2 days, 5:43:41
epoch [3/50] batch [740/2000] time 1.997 (2.030) data 0.000 (0.001) loss 2.0820 (1.5932) lr 1.0000e-05 eta 2 days, 5:42:49
epoch [3/50] batch [760/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.8745 (1.5846) lr 1.0000e-05 eta 2 days, 5:41:55
epoch [3/50] batch [780/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.1903 (1.5900) lr 1.0000e-05 eta 2 days, 5:41:17
epoch [3/50] batch [800/2000] time 2.026 (2.030) data 0.000 (0.001) loss 1.6673 (1.5842) lr 1.0000e-05 eta 2 days, 5:40:37
epoch [3/50] batch [820/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.9346 (1.5928) lr 1.0000e-05 eta 2 days, 5:39:44
epoch [3/50] batch [840/2000] time 2.053 (2.030) data 0.000 (0.001) loss 2.2991 (1.5959) lr 1.0000e-05 eta 2 days, 5:39:00
epoch [3/50] batch [860/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.2843 (1.5941) lr 1.0000e-05 eta 2 days, 5:38:12
epoch [3/50] batch [880/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.5163 (1.5878) lr 1.0000e-05 eta 2 days, 5:37:36
epoch [3/50] batch [900/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.8192 (1.5853) lr 1.0000e-05 eta 2 days, 5:36:37
epoch [3/50] batch [920/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.9564 (1.5815) lr 1.0000e-05 eta 2 days, 5:35:49
epoch [3/50] batch [940/2000] time 2.052 (2.029) data 0.000 (0.001) loss 2.8583 (1.5855) lr 1.0000e-05 eta 2 days, 5:35:06
epoch [3/50] batch [960/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.4043 (1.5865) lr 1.0000e-05 eta 2 days, 5:34:17
epoch [3/50] batch [980/2000] time 2.027 (2.029) data 0.000 (0.001) loss 2.0127 (1.5946) lr 1.0000e-05 eta 2 days, 5:33:43
epoch [3/50] batch [1000/2000] time 1.995 (2.029) data 0.000 (0.001) loss 1.3728 (1.5873) lr 1.0000e-05 eta 2 days, 5:33:01
epoch [3/50] batch [1020/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.0518 (1.5927) lr 1.0000e-05 eta 2 days, 5:32:22
epoch [3/50] batch [1040/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.7338 (1.5895) lr 1.0000e-05 eta 2 days, 5:31:43
epoch [3/50] batch [1060/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.1445 (1.5941) lr 1.0000e-05 eta 2 days, 5:30:57
epoch [3/50] batch [1080/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.4166 (1.5930) lr 1.0000e-05 eta 2 days, 5:30:08
epoch [3/50] batch [1100/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.7646 (1.5939) lr 1.0000e-05 eta 2 days, 5:29:31
epoch [3/50] batch [1120/2000] time 1.997 (2.029) data 0.000 (0.001) loss 2.8426 (1.5948) lr 1.0000e-05 eta 2 days, 5:28:46
epoch [3/50] batch [1140/2000] time 2.028 (2.029) data 0.001 (0.001) loss 2.9890 (1.5943) lr 1.0000e-05 eta 2 days, 5:28:01
epoch [3/50] batch [1160/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.8637 (1.5970) lr 1.0000e-05 eta 2 days, 5:27:21
epoch [3/50] batch [1180/2000] time 2.051 (2.029) data 0.000 (0.001) loss 4.6987 (1.6016) lr 1.0000e-05 eta 2 days, 5:26:39
epoch [3/50] batch [1200/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.7890 (1.5999) lr 1.0000e-05 eta 2 days, 5:25:52
epoch [3/50] batch [1220/2000] time 1.994 (2.029) data 0.000 (0.001) loss 1.6887 (1.5985) lr 1.0000e-05 eta 2 days, 5:25:15
epoch [3/50] batch [1240/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.8544 (1.5997) lr 1.0000e-05 eta 2 days, 5:24:31
epoch [3/50] batch [1260/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.5156 (1.6072) lr 1.0000e-05 eta 2 days, 5:23:50
epoch [3/50] batch [1280/2000] time 1.976 (2.029) data 0.000 (0.001) loss 1.0104 (1.6063) lr 1.0000e-05 eta 2 days, 5:23:08
epoch [3/50] batch [1300/2000] time 1.972 (2.029) data 0.000 (0.001) loss 1.1296 (1.6032) lr 1.0000e-05 eta 2 days, 5:22:12
epoch [3/50] batch [1320/2000] time 1.997 (2.029) data 0.000 (0.001) loss 2.1524 (1.5992) lr 1.0000e-05 eta 2 days, 5:21:26
epoch [3/50] batch [1340/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.6511 (1.5947) lr 1.0000e-05 eta 2 days, 5:20:53
epoch [3/50] batch [1360/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.7589 (1.5983) lr 1.0000e-05 eta 2 days, 5:20:14
epoch [3/50] batch [1380/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.7913 (1.5931) lr 1.0000e-05 eta 2 days, 5:19:21
epoch [3/50] batch [1400/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.0430 (1.5931) lr 1.0000e-05 eta 2 days, 5:18:52
epoch [3/50] batch [1420/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.7636 (1.5870) lr 1.0000e-05 eta 2 days, 5:18:07
epoch [3/50] batch [1440/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.1626 (1.5878) lr 1.0000e-05 eta 2 days, 5:17:17
epoch [3/50] batch [1460/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.1848 (1.5873) lr 1.0000e-05 eta 2 days, 5:16:39
epoch [3/50] batch [1480/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.7347 (1.5938) lr 1.0000e-05 eta 2 days, 5:16:00
epoch [3/50] batch [1500/2000] time 1.997 (2.029) data 0.000 (0.001) loss 2.3118 (1.5936) lr 1.0000e-05 eta 2 days, 5:15:14
epoch [3/50] batch [1520/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.8419 (1.5928) lr 1.0000e-05 eta 2 days, 5:14:41
epoch [3/50] batch [1540/2000] time 2.027 (2.029) data 0.000 (0.001) loss 2.4955 (1.5873) lr 1.0000e-05 eta 2 days, 5:14:01
epoch [3/50] batch [1560/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.9698 (1.5851) lr 1.0000e-05 eta 2 days, 5:13:19
epoch [3/50] batch [1580/2000] time 2.049 (2.029) data 0.000 (0.001) loss 3.1887 (1.5838) lr 1.0000e-05 eta 2 days, 5:12:41
epoch [3/50] batch [1600/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.9673 (1.5826) lr 1.0000e-05 eta 2 days, 5:12:10
epoch [3/50] batch [1620/2000] time 2.005 (2.029) data 0.000 (0.001) loss 1.3475 (1.5832) lr 1.0000e-05 eta 2 days, 5:11:26
epoch [3/50] batch [1640/2000] time 2.005 (2.029) data 0.000 (0.001) loss 2.1497 (1.5888) lr 1.0000e-05 eta 2 days, 5:10:51
epoch [3/50] batch [1660/2000] time 2.031 (2.029) data 0.000 (0.001) loss 2.8671 (1.5883) lr 1.0000e-05 eta 2 days, 5:10:17
epoch [3/50] batch [1680/2000] time 2.049 (2.029) data 0.000 (0.001) loss 2.1188 (1.5862) lr 1.0000e-05 eta 2 days, 5:09:35
epoch [3/50] batch [1700/2000] time 2.052 (2.029) data 0.000 (0.001) loss 2.0025 (1.5857) lr 1.0000e-05 eta 2 days, 5:09:05
epoch [3/50] batch [1720/2000] time 1.975 (2.029) data 0.000 (0.001) loss 1.4084 (1.5812) lr 1.0000e-05 eta 2 days, 5:08:23
epoch [3/50] batch [1740/2000] time 1.980 (2.029) data 0.000 (0.001) loss 2.4828 (1.5770) lr 1.0000e-05 eta 2 days, 5:07:45
epoch [3/50] batch [1760/2000] time 2.030 (2.029) data 0.000 (0.001) loss 1.0202 (1.5725) lr 1.0000e-05 eta 2 days, 5:07:04
epoch [3/50] batch [1780/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.9686 (1.5700) lr 1.0000e-05 eta 2 days, 5:06:29
epoch [3/50] batch [1800/2000] time 1.999 (2.029) data 0.001 (0.001) loss 1.5593 (1.5689) lr 1.0000e-05 eta 2 days, 5:05:48
epoch [3/50] batch [1820/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.5572 (1.5702) lr 1.0000e-05 eta 2 days, 5:05:11
epoch [3/50] batch [1840/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.3166 (1.5715) lr 1.0000e-05 eta 2 days, 5:04:34
epoch [3/50] batch [1860/2000] time 2.052 (2.029) data 0.000 (0.000) loss 1.3102 (1.5704) lr 1.0000e-05 eta 2 days, 5:03:54
epoch [3/50] batch [1880/2000] time 1.996 (2.029) data 0.000 (0.000) loss 2.2743 (1.5722) lr 1.0000e-05 eta 2 days, 5:03:11
epoch [3/50] batch [1900/2000] time 2.050 (2.029) data 0.000 (0.000) loss 1.2649 (1.5732) lr 1.0000e-05 eta 2 days, 5:02:29
epoch [3/50] batch [1920/2000] time 1.997 (2.029) data 0.000 (0.000) loss 1.3366 (1.5684) lr 1.0000e-05 eta 2 days, 5:01:49
epoch [3/50] batch [1940/2000] time 2.050 (2.029) data 0.000 (0.000) loss 1.3060 (1.5674) lr 1.0000e-05 eta 2 days, 5:01:15
epoch [3/50] batch [1960/2000] time 1.976 (2.029) data 0.000 (0.000) loss 3.3754 (1.5689) lr 1.0000e-05 eta 2 days, 5:00:37
epoch [3/50] batch [1980/2000] time 2.030 (2.029) data 0.000 (0.000) loss 1.3221 (1.5690) lr 1.0000e-05 eta 2 days, 4:59:59
epoch [3/50] batch [2000/2000] time 2.053 (2.029) data 0.000 (0.000) loss 1.1605 (1.5636) lr 1.0000e-05 eta 2 days, 4:59:24
epoch [4/50] batch [20/2000] time 2.000 (2.049) data 0.000 (0.027) loss 1.1676 (1.7485) lr 1.0000e-05 eta 2 days, 5:30:09
epoch [4/50] batch [40/2000] time 2.055 (2.039) data 0.000 (0.014) loss 0.4925 (1.6674) lr 1.0000e-05 eta 2 days, 5:13:17
epoch [4/50] batch [60/2000] time 2.053 (2.037) data 0.001 (0.009) loss 0.2681 (1.5953) lr 1.0000e-05 eta 2 days, 5:08:37
epoch [4/50] batch [80/2000] time 2.031 (2.036) data 0.000 (0.007) loss 2.0275 (1.5251) lr 1.0000e-05 eta 2 days, 5:06:35
epoch [4/50] batch [100/2000] time 2.051 (2.034) data 0.000 (0.006) loss 0.9168 (1.5944) lr 1.0000e-05 eta 2 days, 5:02:47
epoch [4/50] batch [120/2000] time 2.049 (2.034) data 0.000 (0.005) loss 0.2406 (1.6299) lr 1.0000e-05 eta 2 days, 5:02:08
epoch [4/50] batch [140/2000] time 1.997 (2.033) data 0.000 (0.004) loss 0.8975 (1.5920) lr 1.0000e-05 eta 2 days, 5:01:03
epoch [4/50] batch [160/2000] time 2.056 (2.033) data 0.000 (0.004) loss 1.0823 (1.5523) lr 1.0000e-05 eta 2 days, 5:00:06
epoch [4/50] batch [180/2000] time 2.031 (2.033) data 0.000 (0.003) loss 0.0452 (1.5486) lr 1.0000e-05 eta 2 days, 4:58:16
epoch [4/50] batch [200/2000] time 2.053 (2.033) data 0.000 (0.003) loss 2.7452 (1.5339) lr 1.0000e-05 eta 2 days, 4:58:21
epoch [4/50] batch [220/2000] time 2.049 (2.032) data 0.000 (0.003) loss 0.4138 (1.5116) lr 1.0000e-05 eta 2 days, 4:56:36
epoch [4/50] batch [240/2000] time 2.050 (2.032) data 0.000 (0.002) loss 2.4180 (1.5250) lr 1.0000e-05 eta 2 days, 4:56:05
epoch [4/50] batch [260/2000] time 2.000 (2.033) data 0.000 (0.002) loss 0.7242 (1.5267) lr 1.0000e-05 eta 2 days, 4:55:58
epoch [4/50] batch [280/2000] time 2.027 (2.033) data 0.000 (0.002) loss 0.3727 (1.5513) lr 1.0000e-05 eta 2 days, 4:55:02
epoch [4/50] batch [300/2000] time 2.054 (2.033) data 0.000 (0.002) loss 0.2380 (1.5426) lr 1.0000e-05 eta 2 days, 4:54:57
epoch [4/50] batch [320/2000] time 1.999 (2.033) data 0.000 (0.002) loss 0.1824 (1.5436) lr 1.0000e-05 eta 2 days, 4:53:50
epoch [4/50] batch [340/2000] time 1.997 (2.032) data 0.000 (0.002) loss 0.7323 (1.5254) lr 1.0000e-05 eta 2 days, 4:52:07
epoch [4/50] batch [360/2000] time 2.049 (2.032) data 0.000 (0.002) loss 2.9592 (1.5377) lr 1.0000e-05 eta 2 days, 4:51:32
epoch [4/50] batch [380/2000] time 2.054 (2.032) data 0.000 (0.002) loss 0.6991 (1.5230) lr 1.0000e-05 eta 2 days, 4:50:04
epoch [4/50] batch [400/2000] time 2.051 (2.032) data 0.000 (0.002) loss 0.2987 (1.5212) lr 1.0000e-05 eta 2 days, 4:49:23
epoch [4/50] batch [420/2000] time 2.055 (2.032) data 0.000 (0.001) loss 1.2564 (1.5152) lr 1.0000e-05 eta 2 days, 4:48:46
epoch [4/50] batch [440/2000] time 2.053 (2.031) data 0.000 (0.001) loss 2.4903 (1.5199) lr 1.0000e-05 eta 2 days, 4:47:39
epoch [4/50] batch [460/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.4033 (1.5417) lr 1.0000e-05 eta 2 days, 4:46:48
epoch [4/50] batch [480/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.3122 (1.5460) lr 1.0000e-05 eta 2 days, 4:46:13
epoch [4/50] batch [500/2000] time 2.030 (2.031) data 0.000 (0.001) loss 0.3489 (1.5414) lr 1.0000e-05 eta 2 days, 4:45:31
epoch [4/50] batch [520/2000] time 1.979 (2.031) data 0.000 (0.001) loss 0.8077 (1.5366) lr 1.0000e-05 eta 2 days, 4:44:52
epoch [4/50] batch [540/2000] time 2.055 (2.031) data 0.000 (0.001) loss 2.5119 (1.5445) lr 1.0000e-05 eta 2 days, 4:44:03
epoch [4/50] batch [560/2000] time 1.973 (2.031) data 0.000 (0.001) loss 0.5591 (1.5475) lr 1.0000e-05 eta 2 days, 4:43:20
epoch [4/50] batch [580/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.0026 (1.5401) lr 1.0000e-05 eta 2 days, 4:42:44
epoch [4/50] batch [600/2000] time 2.054 (2.031) data 0.001 (0.001) loss 1.0544 (1.5413) lr 1.0000e-05 eta 2 days, 4:41:28
epoch [4/50] batch [620/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.2906 (1.5364) lr 1.0000e-05 eta 2 days, 4:40:58
epoch [4/50] batch [640/2000] time 2.002 (2.031) data 0.000 (0.001) loss 1.1658 (1.5371) lr 1.0000e-05 eta 2 days, 4:40:18
epoch [4/50] batch [660/2000] time 1.998 (2.031) data 0.000 (0.001) loss 1.3862 (1.5338) lr 1.0000e-05 eta 2 days, 4:39:23
epoch [4/50] batch [680/2000] time 2.028 (2.031) data 0.000 (0.001) loss 1.5188 (1.5289) lr 1.0000e-05 eta 2 days, 4:38:34
epoch [4/50] batch [700/2000] time 2.027 (2.031) data 0.000 (0.001) loss 2.2958 (1.5181) lr 1.0000e-05 eta 2 days, 4:37:40
epoch [4/50] batch [720/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.4685 (1.5115) lr 1.0000e-05 eta 2 days, 4:37:08
epoch [4/50] batch [740/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.3450 (1.5118) lr 1.0000e-05 eta 2 days, 4:36:38
epoch [4/50] batch [760/2000] time 2.027 (2.031) data 0.000 (0.001) loss 2.0937 (1.5038) lr 1.0000e-05 eta 2 days, 4:35:55
epoch [4/50] batch [780/2000] time 2.049 (2.031) data 0.000 (0.001) loss 2.1044 (1.5085) lr 1.0000e-05 eta 2 days, 4:34:57
epoch [4/50] batch [800/2000] time 2.000 (2.031) data 0.000 (0.001) loss 3.3161 (1.5208) lr 1.0000e-05 eta 2 days, 4:34:18
epoch [4/50] batch [820/2000] time 2.031 (2.031) data 0.000 (0.001) loss 1.3921 (1.5178) lr 1.0000e-05 eta 2 days, 4:33:36
epoch [4/50] batch [840/2000] time 2.000 (2.031) data 0.000 (0.001) loss 2.7534 (1.5205) lr 1.0000e-05 eta 2 days, 4:32:53
epoch [4/50] batch [860/2000] time 2.054 (2.031) data 0.000 (0.001) loss 2.1787 (1.5169) lr 1.0000e-05 eta 2 days, 4:32:15
epoch [4/50] batch [880/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.2781 (1.5159) lr 1.0000e-05 eta 2 days, 4:31:44
epoch [4/50] batch [900/2000] time 2.052 (2.031) data 0.000 (0.001) loss 2.8203 (1.5244) lr 1.0000e-05 eta 2 days, 4:31:10
epoch [4/50] batch [920/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.4785 (1.5253) lr 1.0000e-05 eta 2 days, 4:30:34
epoch [4/50] batch [940/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.8402 (1.5179) lr 1.0000e-05 eta 2 days, 4:29:56
epoch [4/50] batch [960/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.6594 (1.5127) lr 1.0000e-05 eta 2 days, 4:29:12
epoch [4/50] batch [980/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.4814 (1.5085) lr 1.0000e-05 eta 2 days, 4:28:24
epoch [4/50] batch [1000/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.8678 (1.5029) lr 1.0000e-05 eta 2 days, 4:27:39
epoch [4/50] batch [1020/2000] time 2.026 (2.031) data 0.000 (0.001) loss 0.9512 (1.4986) lr 1.0000e-05 eta 2 days, 4:27:03
epoch [4/50] batch [1040/2000] time 2.053 (2.031) data 0.000 (0.001) loss 2.2730 (1.4998) lr 1.0000e-05 eta 2 days, 4:26:22
epoch [4/50] batch [1060/2000] time 2.007 (2.031) data 0.000 (0.001) loss 2.5563 (1.4999) lr 1.0000e-05 eta 2 days, 4:25:42
epoch [4/50] batch [1080/2000] time 2.059 (2.031) data 0.000 (0.001) loss 1.5708 (1.5028) lr 1.0000e-05 eta 2 days, 4:25:16
epoch [4/50] batch [1100/2000] time 2.056 (2.031) data 0.000 (0.001) loss 0.4757 (1.5010) lr 1.0000e-05 eta 2 days, 4:24:37
epoch [4/50] batch [1120/2000] time 2.034 (2.031) data 0.000 (0.001) loss 1.1114 (1.5019) lr 1.0000e-05 eta 2 days, 4:24:03
epoch [4/50] batch [1140/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.4333 (1.5022) lr 1.0000e-05 eta 2 days, 4:23:14
epoch [4/50] batch [1160/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.7508 (1.5024) lr 1.0000e-05 eta 2 days, 4:22:35
epoch [4/50] batch [1180/2000] time 2.026 (2.031) data 0.000 (0.001) loss 1.7567 (1.5090) lr 1.0000e-05 eta 2 days, 4:21:50
epoch [4/50] batch [1200/2000] time 2.050 (2.031) data 0.000 (0.001) loss 1.6976 (1.5037) lr 1.0000e-05 eta 2 days, 4:21:10
epoch [4/50] batch [1220/2000] time 1.996 (2.031) data 0.000 (0.001) loss 0.2888 (1.5029) lr 1.0000e-05 eta 2 days, 4:20:32
epoch [4/50] batch [1240/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.6090 (1.5018) lr 1.0000e-05 eta 2 days, 4:19:45
epoch [4/50] batch [1260/2000] time 2.049 (2.031) data 0.000 (0.001) loss 0.9121 (1.5021) lr 1.0000e-05 eta 2 days, 4:19:02
epoch [4/50] batch [1280/2000] time 1.996 (2.031) data 0.000 (0.001) loss 1.0924 (1.5045) lr 1.0000e-05 eta 2 days, 4:18:19
epoch [4/50] batch [1300/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.2811 (1.5042) lr 1.0000e-05 eta 2 days, 4:17:40
epoch [4/50] batch [1320/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.2585 (1.5089) lr 1.0000e-05 eta 2 days, 4:16:49
epoch [4/50] batch [1340/2000] time 1.998 (2.031) data 0.000 (0.001) loss 1.1309 (1.5193) lr 1.0000e-05 eta 2 days, 4:15:56
epoch [4/50] batch [1360/2000] time 1.996 (2.031) data 0.000 (0.001) loss 2.0644 (1.5213) lr 1.0000e-05 eta 2 days, 4:15:10
epoch [4/50] batch [1380/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.2346 (1.5217) lr 1.0000e-05 eta 2 days, 4:14:22
epoch [4/50] batch [1400/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.3979 (1.5178) lr 1.0000e-05 eta 2 days, 4:13:35
epoch [4/50] batch [1420/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.0063 (1.5204) lr 1.0000e-05 eta 2 days, 4:12:54
epoch [4/50] batch [1440/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.5484 (1.5168) lr 1.0000e-05 eta 2 days, 4:12:07
epoch [4/50] batch [1460/2000] time 2.052 (2.030) data 0.000 (0.001) loss 2.5245 (1.5208) lr 1.0000e-05 eta 2 days, 4:11:29
epoch [4/50] batch [1480/2000] time 2.056 (2.030) data 0.000 (0.001) loss 0.8690 (1.5170) lr 1.0000e-05 eta 2 days, 4:10:47
epoch [4/50] batch [1500/2000] time 1.978 (2.030) data 0.000 (0.001) loss 1.6904 (1.5168) lr 1.0000e-05 eta 2 days, 4:10:11
epoch [4/50] batch [1520/2000] time 2.005 (2.030) data 0.000 (0.001) loss 2.3197 (1.5135) lr 1.0000e-05 eta 2 days, 4:09:29
epoch [4/50] batch [1540/2000] time 1.973 (2.030) data 0.000 (0.001) loss 1.6165 (1.5115) lr 1.0000e-05 eta 2 days, 4:08:48
epoch [4/50] batch [1560/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.0063 (1.5084) lr 1.0000e-05 eta 2 days, 4:08:03
epoch [4/50] batch [1580/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.6686 (1.5045) lr 1.0000e-05 eta 2 days, 4:07:17
epoch [4/50] batch [1600/2000] time 2.002 (2.030) data 0.000 (0.001) loss 1.1533 (1.5048) lr 1.0000e-05 eta 2 days, 4:06:41
epoch [4/50] batch [1620/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.1924 (1.5047) lr 1.0000e-05 eta 2 days, 4:05:57
epoch [4/50] batch [1640/2000] time 2.050 (2.030) data 0.000 (0.001) loss 4.2669 (1.5082) lr 1.0000e-05 eta 2 days, 4:05:18
epoch [4/50] batch [1660/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.3557 (1.5069) lr 1.0000e-05 eta 2 days, 4:04:40
epoch [4/50] batch [1680/2000] time 2.050 (2.030) data 0.001 (0.001) loss 1.2904 (1.5085) lr 1.0000e-05 eta 2 days, 4:04:01
epoch [4/50] batch [1700/2000] time 2.027 (2.030) data 0.000 (0.001) loss 2.9522 (1.5087) lr 1.0000e-05 eta 2 days, 4:03:23
epoch [4/50] batch [1720/2000] time 2.048 (2.030) data 0.000 (0.001) loss 0.7850 (1.5100) lr 1.0000e-05 eta 2 days, 4:02:42
epoch [4/50] batch [1740/2000] time 1.970 (2.030) data 0.000 (0.001) loss 0.5421 (1.5108) lr 1.0000e-05 eta 2 days, 4:01:55
epoch [4/50] batch [1760/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.4370 (1.5164) lr 1.0000e-05 eta 2 days, 4:01:01
epoch [4/50] batch [1780/2000] time 1.974 (2.030) data 0.000 (0.001) loss 0.5615 (1.5166) lr 1.0000e-05 eta 2 days, 4:00:18
epoch [4/50] batch [1800/2000] time 2.001 (2.030) data 0.000 (0.001) loss 2.6523 (1.5190) lr 1.0000e-05 eta 2 days, 3:59:38
epoch [4/50] batch [1820/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.0494 (1.5157) lr 1.0000e-05 eta 2 days, 3:58:58
epoch [4/50] batch [1840/2000] time 2.051 (2.030) data 0.000 (0.000) loss 2.1788 (1.5141) lr 1.0000e-05 eta 2 days, 3:58:16
epoch [4/50] batch [1860/2000] time 2.050 (2.030) data 0.000 (0.000) loss 3.3554 (1.5193) lr 1.0000e-05 eta 2 days, 3:57:32
epoch [4/50] batch [1880/2000] time 2.053 (2.030) data 0.000 (0.000) loss 1.1449 (1.5184) lr 1.0000e-05 eta 2 days, 3:56:47
epoch [4/50] batch [1900/2000] time 2.025 (2.030) data 0.000 (0.000) loss 2.9948 (1.5236) lr 1.0000e-05 eta 2 days, 3:56:00
epoch [4/50] batch [1920/2000] time 1.996 (2.030) data 0.000 (0.000) loss 2.1204 (1.5212) lr 1.0000e-05 eta 2 days, 3:55:15
epoch [4/50] batch [1940/2000] time 2.053 (2.030) data 0.000 (0.000) loss 2.4780 (1.5233) lr 1.0000e-05 eta 2 days, 3:54:38
epoch [4/50] batch [1960/2000] time 2.053 (2.030) data 0.000 (0.000) loss 2.1758 (1.5218) lr 1.0000e-05 eta 2 days, 3:54:00
epoch [4/50] batch [1980/2000] time 2.051 (2.030) data 0.000 (0.000) loss 0.3541 (1.5207) lr 1.0000e-05 eta 2 days, 3:53:18
epoch [4/50] batch [2000/2000] time 2.051 (2.030) data 0.000 (0.000) loss 0.9898 (1.5210) lr 1.0000e-05 eta 2 days, 3:52:40
epoch [5/50] batch [20/2000] time 2.053 (2.054) data 0.000 (0.027) loss 1.9703 (1.4008) lr 1.0000e-05 eta 2 days, 4:29:31
epoch [5/50] batch [40/2000] time 2.053 (2.046) data 0.000 (0.014) loss 0.7686 (1.2612) lr 1.0000e-05 eta 2 days, 4:15:20
epoch [5/50] batch [60/2000] time 2.029 (2.040) data 0.001 (0.009) loss 1.4983 (1.3526) lr 1.0000e-05 eta 2 days, 4:06:08
epoch [5/50] batch [80/2000] time 1.980 (2.036) data 0.000 (0.007) loss 0.2434 (1.3877) lr 1.0000e-05 eta 2 days, 3:59:26
epoch [5/50] batch [100/2000] time 1.999 (2.034) data 0.000 (0.006) loss 1.2460 (1.4254) lr 1.0000e-05 eta 2 days, 3:55:54
epoch [5/50] batch [120/2000] time 2.052 (2.033) data 0.000 (0.005) loss 0.3692 (1.4037) lr 1.0000e-05 eta 2 days, 3:53:41
epoch [5/50] batch [140/2000] time 1.995 (2.032) data 0.000 (0.004) loss 1.5075 (1.4434) lr 1.0000e-05 eta 2 days, 3:50:26
epoch [5/50] batch [160/2000] time 2.026 (2.031) data 0.000 (0.004) loss 1.3557 (1.4229) lr 1.0000e-05 eta 2 days, 3:48:41
epoch [5/50] batch [180/2000] time 1.996 (2.031) data 0.000 (0.003) loss 0.7326 (1.4297) lr 1.0000e-05 eta 2 days, 3:47:27
epoch [5/50] batch [200/2000] time 2.051 (2.030) data 0.000 (0.003) loss 0.2763 (1.4027) lr 1.0000e-05 eta 2 days, 3:46:35
epoch [5/50] batch [220/2000] time 2.028 (2.029) data 0.000 (0.003) loss 1.6625 (1.4126) lr 1.0000e-05 eta 2 days, 3:43:58
epoch [5/50] batch [240/2000] time 2.049 (2.030) data 0.000 (0.002) loss 0.5799 (1.4005) lr 1.0000e-05 eta 2 days, 3:44:01
epoch [5/50] batch [260/2000] time 2.025 (2.029) data 0.000 (0.002) loss 2.6335 (1.4152) lr 1.0000e-05 eta 2 days, 3:42:47
epoch [5/50] batch [280/2000] time 1.998 (2.029) data 0.000 (0.002) loss 1.6727 (1.4363) lr 1.0000e-05 eta 2 days, 3:41:08
epoch [5/50] batch [300/2000] time 2.030 (2.029) data 0.000 (0.002) loss 0.8502 (1.4329) lr 1.0000e-05 eta 2 days, 3:40:35
epoch [5/50] batch [320/2000] time 2.028 (2.028) data 0.000 (0.002) loss 0.5193 (1.4153) lr 1.0000e-05 eta 2 days, 3:38:57
epoch [5/50] batch [340/2000] time 2.028 (2.028) data 0.000 (0.002) loss 2.1634 (1.4347) lr 1.0000e-05 eta 2 days, 3:37:47
epoch [5/50] batch [360/2000] time 1.998 (2.028) data 0.000 (0.002) loss 1.1213 (1.4304) lr 1.0000e-05 eta 2 days, 3:37:02
epoch [5/50] batch [380/2000] time 1.999 (2.028) data 0.000 (0.002) loss 4.0140 (1.4569) lr 1.0000e-05 eta 2 days, 3:36:21
epoch [5/50] batch [400/2000] time 2.050 (2.028) data 0.000 (0.002) loss 2.9316 (1.4607) lr 1.0000e-05 eta 2 days, 3:35:22
epoch [5/50] batch [420/2000] time 2.051 (2.027) data 0.000 (0.001) loss 0.9859 (1.4626) lr 1.0000e-05 eta 2 days, 3:34:28
epoch [5/50] batch [440/2000] time 1.996 (2.027) data 0.000 (0.001) loss 0.6532 (1.4780) lr 1.0000e-05 eta 2 days, 3:33:53
epoch [5/50] batch [460/2000] time 1.996 (2.027) data 0.000 (0.001) loss 2.1605 (1.4791) lr 1.0000e-05 eta 2 days, 3:33:10
epoch [5/50] batch [480/2000] time 2.049 (2.027) data 0.000 (0.001) loss 1.4177 (1.4748) lr 1.0000e-05 eta 2 days, 3:32:27
epoch [5/50] batch [500/2000] time 2.029 (2.027) data 0.000 (0.001) loss 1.8035 (1.4798) lr 1.0000e-05 eta 2 days, 3:31:40
epoch [5/50] batch [520/2000] time 2.049 (2.028) data 0.000 (0.001) loss 2.1178 (1.4703) lr 1.0000e-05 eta 2 days, 3:31:15
epoch [5/50] batch [540/2000] time 2.052 (2.027) data 0.000 (0.001) loss 0.7264 (1.4692) lr 1.0000e-05 eta 2 days, 3:30:29
epoch [5/50] batch [560/2000] time 2.030 (2.028) data 0.000 (0.001) loss 1.5060 (1.4613) lr 1.0000e-05 eta 2 days, 3:30:00
epoch [5/50] batch [580/2000] time 2.026 (2.028) data 0.000 (0.001) loss 3.0784 (1.4678) lr 1.0000e-05 eta 2 days, 3:29:35
epoch [5/50] batch [600/2000] time 2.048 (2.028) data 0.001 (0.001) loss 0.7602 (1.4636) lr 1.0000e-05 eta 2 days, 3:28:44
epoch [5/50] batch [620/2000] time 2.053 (2.028) data 0.000 (0.001) loss 0.8527 (1.4699) lr 1.0000e-05 eta 2 days, 3:28:20
epoch [5/50] batch [640/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.6808 (1.4764) lr 1.0000e-05 eta 2 days, 3:27:33
epoch [5/50] batch [660/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.1931 (1.4667) lr 1.0000e-05 eta 2 days, 3:26:47
epoch [5/50] batch [680/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.7196 (1.4683) lr 1.0000e-05 eta 2 days, 3:26:09
epoch [5/50] batch [700/2000] time 2.052 (2.028) data 0.000 (0.001) loss 1.1733 (1.4851) lr 1.0000e-05 eta 2 days, 3:25:35
epoch [5/50] batch [720/2000] time 1.970 (2.028) data 0.000 (0.001) loss 1.5655 (1.4717) lr 1.0000e-05 eta 2 days, 3:24:50
epoch [5/50] batch [740/2000] time 2.023 (2.028) data 0.000 (0.001) loss 3.1655 (1.4794) lr 1.0000e-05 eta 2 days, 3:23:58
epoch [5/50] batch [760/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.6649 (1.4850) lr 1.0000e-05 eta 2 days, 3:23:28
epoch [5/50] batch [780/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.5469 (1.4868) lr 1.0000e-05 eta 2 days, 3:22:35
epoch [5/50] batch [800/2000] time 2.052 (2.028) data 0.000 (0.001) loss 1.4947 (1.4891) lr 1.0000e-05 eta 2 days, 3:21:55
epoch [5/50] batch [820/2000] time 1.999 (2.027) data 0.000 (0.001) loss 1.1506 (1.4813) lr 1.0000e-05 eta 2 days, 3:21:02
epoch [5/50] batch [840/2000] time 2.054 (2.027) data 0.000 (0.001) loss 0.3832 (1.4713) lr 1.0000e-05 eta 2 days, 3:20:26
epoch [5/50] batch [860/2000] time 1.996 (2.027) data 0.000 (0.001) loss 0.5500 (1.4766) lr 1.0000e-05 eta 2 days, 3:19:39
epoch [5/50] batch [880/2000] time 1.970 (2.027) data 0.000 (0.001) loss 0.2039 (1.4738) lr 1.0000e-05 eta 2 days, 3:18:39
epoch [5/50] batch [900/2000] time 2.026 (2.027) data 0.000 (0.001) loss 2.7107 (1.4750) lr 1.0000e-05 eta 2 days, 3:17:56
epoch [5/50] batch [920/2000] time 1.999 (2.027) data 0.000 (0.001) loss 0.0688 (1.4744) lr 1.0000e-05 eta 2 days, 3:17:15
epoch [5/50] batch [940/2000] time 2.052 (2.027) data 0.000 (0.001) loss 0.4354 (1.4759) lr 1.0000e-05 eta 2 days, 3:16:47
epoch [5/50] batch [960/2000] time 2.049 (2.027) data 0.000 (0.001) loss 2.7260 (1.4724) lr 1.0000e-05 eta 2 days, 3:16:06
epoch [5/50] batch [980/2000] time 1.996 (2.027) data 0.000 (0.001) loss 2.0343 (1.4773) lr 1.0000e-05 eta 2 days, 3:15:31
epoch [5/50] batch [1000/2000] time 2.051 (2.027) data 0.000 (0.001) loss 1.4511 (1.4735) lr 1.0000e-05 eta 2 days, 3:14:53
epoch [5/50] batch [1020/2000] time 2.048 (2.027) data 0.000 (0.001) loss 2.7121 (1.4813) lr 1.0000e-05 eta 2 days, 3:14:15
epoch [5/50] batch [1040/2000] time 2.055 (2.027) data 0.000 (0.001) loss 1.7187 (1.4871) lr 1.0000e-05 eta 2 days, 3:13:30
epoch [5/50] batch [1060/2000] time 1.997 (2.027) data 0.000 (0.001) loss 2.8993 (1.4870) lr 1.0000e-05 eta 2 days, 3:12:41
epoch [5/50] batch [1080/2000] time 2.049 (2.027) data 0.000 (0.001) loss 0.8299 (1.4876) lr 1.0000e-05 eta 2 days, 3:12:06
epoch [5/50] batch [1100/2000] time 2.029 (2.027) data 0.000 (0.001) loss 1.0452 (1.4864) lr 1.0000e-05 eta 2 days, 3:11:32
epoch [5/50] batch [1120/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.2941 (1.4786) lr 1.0000e-05 eta 2 days, 3:11:06
epoch [5/50] batch [1140/2000] time 2.052 (2.028) data 0.001 (0.001) loss 0.5484 (1.4790) lr 1.0000e-05 eta 2 days, 3:10:22
epoch [5/50] batch [1160/2000] time 2.002 (2.028) data 0.000 (0.001) loss 0.3003 (1.4737) lr 1.0000e-05 eta 2 days, 3:09:52
epoch [5/50] batch [1180/2000] time 2.033 (2.028) data 0.000 (0.001) loss 2.4578 (1.4751) lr 1.0000e-05 eta 2 days, 3:09:17
epoch [5/50] batch [1200/2000] time 2.056 (2.028) data 0.000 (0.001) loss 1.5985 (1.4674) lr 1.0000e-05 eta 2 days, 3:08:48
epoch [5/50] batch [1220/2000] time 2.051 (2.028) data 0.000 (0.001) loss 2.2060 (1.4698) lr 1.0000e-05 eta 2 days, 3:07:57
epoch [5/50] batch [1240/2000] time 2.028 (2.028) data 0.000 (0.001) loss 2.6972 (1.4689) lr 1.0000e-05 eta 2 days, 3:07:24
epoch [5/50] batch [1260/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.9095 (1.4645) lr 1.0000e-05 eta 2 days, 3:06:41
epoch [5/50] batch [1280/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.7025 (1.4622) lr 1.0000e-05 eta 2 days, 3:06:00
epoch [5/50] batch [1300/2000] time 1.996 (2.028) data 0.000 (0.001) loss 2.1390 (1.4613) lr 1.0000e-05 eta 2 days, 3:05:22
epoch [5/50] batch [1320/2000] time 2.049 (2.028) data 0.000 (0.001) loss 2.7016 (1.4656) lr 1.0000e-05 eta 2 days, 3:04:45
epoch [5/50] batch [1340/2000] time 1.972 (2.028) data 0.000 (0.001) loss 1.5205 (1.4695) lr 1.0000e-05 eta 2 days, 3:03:59
epoch [5/50] batch [1360/2000] time 2.055 (2.028) data 0.000 (0.001) loss 2.1135 (1.4684) lr 1.0000e-05 eta 2 days, 3:03:23
epoch [5/50] batch [1380/2000] time 2.028 (2.028) data 0.000 (0.001) loss 2.2591 (1.4754) lr 1.0000e-05 eta 2 days, 3:02:47
epoch [5/50] batch [1400/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.6926 (1.4734) lr 1.0000e-05 eta 2 days, 3:02:07
epoch [5/50] batch [1420/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.7297 (1.4729) lr 1.0000e-05 eta 2 days, 3:01:21
epoch [5/50] batch [1440/2000] time 2.051 (2.028) data 0.000 (0.001) loss 3.7417 (1.4756) lr 1.0000e-05 eta 2 days, 3:00:37
epoch [5/50] batch [1460/2000] time 2.053 (2.028) data 0.000 (0.001) loss 0.7507 (1.4760) lr 1.0000e-05 eta 2 days, 3:00:01
epoch [5/50] batch [1480/2000] time 2.053 (2.028) data 0.000 (0.001) loss 0.2310 (1.4740) lr 1.0000e-05 eta 2 days, 2:59:25
epoch [5/50] batch [1500/2000] time 2.030 (2.028) data 0.000 (0.001) loss 2.1120 (1.4739) lr 1.0000e-05 eta 2 days, 2:58:48
epoch [5/50] batch [1520/2000] time 2.027 (2.028) data 0.000 (0.001) loss 2.2832 (1.4732) lr 1.0000e-05 eta 2 days, 2:58:02
epoch [5/50] batch [1540/2000] time 2.053 (2.028) data 0.000 (0.001) loss 3.4566 (1.4710) lr 1.0000e-05 eta 2 days, 2:57:16
epoch [5/50] batch [1560/2000] time 1.994 (2.028) data 0.000 (0.001) loss 1.6055 (1.4716) lr 1.0000e-05 eta 2 days, 2:56:45
epoch [5/50] batch [1580/2000] time 1.997 (2.028) data 0.000 (0.001) loss 1.5713 (1.4721) lr 1.0000e-05 eta 2 days, 2:56:05
epoch [5/50] batch [1600/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.1399 (1.4713) lr 1.0000e-05 eta 2 days, 2:55:22
epoch [5/50] batch [1620/2000] time 2.026 (2.028) data 0.000 (0.001) loss 2.6638 (1.4716) lr 1.0000e-05 eta 2 days, 2:54:27
epoch [5/50] batch [1640/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.3481 (1.4668) lr 1.0000e-05 eta 2 days, 2:53:45
epoch [5/50] batch [1660/2000] time 2.055 (2.028) data 0.000 (0.001) loss 0.7068 (1.4658) lr 1.0000e-05 eta 2 days, 2:53:04
epoch [5/50] batch [1680/2000] time 2.000 (2.028) data 0.001 (0.001) loss 1.6975 (1.4692) lr 1.0000e-05 eta 2 days, 2:52:25
epoch [5/50] batch [1700/2000] time 1.999 (2.028) data 0.000 (0.001) loss 1.6107 (1.4679) lr 1.0000e-05 eta 2 days, 2:51:52
epoch [5/50] batch [1720/2000] time 2.029 (2.028) data 0.000 (0.001) loss 0.0990 (1.4693) lr 1.0000e-05 eta 2 days, 2:51:28
epoch [5/50] batch [1740/2000] time 2.056 (2.028) data 0.000 (0.001) loss 1.7925 (1.4726) lr 1.0000e-05 eta 2 days, 2:50:47
epoch [5/50] batch [1760/2000] time 1.997 (2.028) data 0.000 (0.001) loss 2.8892 (1.4736) lr 1.0000e-05 eta 2 days, 2:50:08
epoch [5/50] batch [1780/2000] time 2.029 (2.028) data 0.000 (0.001) loss 1.5412 (1.4735) lr 1.0000e-05 eta 2 days, 2:49:24
epoch [5/50] batch [1800/2000] time 1.973 (2.028) data 0.000 (0.001) loss 0.9835 (1.4813) lr 1.0000e-05 eta 2 days, 2:48:46
epoch [5/50] batch [1820/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.3077 (1.4757) lr 1.0000e-05 eta 2 days, 2:48:09
epoch [5/50] batch [1840/2000] time 2.050 (2.028) data 0.000 (0.000) loss 0.4389 (1.4746) lr 1.0000e-05 eta 2 days, 2:47:28
epoch [5/50] batch [1860/2000] time 2.050 (2.028) data 0.000 (0.000) loss 2.5587 (1.4756) lr 1.0000e-05 eta 2 days, 2:46:48
epoch [5/50] batch [1880/2000] time 2.055 (2.028) data 0.000 (0.000) loss 1.7460 (1.4780) lr 1.0000e-05 eta 2 days, 2:46:08
epoch [5/50] batch [1900/2000] time 2.058 (2.028) data 0.000 (0.000) loss 0.8008 (1.4742) lr 1.0000e-05 eta 2 days, 2:45:30
epoch [5/50] batch [1920/2000] time 2.035 (2.028) data 0.000 (0.000) loss 0.0915 (1.4747) lr 1.0000e-05 eta 2 days, 2:45:00
epoch [5/50] batch [1940/2000] time 2.001 (2.028) data 0.000 (0.000) loss 1.7366 (1.4743) lr 1.0000e-05 eta 2 days, 2:44:27
epoch [5/50] batch [1960/2000] time 2.030 (2.028) data 0.000 (0.000) loss 0.2128 (1.4738) lr 1.0000e-05 eta 2 days, 2:43:50
epoch [5/50] batch [1980/2000] time 2.052 (2.028) data 0.000 (0.000) loss 0.0714 (1.4743) lr 1.0000e-05 eta 2 days, 2:43:11
epoch [5/50] batch [2000/2000] time 2.052 (2.028) data 0.000 (0.000) loss 1.4397 (1.4753) lr 2.0000e-03 eta 2 days, 2:42:28
epoch [6/50] batch [20/2000] time 2.051 (2.060) data 0.000 (0.027) loss 2.2938 (1.8374) lr 2.0000e-03 eta 2 days, 3:28:43
epoch [6/50] batch [40/2000] time 2.029 (2.044) data 0.000 (0.014) loss 1.2792 (1.8861) lr 2.0000e-03 eta 2 days, 3:04:30
epoch [6/50] batch [60/2000] time 2.027 (2.038) data 0.001 (0.009) loss 0.4161 (1.9767) lr 2.0000e-03 eta 2 days, 2:54:42
epoch [6/50] batch [80/2000] time 2.033 (2.035) data 0.000 (0.007) loss 0.2864 (1.8908) lr 2.0000e-03 eta 2 days, 2:50:25
epoch [6/50] batch [100/2000] time 2.002 (2.034) data 0.000 (0.006) loss 1.0411 (1.9298) lr 2.0000e-03 eta 2 days, 2:48:10
epoch [6/50] batch [120/2000] time 2.053 (2.035) data 0.000 (0.005) loss 2.0964 (1.8850) lr 2.0000e-03 eta 2 days, 2:48:26
epoch [6/50] batch [140/2000] time 2.001 (2.035) data 0.000 (0.004) loss 0.4500 (1.8178) lr 2.0000e-03 eta 2 days, 2:47:15
epoch [6/50] batch [160/2000] time 2.033 (2.034) data 0.000 (0.004) loss 3.5166 (1.7737) lr 2.0000e-03 eta 2 days, 2:45:33
epoch [6/50] batch [180/2000] time 2.000 (2.033) data 0.000 (0.003) loss 3.7715 (1.7788) lr 2.0000e-03 eta 2 days, 2:44:04
epoch [6/50] batch [200/2000] time 2.026 (2.033) data 0.000 (0.003) loss 2.2549 (1.7759) lr 2.0000e-03 eta 2 days, 2:43:08
epoch [6/50] batch [220/2000] time 2.049 (2.033) data 0.000 (0.003) loss 0.2465 (1.7261) lr 2.0000e-03 eta 2 days, 2:42:45
epoch [6/50] batch [240/2000] time 2.049 (2.033) data 0.000 (0.002) loss 1.7556 (1.7132) lr 2.0000e-03 eta 2 days, 2:40:51
epoch [6/50] batch [260/2000] time 2.030 (2.032) data 0.000 (0.002) loss 1.5477 (1.6620) lr 2.0000e-03 eta 2 days, 2:39:52
epoch [6/50] batch [280/2000] time 2.054 (2.033) data 0.000 (0.002) loss 0.6433 (1.6350) lr 2.0000e-03 eta 2 days, 2:39:34
epoch [6/50] batch [300/2000] time 2.055 (2.033) data 0.000 (0.002) loss 1.1074 (1.6277) lr 2.0000e-03 eta 2 days, 2:38:49
epoch [6/50] batch [320/2000] time 2.050 (2.032) data 0.000 (0.002) loss 1.4579 (1.6220) lr 2.0000e-03 eta 2 days, 2:37:52
epoch [6/50] batch [340/2000] time 2.053 (2.032) data 0.000 (0.002) loss 2.6882 (1.6152) lr 2.0000e-03 eta 2 days, 2:36:16
epoch [6/50] batch [360/2000] time 2.048 (2.032) data 0.000 (0.002) loss 0.3028 (1.5863) lr 2.0000e-03 eta 2 days, 2:35:24
epoch [6/50] batch [380/2000] time 1.997 (2.032) data 0.000 (0.002) loss 0.5778 (1.5677) lr 2.0000e-03 eta 2 days, 2:34:55
epoch [6/50] batch [400/2000] time 2.000 (2.031) data 0.000 (0.002) loss 0.8435 (1.5752) lr 2.0000e-03 eta 2 days, 2:33:21
epoch [6/50] batch [420/2000] time 1.997 (2.031) data 0.000 (0.002) loss 4.1848 (1.5740) lr 2.0000e-03 eta 2 days, 2:32:50
epoch [6/50] batch [440/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.7184 (1.5643) lr 2.0000e-03 eta 2 days, 2:31:43
epoch [6/50] batch [460/2000] time 2.001 (2.031) data 0.000 (0.001) loss 0.3560 (1.5490) lr 2.0000e-03 eta 2 days, 2:31:06
epoch [6/50] batch [480/2000] time 2.002 (2.031) data 0.000 (0.001) loss 0.8295 (1.5297) lr 2.0000e-03 eta 2 days, 2:30:17
epoch [6/50] batch [500/2000] time 2.054 (2.031) data 0.000 (0.001) loss 0.2308 (1.5099) lr 2.0000e-03 eta 2 days, 2:29:47
epoch [6/50] batch [520/2000] time 2.050 (2.031) data 0.000 (0.001) loss 3.2578 (1.5094) lr 2.0000e-03 eta 2 days, 2:29:09
epoch [6/50] batch [540/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.3132 (1.4899) lr 2.0000e-03 eta 2 days, 2:28:36
epoch [6/50] batch [560/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.3138 (1.4887) lr 2.0000e-03 eta 2 days, 2:27:39
epoch [6/50] batch [580/2000] time 1.974 (2.031) data 0.000 (0.001) loss 1.6301 (1.4827) lr 2.0000e-03 eta 2 days, 2:26:53
epoch [6/50] batch [600/2000] time 2.051 (2.031) data 0.001 (0.001) loss 1.0531 (1.4849) lr 2.0000e-03 eta 2 days, 2:26:01
epoch [6/50] batch [620/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.4606 (1.4754) lr 2.0000e-03 eta 2 days, 2:25:22
epoch [6/50] batch [640/2000] time 1.997 (2.031) data 0.000 (0.001) loss 2.7353 (1.4588) lr 2.0000e-03 eta 2 days, 2:24:49
epoch [6/50] batch [660/2000] time 2.027 (2.031) data 0.000 (0.001) loss 1.1745 (1.4746) lr 2.0000e-03 eta 2 days, 2:24:06
epoch [6/50] batch [680/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.8065 (1.4884) lr 2.0000e-03 eta 2 days, 2:23:23
epoch [6/50] batch [700/2000] time 1.999 (2.031) data 0.000 (0.001) loss 1.5977 (1.4911) lr 2.0000e-03 eta 2 days, 2:22:32
epoch [6/50] batch [720/2000] time 1.974 (2.031) data 0.000 (0.001) loss 1.0139 (1.4896) lr 2.0000e-03 eta 2 days, 2:21:56
epoch [6/50] batch [740/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.7563 (1.4892) lr 2.0000e-03 eta 2 days, 2:21:02
epoch [6/50] batch [760/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.4421 (1.4847) lr 2.0000e-03 eta 2 days, 2:20:26
epoch [6/50] batch [780/2000] time 1.974 (2.031) data 0.000 (0.001) loss 3.5114 (1.4777) lr 2.0000e-03 eta 2 days, 2:19:50
epoch [6/50] batch [800/2000] time 2.053 (2.031) data 0.000 (0.001) loss 2.0403 (1.4804) lr 2.0000e-03 eta 2 days, 2:19:02
epoch [6/50] batch [820/2000] time 2.052 (2.031) data 0.000 (0.001) loss 2.5037 (1.4824) lr 2.0000e-03 eta 2 days, 2:18:12
epoch [6/50] batch [840/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.3110 (1.4735) lr 2.0000e-03 eta 2 days, 2:17:32
epoch [6/50] batch [860/2000] time 1.996 (2.031) data 0.000 (0.001) loss 1.7736 (1.4674) lr 2.0000e-03 eta 2 days, 2:16:51
epoch [6/50] batch [880/2000] time 2.025 (2.030) data 0.000 (0.001) loss 0.6862 (1.4590) lr 2.0000e-03 eta 2 days, 2:15:56
epoch [6/50] batch [900/2000] time 2.029 (2.030) data 0.000 (0.001) loss 2.0709 (1.4549) lr 2.0000e-03 eta 2 days, 2:15:15
epoch [6/50] batch [920/2000] time 2.000 (2.030) data 0.000 (0.001) loss 2.3821 (1.4566) lr 2.0000e-03 eta 2 days, 2:14:34
epoch [6/50] batch [940/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.9570 (1.4600) lr 2.0000e-03 eta 2 days, 2:13:33
epoch [6/50] batch [960/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.1599 (1.4567) lr 2.0000e-03 eta 2 days, 2:13:04
epoch [6/50] batch [980/2000] time 1.997 (2.030) data 0.000 (0.001) loss 2.3153 (1.4574) lr 2.0000e-03 eta 2 days, 2:12:26
epoch [6/50] batch [1000/2000] time 2.050 (2.030) data 0.000 (0.001) loss 4.7075 (1.4565) lr 2.0000e-03 eta 2 days, 2:11:43
epoch [6/50] batch [1020/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.5593 (1.4519) lr 2.0000e-03 eta 2 days, 2:10:51
epoch [6/50] batch [1040/2000] time 2.056 (2.030) data 0.000 (0.001) loss 2.3665 (1.4647) lr 2.0000e-03 eta 2 days, 2:10:00
epoch [6/50] batch [1060/2000] time 2.028 (2.030) data 0.000 (0.001) loss 2.2701 (1.4576) lr 2.0000e-03 eta 2 days, 2:09:17
epoch [6/50] batch [1080/2000] time 1.976 (2.030) data 0.000 (0.001) loss 0.7500 (1.4576) lr 2.0000e-03 eta 2 days, 2:08:19
epoch [6/50] batch [1100/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.8509 (1.4528) lr 2.0000e-03 eta 2 days, 2:07:36
epoch [6/50] batch [1120/2000] time 1.997 (2.030) data 0.000 (0.001) loss 2.7488 (1.4489) lr 2.0000e-03 eta 2 days, 2:06:54
epoch [6/50] batch [1140/2000] time 2.052 (2.030) data 0.001 (0.001) loss 0.2696 (1.4419) lr 2.0000e-03 eta 2 days, 2:06:17
epoch [6/50] batch [1160/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.2441 (1.4443) lr 2.0000e-03 eta 2 days, 2:05:42
epoch [6/50] batch [1180/2000] time 2.054 (2.030) data 0.000 (0.001) loss 2.0874 (1.4441) lr 2.0000e-03 eta 2 days, 2:05:11
epoch [6/50] batch [1200/2000] time 2.000 (2.030) data 0.000 (0.001) loss 2.2914 (1.4433) lr 2.0000e-03 eta 2 days, 2:04:37
epoch [6/50] batch [1220/2000] time 1.998 (2.030) data 0.000 (0.001) loss 2.3011 (1.4422) lr 2.0000e-03 eta 2 days, 2:03:39
epoch [6/50] batch [1240/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.7556 (1.4384) lr 2.0000e-03 eta 2 days, 2:03:00
epoch [6/50] batch [1260/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.9454 (1.4387) lr 2.0000e-03 eta 2 days, 2:02:20
epoch [6/50] batch [1280/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.6599 (1.4388) lr 2.0000e-03 eta 2 days, 2:01:32
epoch [6/50] batch [1300/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.1474 (1.4366) lr 2.0000e-03 eta 2 days, 2:00:56
epoch [6/50] batch [1320/2000] time 2.002 (2.030) data 0.000 (0.001) loss 2.5075 (1.4359) lr 2.0000e-03 eta 2 days, 2:00:12
epoch [6/50] batch [1340/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.4446 (1.4350) lr 2.0000e-03 eta 2 days, 1:59:36
epoch [6/50] batch [1360/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.7319 (1.4331) lr 2.0000e-03 eta 2 days, 1:58:47
epoch [6/50] batch [1380/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.6675 (1.4317) lr 2.0000e-03 eta 2 days, 1:58:05
epoch [6/50] batch [1400/2000] time 2.052 (2.030) data 0.000 (0.001) loss 2.8494 (1.4355) lr 2.0000e-03 eta 2 days, 1:57:32
epoch [6/50] batch [1420/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.1312 (1.4334) lr 2.0000e-03 eta 2 days, 1:56:47
epoch [6/50] batch [1440/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.3343 (1.4364) lr 2.0000e-03 eta 2 days, 1:56:07
epoch [6/50] batch [1460/2000] time 2.053 (2.030) data 0.000 (0.001) loss 2.5666 (1.4398) lr 2.0000e-03 eta 2 days, 1:55:31
epoch [6/50] batch [1480/2000] time 2.054 (2.030) data 0.000 (0.001) loss 2.6556 (1.4411) lr 2.0000e-03 eta 2 days, 1:54:48
epoch [6/50] batch [1500/2000] time 2.030 (2.030) data 0.000 (0.001) loss 1.9173 (1.4343) lr 2.0000e-03 eta 2 days, 1:54:00
epoch [6/50] batch [1520/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.8698 (1.4302) lr 2.0000e-03 eta 2 days, 1:53:25
epoch [6/50] batch [1540/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.2109 (1.4283) lr 2.0000e-03 eta 2 days, 1:52:46
epoch [6/50] batch [1560/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.4972 (1.4301) lr 2.0000e-03 eta 2 days, 1:52:10
epoch [6/50] batch [1580/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.7665 (1.4298) lr 2.0000e-03 eta 2 days, 1:51:18
epoch [6/50] batch [1600/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.8858 (1.4254) lr 2.0000e-03 eta 2 days, 1:50:41
epoch [6/50] batch [1620/2000] time 2.193 (2.031) data 0.000 (0.001) loss 1.3575 (1.4270) lr 2.0000e-03 eta 2 days, 1:51:02
epoch [6/50] batch [1640/2000] time 2.203 (2.033) data 0.000 (0.001) loss 1.0893 (1.4279) lr 2.0000e-03 eta 2 days, 1:54:26
epoch [6/50] batch [1660/2000] time 2.162 (2.037) data 0.000 (0.001) loss 3.2811 (1.4254) lr 2.0000e-03 eta 2 days, 1:58:30
epoch [6/50] batch [1680/2000] time 2.368 (2.040) data 0.001 (0.001) loss 0.4107 (1.4213) lr 2.0000e-03 eta 2 days, 2:02:27
epoch [6/50] batch [1700/2000] time 2.179 (2.042) data 0.000 (0.001) loss 2.1454 (1.4168) lr 2.0000e-03 eta 2 days, 2:05:31
epoch [6/50] batch [1720/2000] time 2.150 (2.045) data 0.000 (0.001) loss 1.5261 (1.4147) lr 2.0000e-03 eta 2 days, 2:09:21
epoch [6/50] batch [1740/2000] time 2.400 (2.048) data 0.000 (0.001) loss 1.3071 (1.4127) lr 2.0000e-03 eta 2 days, 2:13:03
epoch [6/50] batch [1760/2000] time 2.482 (2.051) data 0.000 (0.001) loss 1.9485 (1.4122) lr 2.0000e-03 eta 2 days, 2:16:16
epoch [6/50] batch [1780/2000] time 2.149 (2.054) data 0.000 (0.001) loss 0.6497 (1.4128) lr 2.0000e-03 eta 2 days, 2:19:37
epoch [6/50] batch [1800/2000] time 2.168 (2.056) data 0.000 (0.001) loss 3.0521 (1.4118) lr 2.0000e-03 eta 2 days, 2:22:54
epoch [6/50] batch [1820/2000] time 2.514 (2.059) data 0.000 (0.001) loss 0.2144 (1.4108) lr 2.0000e-03 eta 2 days, 2:26:02
epoch [6/50] batch [1840/2000] time 2.146 (2.061) data 0.000 (0.001) loss 1.0723 (1.4083) lr 2.0000e-03 eta 2 days, 2:28:32
epoch [6/50] batch [1860/2000] time 2.076 (2.064) data 0.000 (0.001) loss 2.2338 (1.4089) lr 2.0000e-03 eta 2 days, 2:31:30
epoch [6/50] batch [1880/2000] time 2.179 (2.066) data 0.000 (0.001) loss 0.3289 (1.4122) lr 2.0000e-03 eta 2 days, 2:33:52
epoch [6/50] batch [1900/2000] time 2.029 (2.067) data 0.000 (0.001) loss 1.8900 (1.4102) lr 2.0000e-03 eta 2 days, 2:34:54
epoch [6/50] batch [1920/2000] time 2.030 (2.066) data 0.000 (0.001) loss 0.8301 (1.4090) lr 2.0000e-03 eta 2 days, 2:33:35
epoch [6/50] batch [1940/2000] time 2.052 (2.066) data 0.000 (0.000) loss 1.2712 (1.4054) lr 2.0000e-03 eta 2 days, 2:32:22
epoch [6/50] batch [1960/2000] time 2.024 (2.066) data 0.000 (0.000) loss 2.3970 (1.4010) lr 2.0000e-03 eta 2 days, 2:31:12
epoch [6/50] batch [1980/2000] time 1.995 (2.065) data 0.000 (0.000) loss 0.5029 (1.4031) lr 2.0000e-03 eta 2 days, 2:29:55
epoch [6/50] batch [2000/2000] time 2.051 (2.065) data 0.000 (0.000) loss 1.5708 (1.4042) lr 1.9980e-03 eta 2 days, 2:28:35
epoch [7/50] batch [20/2000] time 2.029 (2.054) data 0.000 (0.027) loss 0.9092 (1.6937) lr 1.9980e-03 eta 2 days, 2:11:27
epoch [7/50] batch [40/2000] time 1.998 (2.044) data 0.000 (0.014) loss 1.1157 (1.5668) lr 1.9980e-03 eta 2 days, 1:56:14
epoch [7/50] batch [60/2000] time 1.998 (2.040) data 0.000 (0.009) loss 0.3804 (1.4580) lr 1.9980e-03 eta 2 days, 1:50:33
epoch [7/50] batch [80/2000] time 2.051 (2.037) data 0.000 (0.007) loss 2.3535 (1.4451) lr 1.9980e-03 eta 2 days, 1:45:18
epoch [7/50] batch [100/2000] time 1.996 (2.034) data 0.000 (0.006) loss 0.9771 (1.4249) lr 1.9980e-03 eta 2 days, 1:39:36
epoch [7/50] batch [120/2000] time 1.998 (2.033) data 0.000 (0.005) loss 0.5785 (1.3768) lr 1.9980e-03 eta 2 days, 1:37:17
epoch [7/50] batch [140/2000] time 2.053 (2.033) data 0.000 (0.004) loss 3.4030 (1.3533) lr 1.9980e-03 eta 2 days, 1:36:40
epoch [7/50] batch [160/2000] time 2.030 (2.032) data 0.000 (0.004) loss 1.5424 (1.3838) lr 1.9980e-03 eta 2 days, 1:35:25
epoch [7/50] batch [180/2000] time 2.027 (2.032) data 0.000 (0.003) loss 2.3095 (1.3600) lr 1.9980e-03 eta 2 days, 1:34:23
epoch [7/50] batch [200/2000] time 2.050 (2.032) data 0.000 (0.003) loss 3.4890 (1.3696) lr 1.9980e-03 eta 2 days, 1:33:15
epoch [7/50] batch [220/2000] time 1.994 (2.032) data 0.000 (0.003) loss 1.3547 (1.3788) lr 1.9980e-03 eta 2 days, 1:32:08
epoch [7/50] batch [240/2000] time 2.048 (2.031) data 0.000 (0.002) loss 2.1565 (1.3844) lr 1.9980e-03 eta 2 days, 1:31:21
epoch [7/50] batch [260/2000] time 2.028 (2.031) data 0.000 (0.002) loss 1.0252 (1.3603) lr 1.9980e-03 eta 2 days, 1:30:23
epoch [7/50] batch [280/2000] time 1.998 (2.031) data 0.000 (0.002) loss 2.1067 (1.3575) lr 1.9980e-03 eta 2 days, 1:29:31
epoch [7/50] batch [300/2000] time 1.971 (2.031) data 0.000 (0.002) loss 1.4196 (1.3564) lr 1.9980e-03 eta 2 days, 1:27:59
epoch [7/50] batch [320/2000] time 2.051 (2.031) data 0.000 (0.002) loss 0.4834 (1.3531) lr 1.9980e-03 eta 2 days, 1:27:14
epoch [7/50] batch [340/2000] time 2.057 (2.031) data 0.000 (0.002) loss 1.7034 (1.3520) lr 1.9980e-03 eta 2 days, 1:26:40
epoch [7/50] batch [360/2000] time 2.028 (2.031) data 0.000 (0.002) loss 0.6568 (1.3517) lr 1.9980e-03 eta 2 days, 1:26:25
epoch [7/50] batch [380/2000] time 1.974 (2.031) data 0.000 (0.002) loss 0.9323 (1.3468) lr 1.9980e-03 eta 2 days, 1:25:40
epoch [7/50] batch [400/2000] time 2.001 (2.031) data 0.000 (0.002) loss 1.2734 (1.3337) lr 1.9980e-03 eta 2 days, 1:25:00
epoch [7/50] batch [420/2000] time 2.029 (2.031) data 0.000 (0.002) loss 0.7723 (1.3310) lr 1.9980e-03 eta 2 days, 1:24:12
epoch [7/50] batch [440/2000] time 2.055 (2.031) data 0.000 (0.001) loss 1.4284 (1.3386) lr 1.9980e-03 eta 2 days, 1:23:28
epoch [7/50] batch [460/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.0929 (1.3446) lr 1.9980e-03 eta 2 days, 1:23:00
epoch [7/50] batch [480/2000] time 1.995 (2.031) data 0.000 (0.001) loss 0.5998 (1.3411) lr 1.9980e-03 eta 2 days, 1:22:00
epoch [7/50] batch [500/2000] time 2.048 (2.030) data 0.000 (0.001) loss 0.8489 (1.3427) lr 1.9980e-03 eta 2 days, 1:21:07
epoch [7/50] batch [520/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.2902 (1.3517) lr 1.9980e-03 eta 2 days, 1:20:26
epoch [7/50] batch [540/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.1625 (1.3508) lr 1.9980e-03 eta 2 days, 1:19:18
epoch [7/50] batch [560/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.0714 (1.3537) lr 1.9980e-03 eta 2 days, 1:18:38
epoch [7/50] batch [580/2000] time 2.029 (2.030) data 0.000 (0.001) loss 2.6598 (1.3543) lr 1.9980e-03 eta 2 days, 1:17:57
epoch [7/50] batch [600/2000] time 2.053 (2.030) data 0.001 (0.001) loss 1.2502 (1.3491) lr 1.9980e-03 eta 2 days, 1:17:10
epoch [7/50] batch [620/2000] time 1.999 (2.030) data 0.000 (0.001) loss 3.4627 (1.3499) lr 1.9980e-03 eta 2 days, 1:16:19
epoch [7/50] batch [640/2000] time 2.031 (2.030) data 0.000 (0.001) loss 1.0855 (1.3461) lr 1.9980e-03 eta 2 days, 1:15:41
epoch [7/50] batch [660/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.7151 (1.3446) lr 1.9980e-03 eta 2 days, 1:14:56
epoch [7/50] batch [680/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.1415 (1.3458) lr 1.9980e-03 eta 2 days, 1:14:01
epoch [7/50] batch [700/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.0878 (1.3366) lr 1.9980e-03 eta 2 days, 1:13:17
epoch [7/50] batch [720/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.7874 (1.3241) lr 1.9980e-03 eta 2 days, 1:12:33
epoch [7/50] batch [740/2000] time 2.003 (2.030) data 0.000 (0.001) loss 1.8161 (1.3222) lr 1.9980e-03 eta 2 days, 1:11:55
epoch [7/50] batch [760/2000] time 2.034 (2.030) data 0.000 (0.001) loss 2.0890 (1.3190) lr 1.9980e-03 eta 2 days, 1:11:26
epoch [7/50] batch [780/2000] time 2.057 (2.030) data 0.000 (0.001) loss 0.6558 (1.3169) lr 1.9980e-03 eta 2 days, 1:11:09
epoch [7/50] batch [800/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.9551 (1.3146) lr 1.9980e-03 eta 2 days, 1:10:34
epoch [7/50] batch [820/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.0236 (1.3071) lr 1.9980e-03 eta 2 days, 1:09:42
epoch [7/50] batch [840/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.2957 (1.3148) lr 1.9980e-03 eta 2 days, 1:08:59
epoch [7/50] batch [860/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.3833 (1.3121) lr 1.9980e-03 eta 2 days, 1:08:14
epoch [7/50] batch [880/2000] time 1.975 (2.030) data 0.000 (0.001) loss 0.7384 (1.3106) lr 1.9980e-03 eta 2 days, 1:07:39
epoch [7/50] batch [900/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.6146 (1.3083) lr 1.9980e-03 eta 2 days, 1:06:56
epoch [7/50] batch [920/2000] time 2.002 (2.030) data 0.000 (0.001) loss 1.3615 (1.3082) lr 1.9980e-03 eta 2 days, 1:06:17
epoch [7/50] batch [940/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.0890 (1.3017) lr 1.9980e-03 eta 2 days, 1:05:42
epoch [7/50] batch [960/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.6291 (1.3046) lr 1.9980e-03 eta 2 days, 1:04:54
epoch [7/50] batch [980/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.2726 (1.3062) lr 1.9980e-03 eta 2 days, 1:04:09
epoch [7/50] batch [1000/2000] time 1.998 (2.030) data 0.000 (0.001) loss 2.3784 (1.3037) lr 1.9980e-03 eta 2 days, 1:03:31
epoch [7/50] batch [1020/2000] time 2.028 (2.030) data 0.000 (0.001) loss 1.5566 (1.2995) lr 1.9980e-03 eta 2 days, 1:02:45
epoch [7/50] batch [1040/2000] time 2.050 (2.030) data 0.000 (0.001) loss 2.5423 (1.2997) lr 1.9980e-03 eta 2 days, 1:02:03
epoch [7/50] batch [1060/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.8559 (1.3041) lr 1.9980e-03 eta 2 days, 1:01:15
epoch [7/50] batch [1080/2000] time 2.031 (2.030) data 0.000 (0.001) loss 1.0692 (1.2957) lr 1.9980e-03 eta 2 days, 1:00:37
epoch [7/50] batch [1100/2000] time 2.047 (2.030) data 0.000 (0.001) loss 1.4377 (1.2959) lr 1.9980e-03 eta 2 days, 0:59:59
epoch [7/50] batch [1120/2000] time 1.973 (2.030) data 0.000 (0.001) loss 1.5505 (1.2927) lr 1.9980e-03 eta 2 days, 0:59:11
epoch [7/50] batch [1140/2000] time 2.027 (2.030) data 0.001 (0.001) loss 0.7789 (1.2924) lr 1.9980e-03 eta 2 days, 0:58:33
epoch [7/50] batch [1160/2000] time 2.051 (2.030) data 0.000 (0.001) loss 2.8080 (1.2926) lr 1.9980e-03 eta 2 days, 0:57:46
epoch [7/50] batch [1180/2000] time 1.977 (2.030) data 0.000 (0.001) loss 3.4389 (1.2997) lr 1.9980e-03 eta 2 days, 0:56:53
epoch [7/50] batch [1200/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.6045 (1.2965) lr 1.9980e-03 eta 2 days, 0:56:12
epoch [7/50] batch [1220/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.4671 (1.2984) lr 1.9980e-03 eta 2 days, 0:55:44
epoch [7/50] batch [1240/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.2013 (1.2999) lr 1.9980e-03 eta 2 days, 0:55:08
epoch [7/50] batch [1260/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.9664 (1.3107) lr 1.9980e-03 eta 2 days, 0:54:30
epoch [7/50] batch [1280/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.9599 (1.3135) lr 1.9980e-03 eta 2 days, 0:53:47
epoch [7/50] batch [1300/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.8588 (1.3214) lr 1.9980e-03 eta 2 days, 0:53:00
epoch [7/50] batch [1320/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.3631 (1.3233) lr 1.9980e-03 eta 2 days, 0:52:15
epoch [7/50] batch [1340/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.8150 (1.3280) lr 1.9980e-03 eta 2 days, 0:51:39
epoch [7/50] batch [1360/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.9623 (1.3229) lr 1.9980e-03 eta 2 days, 0:50:55
epoch [7/50] batch [1380/2000] time 1.979 (2.030) data 0.000 (0.001) loss 0.2241 (1.3241) lr 1.9980e-03 eta 2 days, 0:50:08
epoch [7/50] batch [1400/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.2219 (1.3228) lr 1.9980e-03 eta 2 days, 0:49:27
epoch [7/50] batch [1420/2000] time 2.121 (2.032) data 0.000 (0.001) loss 0.5693 (1.3216) lr 1.9980e-03 eta 2 days, 0:52:24
epoch [7/50] batch [1440/2000] time 2.141 (2.035) data 0.000 (0.001) loss 1.2852 (1.3215) lr 1.9980e-03 eta 2 days, 0:56:06
epoch [7/50] batch [1460/2000] time 2.502 (2.039) data 0.000 (0.001) loss 1.0166 (1.3196) lr 1.9980e-03 eta 2 days, 1:00:26
epoch [7/50] batch [1480/2000] time 2.161 (2.042) data 0.000 (0.001) loss 0.8132 (1.3177) lr 1.9980e-03 eta 2 days, 1:04:04
epoch [7/50] batch [1500/2000] time 2.349 (2.045) data 0.000 (0.001) loss 2.0469 (1.3194) lr 1.9980e-03 eta 2 days, 1:07:41
epoch [7/50] batch [1520/2000] time 2.157 (2.048) data 0.000 (0.001) loss 0.5537 (1.3205) lr 1.9980e-03 eta 2 days, 1:11:36
epoch [7/50] batch [1540/2000] time 2.150 (2.051) data 0.000 (0.001) loss 3.0595 (1.3212) lr 1.9980e-03 eta 2 days, 1:15:02
epoch [7/50] batch [1560/2000] time 2.387 (2.054) data 0.000 (0.001) loss 1.2273 (1.3196) lr 1.9980e-03 eta 2 days, 1:19:04
epoch [7/50] batch [1580/2000] time 2.142 (2.056) data 0.000 (0.001) loss 0.6465 (1.3192) lr 1.9980e-03 eta 2 days, 1:21:56
epoch [7/50] batch [1600/2000] time 2.524 (2.059) data 0.000 (0.001) loss 0.2123 (1.3148) lr 1.9980e-03 eta 2 days, 1:25:21
epoch [7/50] batch [1620/2000] time 2.181 (2.062) data 0.000 (0.001) loss 1.8071 (1.3122) lr 1.9980e-03 eta 2 days, 1:28:08
epoch [7/50] batch [1640/2000] time 2.483 (2.064) data 0.000 (0.001) loss 1.0345 (1.3135) lr 1.9980e-03 eta 2 days, 1:31:02
epoch [7/50] batch [1660/2000] time 2.079 (2.067) data 0.000 (0.001) loss 2.2056 (1.3152) lr 1.9980e-03 eta 2 days, 1:33:53
epoch [7/50] batch [1680/2000] time 2.133 (2.069) data 0.000 (0.001) loss 0.7918 (1.3146) lr 1.9980e-03 eta 2 days, 1:36:28
epoch [7/50] batch [1700/2000] time 1.997 (2.071) data 0.000 (0.001) loss 1.1561 (1.3163) lr 1.9980e-03 eta 2 days, 1:38:36
epoch [7/50] batch [1720/2000] time 2.055 (2.070) data 0.000 (0.001) loss 2.3499 (1.3187) lr 1.9980e-03 eta 2 days, 1:37:13
epoch [7/50] batch [1740/2000] time 2.063 (2.070) data 0.000 (0.001) loss 2.0195 (1.3196) lr 1.9980e-03 eta 2 days, 1:35:55
epoch [7/50] batch [1760/2000] time 2.005 (2.070) data 0.000 (0.001) loss 1.0436 (1.3229) lr 1.9980e-03 eta 2 days, 1:34:34
epoch [7/50] batch [1780/2000] time 2.057 (2.069) data 0.000 (0.001) loss 1.0430 (1.3254) lr 1.9980e-03 eta 2 days, 1:33:13
epoch [7/50] batch [1800/2000] time 2.059 (2.069) data 0.000 (0.001) loss 2.5546 (1.3257) lr 1.9980e-03 eta 2 days, 1:31:57
epoch [7/50] batch [1820/2000] time 2.053 (2.068) data 0.000 (0.001) loss 0.8887 (1.3282) lr 1.9980e-03 eta 2 days, 1:30:44
epoch [7/50] batch [1840/2000] time 2.053 (2.068) data 0.000 (0.001) loss 1.6538 (1.3267) lr 1.9980e-03 eta 2 days, 1:29:31
epoch [7/50] batch [1860/2000] time 2.029 (2.067) data 0.000 (0.001) loss 1.1427 (1.3244) lr 1.9980e-03 eta 2 days, 1:28:08
epoch [7/50] batch [1880/2000] time 2.030 (2.067) data 0.000 (0.001) loss 0.7694 (1.3256) lr 1.9980e-03 eta 2 days, 1:26:49
epoch [7/50] batch [1900/2000] time 2.030 (2.067) data 0.000 (0.001) loss 4.9224 (1.3285) lr 1.9980e-03 eta 2 days, 1:25:39
epoch [7/50] batch [1920/2000] time 1.975 (2.066) data 0.000 (0.001) loss 0.3044 (1.3317) lr 1.9980e-03 eta 2 days, 1:24:26
epoch [7/50] batch [1940/2000] time 2.050 (2.066) data 0.000 (0.001) loss 0.3102 (1.3303) lr 1.9980e-03 eta 2 days, 1:23:13
epoch [7/50] batch [1960/2000] time 2.029 (2.066) data 0.000 (0.001) loss 3.8270 (1.3286) lr 1.9980e-03 eta 2 days, 1:22:00
epoch [7/50] batch [1980/2000] time 2.026 (2.065) data 0.000 (0.001) loss 1.3465 (1.3247) lr 1.9980e-03 eta 2 days, 1:20:48
epoch [7/50] batch [2000/2000] time 2.046 (2.065) data 0.000 (0.000) loss 3.6142 (1.3233) lr 1.9921e-03 eta 2 days, 1:19:27
epoch [8/50] batch [20/2000] time 2.029 (2.056) data 0.000 (0.027) loss 1.2941 (1.3997) lr 1.9921e-03 eta 2 days, 1:06:26
epoch [8/50] batch [40/2000] time 1.997 (2.042) data 0.000 (0.013) loss 3.3706 (1.4259) lr 1.9921e-03 eta 2 days, 0:45:45
epoch [8/50] batch [60/2000] time 2.050 (2.037) data 0.001 (0.009) loss 2.5519 (1.4131) lr 1.9921e-03 eta 2 days, 0:38:04
epoch [8/50] batch [80/2000] time 2.050 (2.033) data 0.000 (0.007) loss 0.4367 (1.3415) lr 1.9921e-03 eta 2 days, 0:31:10
epoch [8/50] batch [100/2000] time 2.027 (2.032) data 0.000 (0.006) loss 1.7084 (1.3998) lr 1.9921e-03 eta 2 days, 0:29:10
epoch [8/50] batch [120/2000] time 2.050 (2.031) data 0.000 (0.005) loss 0.6581 (1.3722) lr 1.9921e-03 eta 2 days, 0:27:05
epoch [8/50] batch [140/2000] time 1.999 (2.029) data 0.000 (0.004) loss 0.5701 (1.4161) lr 1.9921e-03 eta 2 days, 0:23:53
epoch [8/50] batch [160/2000] time 2.052 (2.029) data 0.000 (0.004) loss 2.8705 (1.4358) lr 1.9921e-03 eta 2 days, 0:23:04
epoch [8/50] batch [180/2000] time 2.033 (2.029) data 0.000 (0.003) loss 0.8634 (1.4238) lr 1.9921e-03 eta 2 days, 0:21:59
epoch [8/50] batch [200/2000] time 2.034 (2.030) data 0.000 (0.003) loss 1.0857 (1.4318) lr 1.9921e-03 eta 2 days, 0:22:17
epoch [8/50] batch [220/2000] time 2.028 (2.030) data 0.000 (0.003) loss 0.2419 (1.4246) lr 1.9921e-03 eta 2 days, 0:22:10
epoch [8/50] batch [240/2000] time 2.006 (2.030) data 0.000 (0.002) loss 4.2138 (1.4240) lr 1.9921e-03 eta 2 days, 0:21:41
epoch [8/50] batch [260/2000] time 2.004 (2.030) data 0.000 (0.002) loss 1.9335 (1.4270) lr 1.9921e-03 eta 2 days, 0:20:18
epoch [8/50] batch [280/2000] time 2.053 (2.030) data 0.000 (0.002) loss 0.4965 (1.3949) lr 1.9921e-03 eta 2 days, 0:19:59
epoch [8/50] batch [300/2000] time 1.974 (2.030) data 0.000 (0.002) loss 0.1242 (1.3995) lr 1.9921e-03 eta 2 days, 0:19:08
epoch [8/50] batch [320/2000] time 2.052 (2.030) data 0.000 (0.002) loss 0.6481 (1.4071) lr 1.9921e-03 eta 2 days, 0:18:29
epoch [8/50] batch [340/2000] time 2.051 (2.029) data 0.000 (0.002) loss 0.9393 (1.4147) lr 1.9921e-03 eta 2 days, 0:17:17
epoch [8/50] batch [360/2000] time 2.052 (2.029) data 0.000 (0.002) loss 1.4519 (1.4056) lr 1.9921e-03 eta 2 days, 0:16:41
epoch [8/50] batch [380/2000] time 2.055 (2.030) data 0.000 (0.002) loss 2.1916 (1.3891) lr 1.9921e-03 eta 2 days, 0:16:47
epoch [8/50] batch [400/2000] time 2.054 (2.030) data 0.000 (0.002) loss 1.8017 (1.3737) lr 1.9921e-03 eta 2 days, 0:15:57
epoch [8/50] batch [420/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.6017 (1.3822) lr 1.9921e-03 eta 2 days, 0:14:55
epoch [8/50] batch [440/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.2028 (1.3871) lr 1.9921e-03 eta 2 days, 0:14:09
epoch [8/50] batch [460/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.9210 (1.3707) lr 1.9921e-03 eta 2 days, 0:13:32
epoch [8/50] batch [480/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.6589 (1.3874) lr 1.9921e-03 eta 2 days, 0:13:09
epoch [8/50] batch [500/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.2645 (1.3709) lr 1.9921e-03 eta 2 days, 0:12:22
epoch [8/50] batch [520/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.2512 (1.3655) lr 1.9921e-03 eta 2 days, 0:11:55
epoch [8/50] batch [540/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.1581 (1.3685) lr 1.9921e-03 eta 2 days, 0:11:08
epoch [8/50] batch [560/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.7421 (1.3598) lr 1.9921e-03 eta 2 days, 0:10:21
epoch [8/50] batch [580/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.7327 (1.3639) lr 1.9921e-03 eta 2 days, 0:09:20
epoch [8/50] batch [600/2000] time 2.002 (2.029) data 0.000 (0.001) loss 0.7150 (1.3580) lr 1.9921e-03 eta 2 days, 0:08:32
epoch [8/50] batch [620/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.1993 (1.3535) lr 1.9921e-03 eta 2 days, 0:07:52
epoch [8/50] batch [640/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.7967 (1.3613) lr 1.9921e-03 eta 2 days, 0:06:56
epoch [8/50] batch [660/2000] time 1.996 (2.029) data 0.000 (0.001) loss 2.2571 (1.3577) lr 1.9921e-03 eta 2 days, 0:06:33
epoch [8/50] batch [680/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.0720 (1.3618) lr 1.9921e-03 eta 2 days, 0:06:02
epoch [8/50] batch [700/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.1058 (1.3602) lr 1.9921e-03 eta 2 days, 0:05:34
epoch [8/50] batch [720/2000] time 2.052 (2.030) data 0.000 (0.001) loss 2.1860 (1.3599) lr 1.9921e-03 eta 2 days, 0:05:04
epoch [8/50] batch [740/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.1198 (1.3576) lr 1.9921e-03 eta 2 days, 0:04:30
epoch [8/50] batch [760/2000] time 2.056 (2.030) data 0.000 (0.001) loss 1.9672 (1.3519) lr 1.9921e-03 eta 2 days, 0:04:04
epoch [8/50] batch [780/2000] time 2.034 (2.030) data 0.000 (0.001) loss 1.8413 (1.3540) lr 1.9921e-03 eta 2 days, 0:03:22
epoch [8/50] batch [800/2000] time 2.029 (2.030) data 0.000 (0.001) loss 2.5542 (1.3563) lr 1.9921e-03 eta 2 days, 0:02:57
epoch [8/50] batch [820/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.6617 (1.3551) lr 1.9921e-03 eta 2 days, 0:02:29
epoch [8/50] batch [840/2000] time 2.026 (2.030) data 0.000 (0.001) loss 0.8842 (1.3588) lr 1.9921e-03 eta 2 days, 0:01:38
epoch [8/50] batch [860/2000] time 2.026 (2.030) data 0.000 (0.001) loss 3.6049 (1.3576) lr 1.9921e-03 eta 2 days, 0:00:48
epoch [8/50] batch [880/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.1993 (1.3597) lr 1.9921e-03 eta 2 days, 0:00:02
epoch [8/50] batch [900/2000] time 2.026 (2.030) data 0.000 (0.001) loss 2.8724 (1.3591) lr 1.9921e-03 eta 1 day, 23:59:23
epoch [8/50] batch [920/2000] time 2.003 (2.030) data 0.000 (0.001) loss 0.2152 (1.3527) lr 1.9921e-03 eta 1 day, 23:58:53
epoch [8/50] batch [940/2000] time 2.056 (2.030) data 0.000 (0.001) loss 1.8504 (1.3482) lr 1.9921e-03 eta 1 day, 23:58:09
epoch [8/50] batch [960/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.2000 (1.3421) lr 1.9921e-03 eta 1 day, 23:57:34
epoch [8/50] batch [980/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.2563 (1.3467) lr 1.9921e-03 eta 1 day, 23:56:51
epoch [8/50] batch [1000/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.4057 (1.3527) lr 1.9921e-03 eta 1 day, 23:56:04
epoch [8/50] batch [1020/2000] time 2.032 (2.030) data 0.000 (0.001) loss 0.9143 (1.3441) lr 1.9921e-03 eta 1 day, 23:55:34
epoch [8/50] batch [1040/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.5714 (1.3394) lr 1.9921e-03 eta 1 day, 23:54:47
epoch [8/50] batch [1060/2000] time 2.000 (2.030) data 0.000 (0.001) loss 1.0984 (1.3402) lr 1.9921e-03 eta 1 day, 23:54:02
epoch [8/50] batch [1080/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.8710 (1.3428) lr 1.9921e-03 eta 1 day, 23:53:23
epoch [8/50] batch [1100/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.8630 (1.3380) lr 1.9921e-03 eta 1 day, 23:52:34
epoch [8/50] batch [1120/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.8139 (1.3366) lr 1.9921e-03 eta 1 day, 23:51:44
epoch [8/50] batch [1140/2000] time 2.054 (2.030) data 0.001 (0.001) loss 0.7260 (1.3397) lr 1.9921e-03 eta 1 day, 23:51:06
epoch [8/50] batch [1160/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.9986 (1.3381) lr 1.9921e-03 eta 1 day, 23:50:30
epoch [8/50] batch [1180/2000] time 2.028 (2.030) data 0.000 (0.001) loss 1.7235 (1.3432) lr 1.9921e-03 eta 1 day, 23:49:56
epoch [8/50] batch [1200/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.4077 (1.3443) lr 1.9921e-03 eta 1 day, 23:49:15
epoch [8/50] batch [1220/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.3985 (1.3421) lr 1.9921e-03 eta 1 day, 23:48:40
epoch [8/50] batch [1240/2000] time 2.003 (2.030) data 0.000 (0.001) loss 1.3154 (1.3414) lr 1.9921e-03 eta 1 day, 23:47:58
epoch [8/50] batch [1260/2000] time 2.058 (2.030) data 0.000 (0.001) loss 0.2590 (1.3377) lr 1.9921e-03 eta 1 day, 23:47:25
epoch [8/50] batch [1280/2000] time 2.055 (2.031) data 0.000 (0.001) loss 1.5088 (1.3366) lr 1.9921e-03 eta 1 day, 23:47:06
epoch [8/50] batch [1300/2000] time 2.027 (2.031) data 0.000 (0.001) loss 1.1973 (1.3349) lr 1.9921e-03 eta 1 day, 23:46:35
epoch [8/50] batch [1320/2000] time 1.975 (2.031) data 0.000 (0.001) loss 0.6514 (1.3329) lr 1.9921e-03 eta 1 day, 23:45:50
epoch [8/50] batch [1340/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.5293 (1.3303) lr 1.9921e-03 eta 1 day, 23:45:08
epoch [8/50] batch [1360/2000] time 1.997 (2.031) data 0.000 (0.001) loss 1.2803 (1.3314) lr 1.9921e-03 eta 1 day, 23:44:27
epoch [8/50] batch [1380/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.3747 (1.3290) lr 1.9921e-03 eta 1 day, 23:43:52
epoch [8/50] batch [1400/2000] time 1.997 (2.031) data 0.000 (0.001) loss 1.1721 (1.3326) lr 1.9921e-03 eta 1 day, 23:43:12
epoch [8/50] batch [1420/2000] time 1.997 (2.031) data 0.000 (0.001) loss 1.5599 (1.3296) lr 1.9921e-03 eta 1 day, 23:42:26
epoch [8/50] batch [1440/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.3680 (1.3274) lr 1.9921e-03 eta 1 day, 23:41:42
epoch [8/50] batch [1460/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.3668 (1.3349) lr 1.9921e-03 eta 1 day, 23:41:00
epoch [8/50] batch [1480/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.3693 (1.3362) lr 1.9921e-03 eta 1 day, 23:40:24
epoch [8/50] batch [1500/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.0890 (1.3366) lr 1.9921e-03 eta 1 day, 23:39:47
epoch [8/50] batch [1520/2000] time 1.999 (2.031) data 0.000 (0.001) loss 1.4114 (1.3328) lr 1.9921e-03 eta 1 day, 23:39:05
epoch [8/50] batch [1540/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.2843 (1.3303) lr 1.9921e-03 eta 1 day, 23:38:22
epoch [8/50] batch [1560/2000] time 1.999 (2.030) data 0.000 (0.001) loss 2.1399 (1.3317) lr 1.9921e-03 eta 1 day, 23:37:31
epoch [8/50] batch [1580/2000] time 2.003 (2.030) data 0.000 (0.001) loss 1.9870 (1.3331) lr 1.9921e-03 eta 1 day, 23:36:54
epoch [8/50] batch [1600/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.1986 (1.3331) lr 1.9921e-03 eta 1 day, 23:36:16
epoch [8/50] batch [1620/2000] time 2.001 (2.030) data 0.000 (0.001) loss 0.4504 (1.3329) lr 1.9921e-03 eta 1 day, 23:35:32
epoch [8/50] batch [1640/2000] time 2.004 (2.031) data 0.000 (0.001) loss 2.5720 (1.3352) lr 1.9921e-03 eta 1 day, 23:34:55
epoch [8/50] batch [1660/2000] time 2.036 (2.031) data 0.000 (0.001) loss 2.7288 (1.3348) lr 1.9921e-03 eta 1 day, 23:34:18
epoch [8/50] batch [1680/2000] time 2.002 (2.031) data 0.000 (0.001) loss 0.0318 (1.3343) lr 1.9921e-03 eta 1 day, 23:33:41
epoch [8/50] batch [1700/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.8522 (1.3290) lr 1.9921e-03 eta 1 day, 23:32:54
epoch [8/50] batch [1720/2000] time 1.974 (2.031) data 0.000 (0.001) loss 4.6076 (1.3292) lr 1.9921e-03 eta 1 day, 23:32:11
epoch [8/50] batch [1740/2000] time 2.054 (2.031) data 0.000 (0.001) loss 3.2630 (1.3282) lr 1.9921e-03 eta 1 day, 23:31:33
epoch [8/50] batch [1760/2000] time 2.052 (2.031) data 0.000 (0.001) loss 2.3393 (1.3321) lr 1.9921e-03 eta 1 day, 23:30:51
epoch [8/50] batch [1780/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.3559 (1.3327) lr 1.9921e-03 eta 1 day, 23:30:03
epoch [8/50] batch [1800/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.6860 (1.3320) lr 1.9921e-03 eta 1 day, 23:29:13
epoch [8/50] batch [1820/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.0847 (1.3318) lr 1.9921e-03 eta 1 day, 23:28:34
epoch [8/50] batch [1840/2000] time 2.026 (2.030) data 0.000 (0.000) loss 2.0617 (1.3276) lr 1.9921e-03 eta 1 day, 23:27:46
epoch [8/50] batch [1860/2000] time 2.049 (2.030) data 0.000 (0.000) loss 1.9203 (1.3273) lr 1.9921e-03 eta 1 day, 23:27:01
epoch [8/50] batch [1880/2000] time 1.997 (2.030) data 0.000 (0.000) loss 1.3402 (1.3242) lr 1.9921e-03 eta 1 day, 23:26:19
epoch [8/50] batch [1900/2000] time 2.053 (2.030) data 0.000 (0.000) loss 0.4757 (1.3206) lr 1.9921e-03 eta 1 day, 23:25:37
epoch [8/50] batch [1920/2000] time 2.028 (2.030) data 0.000 (0.000) loss 1.2200 (1.3214) lr 1.9921e-03 eta 1 day, 23:25:00
epoch [8/50] batch [1940/2000] time 2.051 (2.030) data 0.000 (0.000) loss 3.4484 (1.3205) lr 1.9921e-03 eta 1 day, 23:24:24
epoch [8/50] batch [1960/2000] time 1.998 (2.030) data 0.000 (0.000) loss 1.6522 (1.3223) lr 1.9921e-03 eta 1 day, 23:23:39
epoch [8/50] batch [1980/2000] time 2.025 (2.030) data 0.000 (0.000) loss 0.3908 (1.3191) lr 1.9921e-03 eta 1 day, 23:22:57
epoch [8/50] batch [2000/2000] time 2.047 (2.030) data 0.000 (0.000) loss 2.3388 (1.3217) lr 1.9823e-03 eta 1 day, 23:22:19
epoch [9/50] batch [20/2000] time 2.050 (2.057) data 0.000 (0.027) loss 1.5288 (1.4884) lr 1.9823e-03 eta 1 day, 23:59:33
epoch [9/50] batch [40/2000] time 2.029 (2.043) data 0.000 (0.014) loss 0.7788 (1.5347) lr 1.9823e-03 eta 1 day, 23:38:22
epoch [9/50] batch [60/2000] time 2.054 (2.040) data 0.001 (0.009) loss 2.7132 (1.4748) lr 1.9823e-03 eta 1 day, 23:33:57
epoch [9/50] batch [80/2000] time 2.000 (2.038) data 0.000 (0.007) loss 2.4363 (1.4484) lr 1.9823e-03 eta 1 day, 23:30:10
epoch [9/50] batch [100/2000] time 2.050 (2.036) data 0.000 (0.006) loss 0.4341 (1.4145) lr 1.9823e-03 eta 1 day, 23:27:27
epoch [9/50] batch [120/2000] time 2.053 (2.035) data 0.000 (0.005) loss 0.9572 (1.4184) lr 1.9823e-03 eta 1 day, 23:24:35
epoch [9/50] batch [140/2000] time 1.999 (2.033) data 0.000 (0.004) loss 0.2934 (1.3431) lr 1.9823e-03 eta 1 day, 23:21:27
epoch [9/50] batch [160/2000] time 2.052 (2.033) data 0.000 (0.004) loss 2.1139 (1.3249) lr 1.9823e-03 eta 1 day, 23:20:12
epoch [9/50] batch [180/2000] time 2.027 (2.032) data 0.000 (0.003) loss 1.2784 (1.3130) lr 1.9823e-03 eta 1 day, 23:18:04
epoch [9/50] batch [200/2000] time 1.998 (2.032) data 0.000 (0.003) loss 0.3644 (1.3004) lr 1.9823e-03 eta 1 day, 23:17:29
epoch [9/50] batch [220/2000] time 2.027 (2.032) data 0.000 (0.003) loss 2.6193 (1.2927) lr 1.9823e-03 eta 1 day, 23:17:18
epoch [9/50] batch [240/2000] time 2.051 (2.031) data 0.000 (0.002) loss 2.7048 (1.3230) lr 1.9823e-03 eta 1 day, 23:15:23
epoch [9/50] batch [260/2000] time 2.026 (2.031) data 0.000 (0.002) loss 0.7839 (1.3121) lr 1.9823e-03 eta 1 day, 23:14:52
epoch [9/50] batch [280/2000] time 2.050 (2.031) data 0.000 (0.002) loss 2.2775 (1.3185) lr 1.9823e-03 eta 1 day, 23:13:44
epoch [9/50] batch [300/2000] time 2.051 (2.031) data 0.000 (0.002) loss 0.8663 (1.3220) lr 1.9823e-03 eta 1 day, 23:13:27
epoch [9/50] batch [320/2000] time 2.053 (2.031) data 0.000 (0.002) loss 1.2858 (1.3250) lr 1.9823e-03 eta 1 day, 23:13:01
epoch [9/50] batch [340/2000] time 2.049 (2.031) data 0.000 (0.002) loss 0.7957 (1.3188) lr 1.9823e-03 eta 1 day, 23:11:48
epoch [9/50] batch [360/2000] time 2.051 (2.031) data 0.000 (0.002) loss 1.3537 (1.3172) lr 1.9823e-03 eta 1 day, 23:11:34
epoch [9/50] batch [380/2000] time 2.392 (2.047) data 0.000 (0.002) loss 0.9854 (1.3078) lr 1.9823e-03 eta 1 day, 23:32:49
epoch [9/50] batch [400/2000] time 2.390 (2.063) data 0.000 (0.002) loss 1.5743 (1.2903) lr 1.9823e-03 eta 1 day, 23:54:42
epoch [9/50] batch [420/2000] time 2.386 (2.078) data 0.000 (0.001) loss 2.6591 (1.2800) lr 1.9823e-03 eta 2 days, 0:14:10
epoch [9/50] batch [440/2000] time 2.395 (2.091) data 0.000 (0.001) loss 0.9450 (1.2864) lr 1.9823e-03 eta 2 days, 0:31:45
epoch [9/50] batch [460/2000] time 2.388 (2.103) data 0.000 (0.001) loss 0.1960 (1.2877) lr 1.9823e-03 eta 2 days, 0:47:27
epoch [9/50] batch [480/2000] time 2.381 (2.113) data 0.000 (0.001) loss 0.0370 (1.2865) lr 1.9823e-03 eta 2 days, 1:01:49
epoch [9/50] batch [500/2000] time 2.398 (2.123) data 0.000 (0.001) loss 1.1938 (1.2895) lr 1.9823e-03 eta 2 days, 1:15:10
epoch [9/50] batch [520/2000] time 2.364 (2.133) data 0.000 (0.001) loss 1.0925 (1.2915) lr 1.9823e-03 eta 2 days, 1:27:28
epoch [9/50] batch [540/2000] time 2.332 (2.141) data 0.000 (0.001) loss 2.1411 (1.3037) lr 1.9823e-03 eta 2 days, 1:38:25
epoch [9/50] batch [560/2000] time 2.391 (2.149) data 0.000 (0.001) loss 3.3952 (1.3004) lr 1.9823e-03 eta 2 days, 1:48:48
epoch [9/50] batch [580/2000] time 2.334 (2.157) data 0.000 (0.001) loss 1.1465 (1.3106) lr 1.9823e-03 eta 2 days, 1:58:19
epoch [9/50] batch [600/2000] time 2.369 (2.163) data 0.001 (0.001) loss 2.2088 (1.3096) lr 1.9823e-03 eta 2 days, 2:07:12
epoch [9/50] batch [620/2000] time 2.386 (2.170) data 0.000 (0.001) loss 1.7971 (1.3167) lr 1.9823e-03 eta 2 days, 2:15:42
epoch [9/50] batch [640/2000] time 2.395 (2.176) data 0.000 (0.001) loss 2.2480 (1.3205) lr 1.9823e-03 eta 2 days, 2:22:57
epoch [9/50] batch [660/2000] time 2.407 (2.182) data 0.000 (0.001) loss 1.9771 (1.3125) lr 1.9823e-03 eta 2 days, 2:30:20
epoch [9/50] batch [680/2000] time 2.365 (2.187) data 0.000 (0.001) loss 0.0872 (1.3095) lr 1.9823e-03 eta 2 days, 2:37:06
epoch [9/50] batch [700/2000] time 2.386 (2.192) data 0.000 (0.001) loss 0.2666 (1.3036) lr 1.9823e-03 eta 2 days, 2:43:17
epoch [9/50] batch [720/2000] time 2.375 (2.197) data 0.000 (0.001) loss 0.4778 (1.3050) lr 1.9823e-03 eta 2 days, 2:49:03
epoch [9/50] batch [740/2000] time 2.389 (2.201) data 0.000 (0.001) loss 3.5918 (1.3173) lr 1.9823e-03 eta 2 days, 2:54:37
epoch [9/50] batch [760/2000] time 2.390 (2.206) data 0.000 (0.001) loss 0.8298 (1.3184) lr 1.9823e-03 eta 2 days, 3:00:16
epoch [9/50] batch [780/2000] time 2.377 (2.210) data 0.000 (0.001) loss 0.3381 (1.3170) lr 1.9823e-03 eta 2 days, 3:05:08
epoch [9/50] batch [800/2000] time 2.228 (2.214) data 0.000 (0.001) loss 1.3387 (1.3203) lr 1.9823e-03 eta 2 days, 3:09:39
epoch [9/50] batch [820/2000] time 2.844 (2.226) data 0.000 (0.001) loss 1.2599 (1.3182) lr 1.9823e-03 eta 2 days, 3:26:04
epoch [9/50] batch [840/2000] time 3.033 (2.242) data 0.000 (0.001) loss 2.4853 (1.3177) lr 1.9823e-03 eta 2 days, 3:47:33
epoch [9/50] batch [860/2000] time 2.854 (2.257) data 0.000 (0.001) loss 2.1109 (1.3148) lr 1.9823e-03 eta 2 days, 4:07:39
epoch [9/50] batch [880/2000] time 2.997 (2.272) data 0.000 (0.001) loss 0.2917 (1.3108) lr 1.9823e-03 eta 2 days, 4:27:12
epoch [9/50] batch [900/2000] time 2.555 (2.285) data 0.000 (0.001) loss 0.7886 (1.3135) lr 1.9823e-03 eta 2 days, 4:45:18
epoch [9/50] batch [920/2000] time 3.013 (2.299) data 0.000 (0.001) loss 1.9655 (1.3164) lr 1.9823e-03 eta 2 days, 5:03:13
epoch [9/50] batch [940/2000] time 2.880 (2.311) data 0.000 (0.001) loss 1.7651 (1.3160) lr 1.9823e-03 eta 2 days, 5:19:51
epoch [9/50] batch [960/2000] time 2.886 (2.324) data 0.000 (0.001) loss 1.9787 (1.3190) lr 1.9823e-03 eta 2 days, 5:35:47
epoch [9/50] batch [980/2000] time 3.073 (2.335) data 0.000 (0.001) loss 0.0489 (1.3148) lr 1.9823e-03 eta 2 days, 5:51:19
epoch [9/50] batch [1000/2000] time 2.823 (2.346) data 0.000 (0.001) loss 0.7846 (1.3090) lr 1.9823e-03 eta 2 days, 6:05:57
epoch [9/50] batch [1020/2000] time 2.033 (2.341) data 0.000 (0.001) loss 1.5158 (1.3089) lr 1.9823e-03 eta 2 days, 5:57:54
epoch [9/50] batch [1040/2000] time 2.051 (2.335) data 0.000 (0.001) loss 1.2842 (1.2999) lr 1.9823e-03 eta 2 days, 5:48:50
epoch [9/50] batch [1060/2000] time 2.028 (2.330) data 0.000 (0.001) loss 0.2974 (1.2995) lr 1.9823e-03 eta 2 days, 5:40:09
epoch [9/50] batch [1080/2000] time 2.001 (2.324) data 0.000 (0.001) loss 3.2076 (1.3082) lr 1.9823e-03 eta 2 days, 5:31:40
epoch [9/50] batch [1100/2000] time 2.049 (2.319) data 0.000 (0.001) loss 1.1352 (1.3002) lr 1.9823e-03 eta 2 days, 5:23:34
epoch [9/50] batch [1120/2000] time 2.050 (2.313) data 0.000 (0.001) loss 1.8318 (1.3046) lr 1.9823e-03 eta 2 days, 5:15:35
epoch [9/50] batch [1140/2000] time 2.050 (2.308) data 0.001 (0.001) loss 1.0374 (1.3020) lr 1.9823e-03 eta 2 days, 5:07:55
epoch [9/50] batch [1160/2000] time 1.995 (2.304) data 0.000 (0.001) loss 1.3478 (1.2987) lr 1.9823e-03 eta 2 days, 5:00:23
epoch [9/50] batch [1180/2000] time 2.049 (2.299) data 0.000 (0.001) loss 0.4901 (1.3003) lr 1.9823e-03 eta 2 days, 4:53:09
epoch [9/50] batch [1200/2000] time 2.049 (2.294) data 0.000 (0.001) loss 0.9041 (1.2987) lr 1.9823e-03 eta 2 days, 4:46:19
epoch [9/50] batch [1220/2000] time 2.000 (2.290) data 0.000 (0.001) loss 1.0159 (1.2932) lr 1.9823e-03 eta 2 days, 4:39:36
epoch [9/50] batch [1240/2000] time 2.029 (2.286) data 0.000 (0.001) loss 0.4720 (1.2923) lr 1.9823e-03 eta 2 days, 4:33:04
epoch [9/50] batch [1260/2000] time 1.998 (2.282) data 0.000 (0.001) loss 1.0173 (1.2997) lr 1.9823e-03 eta 2 days, 4:26:43
epoch [9/50] batch [1280/2000] time 1.998 (2.278) data 0.000 (0.001) loss 3.2530 (1.2913) lr 1.9823e-03 eta 2 days, 4:20:31
epoch [9/50] batch [1300/2000] time 1.978 (2.274) data 0.000 (0.001) loss 0.1534 (1.2864) lr 1.9823e-03 eta 2 days, 4:14:27
epoch [9/50] batch [1320/2000] time 2.004 (2.270) data 0.000 (0.001) loss 1.2193 (1.2871) lr 1.9823e-03 eta 2 days, 4:08:35
epoch [9/50] batch [1340/2000] time 2.056 (2.267) data 0.000 (0.001) loss 0.2970 (1.2842) lr 1.9823e-03 eta 2 days, 4:03:00
epoch [9/50] batch [1360/2000] time 2.035 (2.263) data 0.000 (0.001) loss 2.0674 (1.2832) lr 1.9823e-03 eta 2 days, 3:57:26
epoch [9/50] batch [1380/2000] time 2.034 (2.260) data 0.000 (0.001) loss 1.6695 (1.2819) lr 1.9823e-03 eta 2 days, 3:52:07
epoch [9/50] batch [1400/2000] time 2.054 (2.257) data 0.000 (0.001) loss 0.8482 (1.2852) lr 1.9823e-03 eta 2 days, 3:46:57
epoch [9/50] batch [1420/2000] time 2.027 (2.254) data 0.000 (0.001) loss 2.2920 (1.2844) lr 1.9823e-03 eta 2 days, 3:41:49
epoch [9/50] batch [1440/2000] time 2.028 (2.250) data 0.000 (0.001) loss 0.8909 (1.2877) lr 1.9823e-03 eta 2 days, 3:36:32
epoch [9/50] batch [1460/2000] time 1.998 (2.247) data 0.000 (0.001) loss 0.8057 (1.2908) lr 1.9823e-03 eta 2 days, 3:31:34
epoch [9/50] batch [1480/2000] time 1.975 (2.244) data 0.000 (0.001) loss 1.0982 (1.2910) lr 1.9823e-03 eta 2 days, 3:26:46
epoch [9/50] batch [1500/2000] time 2.028 (2.241) data 0.000 (0.001) loss 1.5951 (1.2892) lr 1.9823e-03 eta 2 days, 3:22:02
epoch [9/50] batch [1520/2000] time 1.973 (2.239) data 0.000 (0.001) loss 1.3700 (1.2867) lr 1.9823e-03 eta 2 days, 3:17:23
epoch [9/50] batch [1540/2000] time 2.000 (2.236) data 0.000 (0.001) loss 0.2598 (1.2859) lr 1.9823e-03 eta 2 days, 3:12:49
epoch [9/50] batch [1560/2000] time 2.051 (2.233) data 0.000 (0.001) loss 2.2596 (1.2862) lr 1.9823e-03 eta 2 days, 3:08:25
epoch [9/50] batch [1580/2000] time 1.998 (2.231) data 0.000 (0.001) loss 0.2189 (1.2860) lr 1.9823e-03 eta 2 days, 3:04:06
epoch [9/50] batch [1600/2000] time 1.997 (2.228) data 0.000 (0.001) loss 2.7382 (1.2902) lr 1.9823e-03 eta 2 days, 2:59:47
epoch [9/50] batch [1620/2000] time 1.997 (2.226) data 0.000 (0.001) loss 1.4405 (1.2900) lr 1.9823e-03 eta 2 days, 2:55:38
epoch [9/50] batch [1640/2000] time 1.999 (2.223) data 0.000 (0.001) loss 0.8896 (1.2866) lr 1.9823e-03 eta 2 days, 2:51:31
epoch [9/50] batch [1660/2000] time 1.972 (2.221) data 0.000 (0.001) loss 2.1126 (1.2866) lr 1.9823e-03 eta 2 days, 2:47:34
epoch [9/50] batch [1680/2000] time 2.050 (2.218) data 0.001 (0.001) loss 1.1623 (1.2853) lr 1.9823e-03 eta 2 days, 2:43:45
epoch [9/50] batch [1700/2000] time 1.972 (2.216) data 0.000 (0.001) loss 0.4213 (1.2856) lr 1.9823e-03 eta 2 days, 2:39:46
epoch [9/50] batch [1720/2000] time 2.052 (2.214) data 0.000 (0.001) loss 2.1751 (1.2859) lr 1.9823e-03 eta 2 days, 2:36:06
epoch [9/50] batch [1740/2000] time 2.051 (2.212) data 0.000 (0.001) loss 2.2592 (1.2866) lr 1.9823e-03 eta 2 days, 2:32:26
epoch [9/50] batch [1760/2000] time 2.001 (2.210) data 0.000 (0.001) loss 0.8373 (1.2893) lr 1.9823e-03 eta 2 days, 2:28:54
epoch [9/50] batch [1780/2000] time 2.030 (2.208) data 0.000 (0.001) loss 1.3786 (1.2903) lr 1.9823e-03 eta 2 days, 2:25:25
epoch [9/50] batch [1800/2000] time 2.029 (2.206) data 0.000 (0.001) loss 1.1492 (1.2889) lr 1.9823e-03 eta 2 days, 2:21:57
epoch [9/50] batch [1820/2000] time 2.054 (2.204) data 0.000 (0.001) loss 0.2639 (1.2866) lr 1.9823e-03 eta 2 days, 2:18:33
epoch [9/50] batch [1840/2000] time 1.996 (2.202) data 0.000 (0.001) loss 0.6895 (1.2878) lr 1.9823e-03 eta 2 days, 2:15:06
epoch [9/50] batch [1860/2000] time 2.058 (2.200) data 0.000 (0.001) loss 2.3398 (1.2923) lr 1.9823e-03 eta 2 days, 2:11:50
epoch [9/50] batch [1880/2000] time 2.006 (2.198) data 0.000 (0.001) loss 1.5612 (1.2916) lr 1.9823e-03 eta 2 days, 2:08:40
epoch [9/50] batch [1900/2000] time 2.034 (2.197) data 0.000 (0.001) loss 2.2399 (1.2913) lr 1.9823e-03 eta 2 days, 2:05:33
epoch [9/50] batch [1920/2000] time 2.057 (2.195) data 0.000 (0.001) loss 2.3538 (1.2935) lr 1.9823e-03 eta 2 days, 2:02:29
epoch [9/50] batch [1940/2000] time 2.058 (2.193) data 0.000 (0.000) loss 1.2927 (1.2926) lr 1.9823e-03 eta 2 days, 1:59:30
epoch [9/50] batch [1960/2000] time 2.002 (2.192) data 0.000 (0.000) loss 0.3754 (1.2927) lr 1.9823e-03 eta 2 days, 1:56:32
epoch [9/50] batch [1980/2000] time 2.059 (2.190) data 0.000 (0.000) loss 0.1194 (1.2880) lr 1.9823e-03 eta 2 days, 1:53:38
epoch [9/50] batch [2000/2000] time 1.975 (2.188) data 0.000 (0.000) loss 1.3957 (1.2897) lr 1.9686e-03 eta 2 days, 1:50:45
epoch [10/50] batch [20/2000] time 1.998 (2.049) data 0.000 (0.027) loss 1.1138 (1.2667) lr 1.9686e-03 eta 1 day, 22:39:41
epoch [10/50] batch [40/2000] time 2.051 (2.038) data 0.000 (0.013) loss 1.8765 (1.1207) lr 1.9686e-03 eta 1 day, 22:24:34
epoch [10/50] batch [60/2000] time 2.056 (2.034) data 0.001 (0.009) loss 2.7242 (1.1355) lr 1.9686e-03 eta 1 day, 22:18:18
epoch [10/50] batch [80/2000] time 1.999 (2.034) data 0.000 (0.007) loss 0.4530 (1.1985) lr 1.9686e-03 eta 1 day, 22:17:26
epoch [10/50] batch [100/2000] time 2.028 (2.034) data 0.000 (0.005) loss 1.1111 (1.1931) lr 1.9686e-03 eta 1 day, 22:16:49
epoch [10/50] batch [120/2000] time 2.028 (2.035) data 0.000 (0.005) loss 0.6748 (1.2714) lr 1.9686e-03 eta 1 day, 22:17:01
epoch [10/50] batch [140/2000] time 2.053 (2.034) data 0.000 (0.004) loss 1.5671 (1.2733) lr 1.9686e-03 eta 1 day, 22:15:37
epoch [10/50] batch [160/2000] time 2.053 (2.035) data 0.000 (0.003) loss 2.0414 (1.2569) lr 1.9686e-03 eta 1 day, 22:15:04
epoch [10/50] batch [180/2000] time 2.029 (2.034) data 0.000 (0.003) loss 0.5855 (1.2515) lr 1.9686e-03 eta 1 day, 22:13:26
epoch [10/50] batch [200/2000] time 2.054 (2.034) data 0.000 (0.003) loss 0.2088 (1.2366) lr 1.9686e-03 eta 1 day, 22:13:29
epoch [10/50] batch [220/2000] time 2.025 (2.034) data 0.000 (0.003) loss 1.8247 (1.2189) lr 1.9686e-03 eta 1 day, 22:12:30
epoch [10/50] batch [240/2000] time 1.995 (2.033) data 0.000 (0.002) loss 0.3109 (1.2031) lr 1.9686e-03 eta 1 day, 22:10:49
epoch [10/50] batch [260/2000] time 1.995 (2.032) data 0.000 (0.002) loss 0.6843 (1.1865) lr 1.9686e-03 eta 1 day, 22:08:41
epoch [10/50] batch [280/2000] time 2.049 (2.032) data 0.000 (0.002) loss 3.1833 (1.2166) lr 1.9686e-03 eta 1 day, 22:08:00
epoch [10/50] batch [300/2000] time 1.997 (2.032) data 0.000 (0.002) loss 1.5101 (1.2022) lr 1.9686e-03 eta 1 day, 22:06:55
epoch [10/50] batch [320/2000] time 2.051 (2.031) data 0.000 (0.002) loss 2.9361 (1.1966) lr 1.9686e-03 eta 1 day, 22:05:30
epoch [10/50] batch [340/2000] time 2.027 (2.031) data 0.000 (0.002) loss 0.3438 (1.1841) lr 1.9686e-03 eta 1 day, 22:04:28
epoch [10/50] batch [360/2000] time 1.996 (2.031) data 0.000 (0.002) loss 2.5885 (1.2027) lr 1.9686e-03 eta 1 day, 22:03:29
epoch [10/50] batch [380/2000] time 2.050 (2.031) data 0.000 (0.002) loss 0.6539 (1.2009) lr 1.9686e-03 eta 1 day, 22:02:49
epoch [10/50] batch [400/2000] time 1.995 (2.031) data 0.000 (0.002) loss 0.4321 (1.2102) lr 1.9686e-03 eta 1 day, 22:01:50
epoch [10/50] batch [420/2000] time 2.026 (2.031) data 0.000 (0.001) loss 1.1556 (1.2172) lr 1.9686e-03 eta 1 day, 22:01:04
epoch [10/50] batch [440/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.9596 (1.2199) lr 1.9686e-03 eta 1 day, 22:00:05
epoch [10/50] batch [460/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.0104 (1.2143) lr 1.9686e-03 eta 1 day, 21:59:51
epoch [10/50] batch [480/2000] time 2.051 (2.031) data 0.000 (0.001) loss 2.8923 (1.2104) lr 1.9686e-03 eta 1 day, 21:59:07
epoch [10/50] batch [500/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.9650 (1.2058) lr 1.9686e-03 eta 1 day, 21:57:37
epoch [10/50] batch [520/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.7487 (1.2208) lr 1.9686e-03 eta 1 day, 21:56:47
epoch [10/50] batch [540/2000] time 2.056 (2.030) data 0.000 (0.001) loss 1.7212 (1.2165) lr 1.9686e-03 eta 1 day, 21:56:09
epoch [10/50] batch [560/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.6949 (1.2220) lr 1.9686e-03 eta 1 day, 21:55:18
epoch [10/50] batch [580/2000] time 1.994 (2.030) data 0.000 (0.001) loss 0.8067 (1.2263) lr 1.9686e-03 eta 1 day, 21:54:15
epoch [10/50] batch [600/2000] time 1.996 (2.030) data 0.001 (0.001) loss 1.3829 (1.2392) lr 1.9686e-03 eta 1 day, 21:53:27
epoch [10/50] batch [620/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.6649 (1.2449) lr 1.9686e-03 eta 1 day, 21:52:45
epoch [10/50] batch [640/2000] time 2.026 (2.030) data 0.000 (0.001) loss 2.9473 (1.2430) lr 1.9686e-03 eta 1 day, 21:52:18
epoch [10/50] batch [660/2000] time 2.049 (2.030) data 0.000 (0.001) loss 2.5795 (1.2447) lr 1.9686e-03 eta 1 day, 21:51:34
epoch [10/50] batch [680/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.6674 (1.2480) lr 1.9686e-03 eta 1 day, 21:50:32
epoch [10/50] batch [700/2000] time 2.052 (2.029) data 0.000 (0.001) loss 3.9155 (1.2482) lr 1.9686e-03 eta 1 day, 21:49:44
epoch [10/50] batch [720/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.5229 (1.2486) lr 1.9686e-03 eta 1 day, 21:48:54
epoch [10/50] batch [740/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.8787 (1.2473) lr 1.9686e-03 eta 1 day, 21:47:58
epoch [10/50] batch [760/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.0690 (1.2453) lr 1.9686e-03 eta 1 day, 21:47:24
epoch [10/50] batch [780/2000] time 2.028 (2.029) data 0.000 (0.001) loss 3.8244 (1.2533) lr 1.9686e-03 eta 1 day, 21:46:33
epoch [10/50] batch [800/2000] time 2.055 (2.029) data 0.000 (0.001) loss 1.3141 (1.2548) lr 1.9686e-03 eta 1 day, 21:45:50
epoch [10/50] batch [820/2000] time 1.995 (2.029) data 0.000 (0.001) loss 0.5302 (1.2547) lr 1.9686e-03 eta 1 day, 21:44:53
epoch [10/50] batch [840/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.0293 (1.2479) lr 1.9686e-03 eta 1 day, 21:44:06
epoch [10/50] batch [860/2000] time 1.998 (2.029) data 0.000 (0.001) loss 2.4237 (1.2403) lr 1.9686e-03 eta 1 day, 21:43:33
epoch [10/50] batch [880/2000] time 2.051 (2.029) data 0.000 (0.001) loss 4.0445 (1.2426) lr 1.9686e-03 eta 1 day, 21:42:50
epoch [10/50] batch [900/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.1133 (1.2474) lr 1.9686e-03 eta 1 day, 21:42:18
epoch [10/50] batch [920/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.5697 (1.2441) lr 1.9686e-03 eta 1 day, 21:41:29
epoch [10/50] batch [940/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.5007 (1.2499) lr 1.9686e-03 eta 1 day, 21:40:47
epoch [10/50] batch [960/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.0678 (1.2490) lr 1.9686e-03 eta 1 day, 21:39:56
epoch [10/50] batch [980/2000] time 2.056 (2.029) data 0.000 (0.001) loss 0.8435 (1.2582) lr 1.9686e-03 eta 1 day, 21:39:17
epoch [10/50] batch [1000/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.1148 (1.2507) lr 1.9686e-03 eta 1 day, 21:38:35
epoch [10/50] batch [1020/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.3220 (1.2524) lr 1.9686e-03 eta 1 day, 21:37:48
epoch [10/50] batch [1040/2000] time 1.998 (2.028) data 0.000 (0.001) loss 1.3097 (1.2470) lr 1.9686e-03 eta 1 day, 21:37:04
epoch [10/50] batch [1060/2000] time 2.003 (2.028) data 0.000 (0.001) loss 1.8700 (1.2513) lr 1.9686e-03 eta 1 day, 21:36:15
epoch [10/50] batch [1080/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.4087 (1.2594) lr 1.9686e-03 eta 1 day, 21:35:25
epoch [10/50] batch [1100/2000] time 2.032 (2.028) data 0.000 (0.001) loss 2.3061 (1.2633) lr 1.9686e-03 eta 1 day, 21:34:59
epoch [10/50] batch [1120/2000] time 2.054 (2.029) data 0.000 (0.001) loss 2.7259 (1.2589) lr 1.9686e-03 eta 1 day, 21:34:25
epoch [10/50] batch [1140/2000] time 2.054 (2.028) data 0.001 (0.001) loss 0.6506 (1.2547) lr 1.9686e-03 eta 1 day, 21:33:43
epoch [10/50] batch [1160/2000] time 2.031 (2.029) data 0.000 (0.001) loss 0.4965 (1.2537) lr 1.9686e-03 eta 1 day, 21:33:11
epoch [10/50] batch [1180/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.4073 (1.2510) lr 1.9686e-03 eta 1 day, 21:32:35
epoch [10/50] batch [1200/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.8594 (1.2509) lr 1.9686e-03 eta 1 day, 21:31:48
epoch [10/50] batch [1220/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.5775 (1.2568) lr 1.9686e-03 eta 1 day, 21:31:04
epoch [10/50] batch [1240/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.0322 (1.2538) lr 1.9686e-03 eta 1 day, 21:30:20
epoch [10/50] batch [1260/2000] time 1.995 (2.028) data 0.000 (0.001) loss 1.1690 (1.2538) lr 1.9686e-03 eta 1 day, 21:29:34
epoch [10/50] batch [1280/2000] time 1.997 (2.028) data 0.000 (0.001) loss 2.3253 (1.2589) lr 1.9686e-03 eta 1 day, 21:28:47
epoch [10/50] batch [1300/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.6541 (1.2567) lr 1.9686e-03 eta 1 day, 21:27:58
epoch [10/50] batch [1320/2000] time 2.052 (2.028) data 0.000 (0.001) loss 1.7044 (1.2588) lr 1.9686e-03 eta 1 day, 21:27:26
epoch [10/50] batch [1340/2000] time 2.054 (2.028) data 0.000 (0.001) loss 2.3947 (1.2589) lr 1.9686e-03 eta 1 day, 21:26:52
epoch [10/50] batch [1360/2000] time 2.026 (2.028) data 0.000 (0.001) loss 1.2530 (1.2609) lr 1.9686e-03 eta 1 day, 21:26:12
epoch [10/50] batch [1380/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.4913 (1.2616) lr 1.9686e-03 eta 1 day, 21:25:30
epoch [10/50] batch [1400/2000] time 1.975 (2.028) data 0.000 (0.001) loss 0.7301 (1.2686) lr 1.9686e-03 eta 1 day, 21:24:46
epoch [10/50] batch [1420/2000] time 1.996 (2.028) data 0.000 (0.001) loss 1.2749 (1.2703) lr 1.9686e-03 eta 1 day, 21:24:07
epoch [10/50] batch [1440/2000] time 2.028 (2.028) data 0.000 (0.001) loss 0.2816 (1.2685) lr 1.9686e-03 eta 1 day, 21:23:21
epoch [10/50] batch [1460/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.2972 (1.2661) lr 1.9686e-03 eta 1 day, 21:22:32
epoch [10/50] batch [1480/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.3601 (1.2656) lr 1.9686e-03 eta 1 day, 21:21:55
epoch [10/50] batch [1500/2000] time 1.973 (2.028) data 0.000 (0.001) loss 1.0553 (1.2623) lr 1.9686e-03 eta 1 day, 21:21:05
epoch [10/50] batch [1520/2000] time 2.056 (2.028) data 0.000 (0.001) loss 2.2350 (1.2600) lr 1.9686e-03 eta 1 day, 21:20:24
epoch [10/50] batch [1540/2000] time 2.055 (2.028) data 0.000 (0.001) loss 2.6139 (1.2614) lr 1.9686e-03 eta 1 day, 21:19:57
epoch [10/50] batch [1560/2000] time 2.033 (2.028) data 0.000 (0.001) loss 0.4919 (1.2605) lr 1.9686e-03 eta 1 day, 21:19:18
epoch [10/50] batch [1580/2000] time 2.055 (2.028) data 0.000 (0.001) loss 0.8984 (1.2582) lr 1.9686e-03 eta 1 day, 21:18:46
epoch [10/50] batch [1600/2000] time 2.033 (2.029) data 0.000 (0.001) loss 1.4529 (1.2587) lr 1.9686e-03 eta 1 day, 21:18:13
epoch [10/50] batch [1620/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.8930 (1.2645) lr 1.9686e-03 eta 1 day, 21:17:37
epoch [10/50] batch [1640/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.0452 (1.2639) lr 1.9686e-03 eta 1 day, 21:16:59
epoch [10/50] batch [1660/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.6552 (1.2663) lr 1.9686e-03 eta 1 day, 21:16:16
epoch [10/50] batch [1680/2000] time 2.031 (2.029) data 0.001 (0.001) loss 0.6683 (1.2698) lr 1.9686e-03 eta 1 day, 21:15:42
epoch [10/50] batch [1700/2000] time 1.978 (2.029) data 0.000 (0.001) loss 0.2583 (1.2720) lr 1.9686e-03 eta 1 day, 21:15:01
epoch [10/50] batch [1720/2000] time 1.978 (2.029) data 0.000 (0.001) loss 0.7019 (1.2693) lr 1.9686e-03 eta 1 day, 21:14:26
epoch [10/50] batch [1740/2000] time 2.004 (2.029) data 0.000 (0.001) loss 1.1620 (1.2678) lr 1.9686e-03 eta 1 day, 21:13:54
epoch [10/50] batch [1760/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.6273 (1.2656) lr 1.9686e-03 eta 1 day, 21:13:16
epoch [10/50] batch [1780/2000] time 2.050 (2.029) data 0.000 (0.000) loss 2.9924 (1.2645) lr 1.9686e-03 eta 1 day, 21:12:39
epoch [10/50] batch [1800/2000] time 2.053 (2.029) data 0.000 (0.000) loss 3.4642 (1.2681) lr 1.9686e-03 eta 1 day, 21:12:06
epoch [10/50] batch [1820/2000] time 2.028 (2.029) data 0.000 (0.000) loss 0.6328 (1.2670) lr 1.9686e-03 eta 1 day, 21:11:26
epoch [10/50] batch [1840/2000] time 2.002 (2.029) data 0.000 (0.000) loss 1.1970 (1.2677) lr 1.9686e-03 eta 1 day, 21:10:45
epoch [10/50] batch [1860/2000] time 2.030 (2.029) data 0.000 (0.000) loss 0.8687 (1.2708) lr 1.9686e-03 eta 1 day, 21:10:08
epoch [10/50] batch [1880/2000] time 2.057 (2.029) data 0.000 (0.000) loss 1.5181 (1.2762) lr 1.9686e-03 eta 1 day, 21:09:33
epoch [10/50] batch [1900/2000] time 2.033 (2.029) data 0.000 (0.000) loss 0.4265 (1.2780) lr 1.9686e-03 eta 1 day, 21:08:56
epoch [10/50] batch [1920/2000] time 2.032 (2.029) data 0.000 (0.000) loss 5.7950 (1.2842) lr 1.9686e-03 eta 1 day, 21:08:19
epoch [10/50] batch [1940/2000] time 2.051 (2.029) data 0.000 (0.000) loss 4.1146 (1.2856) lr 1.9686e-03 eta 1 day, 21:07:40
epoch [10/50] batch [1960/2000] time 2.052 (2.029) data 0.000 (0.000) loss 0.3336 (1.2829) lr 1.9686e-03 eta 1 day, 21:07:00
epoch [10/50] batch [1980/2000] time 2.051 (2.029) data 0.000 (0.000) loss 0.8488 (1.2827) lr 1.9686e-03 eta 1 day, 21:06:25
epoch [10/50] batch [2000/2000] time 2.028 (2.029) data 0.000 (0.000) loss 1.4350 (1.2836) lr 1.9511e-03 eta 1 day, 21:05:46
epoch [11/50] batch [20/2000] time 2.028 (2.056) data 0.000 (0.026) loss 1.7234 (1.2394) lr 1.9511e-03 eta 1 day, 21:41:09
epoch [11/50] batch [40/2000] time 2.029 (2.043) data 0.000 (0.013) loss 1.6438 (1.3261) lr 1.9511e-03 eta 1 day, 21:22:26
epoch [11/50] batch [60/2000] time 2.051 (2.039) data 0.001 (0.009) loss 1.3541 (1.2691) lr 1.9511e-03 eta 1 day, 21:17:07
epoch [11/50] batch [80/2000] time 1.999 (2.037) data 0.000 (0.007) loss 0.2007 (1.2421) lr 1.9511e-03 eta 1 day, 21:12:42
epoch [11/50] batch [100/2000] time 2.027 (2.036) data 0.000 (0.005) loss 0.5963 (1.2613) lr 1.9511e-03 eta 1 day, 21:11:48
epoch [11/50] batch [120/2000] time 2.029 (2.035) data 0.000 (0.005) loss 0.7773 (1.2562) lr 1.9511e-03 eta 1 day, 21:08:43
epoch [11/50] batch [140/2000] time 1.999 (2.033) data 0.000 (0.004) loss 2.0979 (1.2565) lr 1.9511e-03 eta 1 day, 21:06:22
epoch [11/50] batch [160/2000] time 1.998 (2.033) data 0.000 (0.003) loss 0.9615 (1.2635) lr 1.9511e-03 eta 1 day, 21:04:57
epoch [11/50] batch [180/2000] time 2.001 (2.032) data 0.000 (0.003) loss 3.8612 (1.3337) lr 1.9511e-03 eta 1 day, 21:03:16
epoch [11/50] batch [200/2000] time 2.027 (2.031) data 0.000 (0.003) loss 1.0417 (1.3645) lr 1.9511e-03 eta 1 day, 21:01:53
epoch [11/50] batch [220/2000] time 1.995 (2.031) data 0.000 (0.003) loss 1.7330 (1.3453) lr 1.9511e-03 eta 1 day, 21:00:02
epoch [11/50] batch [240/2000] time 2.027 (2.030) data 0.000 (0.002) loss 1.3976 (1.3241) lr 1.9511e-03 eta 1 day, 20:59:10
epoch [11/50] batch [260/2000] time 1.996 (2.030) data 0.000 (0.002) loss 1.1238 (1.3359) lr 1.9511e-03 eta 1 day, 20:58:08
epoch [11/50] batch [280/2000] time 2.000 (2.029) data 0.000 (0.002) loss 1.8870 (1.3261) lr 1.9511e-03 eta 1 day, 20:56:19
epoch [11/50] batch [300/2000] time 1.998 (2.030) data 0.000 (0.002) loss 1.3194 (1.3248) lr 1.9511e-03 eta 1 day, 20:55:56
epoch [11/50] batch [320/2000] time 2.052 (2.029) data 0.000 (0.002) loss 2.4210 (1.3035) lr 1.9511e-03 eta 1 day, 20:54:48
epoch [11/50] batch [340/2000] time 2.053 (2.029) data 0.000 (0.002) loss 0.6162 (1.2990) lr 1.9511e-03 eta 1 day, 20:54:23
epoch [11/50] batch [360/2000] time 2.050 (2.029) data 0.000 (0.002) loss 0.2798 (1.3045) lr 1.9511e-03 eta 1 day, 20:53:26
epoch [11/50] batch [380/2000] time 2.000 (2.029) data 0.000 (0.002) loss 1.3379 (1.2946) lr 1.9511e-03 eta 1 day, 20:52:43
epoch [11/50] batch [400/2000] time 1.999 (2.029) data 0.000 (0.002) loss 0.2861 (1.2759) lr 1.9511e-03 eta 1 day, 20:52:01
epoch [11/50] batch [420/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.0352 (1.2665) lr 1.9511e-03 eta 1 day, 20:51:15
epoch [11/50] batch [440/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.9673 (1.2684) lr 1.9511e-03 eta 1 day, 20:50:18
epoch [11/50] batch [460/2000] time 1.994 (2.029) data 0.000 (0.001) loss 3.0916 (1.2827) lr 1.9511e-03 eta 1 day, 20:49:53
epoch [11/50] batch [480/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.3920 (1.3080) lr 1.9511e-03 eta 1 day, 20:49:10
epoch [11/50] batch [500/2000] time 2.057 (2.029) data 0.000 (0.001) loss 1.3074 (1.2943) lr 1.9511e-03 eta 1 day, 20:48:25
epoch [11/50] batch [520/2000] time 2.035 (2.029) data 0.000 (0.001) loss 1.2161 (1.2926) lr 1.9511e-03 eta 1 day, 20:47:58
epoch [11/50] batch [540/2000] time 2.029 (2.030) data 0.000 (0.001) loss 2.2466 (1.2951) lr 1.9511e-03 eta 1 day, 20:47:53
epoch [11/50] batch [560/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.7577 (1.2898) lr 1.9511e-03 eta 1 day, 20:47:29
epoch [11/50] batch [580/2000] time 2.047 (2.030) data 0.000 (0.001) loss 1.5991 (1.2834) lr 1.9511e-03 eta 1 day, 20:46:43
epoch [11/50] batch [600/2000] time 2.048 (2.030) data 0.001 (0.001) loss 2.6252 (1.2910) lr 1.9511e-03 eta 1 day, 20:45:49
epoch [11/50] batch [620/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.7372 (1.2905) lr 1.9511e-03 eta 1 day, 20:45:15
epoch [11/50] batch [640/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.2297 (1.2818) lr 1.9511e-03 eta 1 day, 20:44:09
epoch [11/50] batch [660/2000] time 1.973 (2.029) data 0.000 (0.001) loss 0.7999 (1.2799) lr 1.9511e-03 eta 1 day, 20:43:34
epoch [11/50] batch [680/2000] time 1.994 (2.029) data 0.000 (0.001) loss 0.0817 (1.2783) lr 1.9511e-03 eta 1 day, 20:42:59
epoch [11/50] batch [700/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.2503 (1.2761) lr 1.9511e-03 eta 1 day, 20:42:23
epoch [11/50] batch [720/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.8242 (1.2703) lr 1.9511e-03 eta 1 day, 20:41:23
epoch [11/50] batch [740/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.4458 (1.2643) lr 1.9511e-03 eta 1 day, 20:40:38
epoch [11/50] batch [760/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.4410 (1.2567) lr 1.9511e-03 eta 1 day, 20:39:49
epoch [11/50] batch [780/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.9672 (1.2539) lr 1.9511e-03 eta 1 day, 20:39:18
epoch [11/50] batch [800/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.4552 (1.2597) lr 1.9511e-03 eta 1 day, 20:38:39
epoch [11/50] batch [820/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.7426 (1.2588) lr 1.9511e-03 eta 1 day, 20:38:01
epoch [11/50] batch [840/2000] time 2.003 (2.029) data 0.000 (0.001) loss 0.8380 (1.2609) lr 1.9511e-03 eta 1 day, 20:37:17
epoch [11/50] batch [860/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.5651 (1.2597) lr 1.9511e-03 eta 1 day, 20:36:40
epoch [11/50] batch [880/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.7225 (1.2578) lr 1.9511e-03 eta 1 day, 20:35:46
epoch [11/50] batch [900/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.2329 (1.2640) lr 1.9511e-03 eta 1 day, 20:35:11
epoch [11/50] batch [920/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.6184 (1.2612) lr 1.9511e-03 eta 1 day, 20:34:30
epoch [11/50] batch [940/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.2186 (1.2691) lr 1.9511e-03 eta 1 day, 20:33:54
epoch [11/50] batch [960/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.2831 (1.2744) lr 1.9511e-03 eta 1 day, 20:33:11
epoch [11/50] batch [980/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.2522 (1.2686) lr 1.9511e-03 eta 1 day, 20:32:23
epoch [11/50] batch [1000/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.2807 (1.2701) lr 1.9511e-03 eta 1 day, 20:31:36
epoch [11/50] batch [1020/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.0103 (1.2662) lr 1.9511e-03 eta 1 day, 20:30:56
epoch [11/50] batch [1040/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.6345 (1.2634) lr 1.9511e-03 eta 1 day, 20:30:17
epoch [11/50] batch [1060/2000] time 2.000 (2.029) data 0.000 (0.001) loss 1.0253 (1.2684) lr 1.9511e-03 eta 1 day, 20:29:32
epoch [11/50] batch [1080/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.0717 (1.2681) lr 1.9511e-03 eta 1 day, 20:28:55
epoch [11/50] batch [1100/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.1595 (1.2689) lr 1.9511e-03 eta 1 day, 20:28:06
epoch [11/50] batch [1120/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.4135 (1.2715) lr 1.9511e-03 eta 1 day, 20:27:31
epoch [11/50] batch [1140/2000] time 1.997 (2.029) data 0.001 (0.001) loss 0.5638 (1.2714) lr 1.9511e-03 eta 1 day, 20:26:49
epoch [11/50] batch [1160/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.1197 (1.2718) lr 1.9511e-03 eta 1 day, 20:26:06
epoch [11/50] batch [1180/2000] time 1.999 (2.029) data 0.000 (0.001) loss 1.2660 (1.2673) lr 1.9511e-03 eta 1 day, 20:25:27
epoch [11/50] batch [1200/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.4190 (1.2643) lr 1.9511e-03 eta 1 day, 20:24:36
epoch [11/50] batch [1220/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.5853 (1.2607) lr 1.9511e-03 eta 1 day, 20:23:46
epoch [11/50] batch [1240/2000] time 2.057 (2.029) data 0.000 (0.001) loss 0.9266 (1.2589) lr 1.9511e-03 eta 1 day, 20:23:08
epoch [11/50] batch [1260/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.4111 (1.2575) lr 1.9511e-03 eta 1 day, 20:22:33
epoch [11/50] batch [1280/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.4060 (1.2563) lr 1.9511e-03 eta 1 day, 20:21:51
epoch [11/50] batch [1300/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.2887 (1.2559) lr 1.9511e-03 eta 1 day, 20:21:08
epoch [11/50] batch [1320/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.9180 (1.2537) lr 1.9511e-03 eta 1 day, 20:20:13
epoch [11/50] batch [1340/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.6561 (1.2486) lr 1.9511e-03 eta 1 day, 20:19:35
epoch [11/50] batch [1360/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.3253 (1.2492) lr 1.9511e-03 eta 1 day, 20:18:46
epoch [11/50] batch [1380/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.6518 (1.2509) lr 1.9511e-03 eta 1 day, 20:18:10
epoch [11/50] batch [1400/2000] time 2.050 (2.029) data 0.000 (0.001) loss 3.1377 (1.2570) lr 1.9511e-03 eta 1 day, 20:17:32
epoch [11/50] batch [1420/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.3798 (1.2545) lr 1.9511e-03 eta 1 day, 20:16:54
epoch [11/50] batch [1440/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.1368 (1.2519) lr 1.9511e-03 eta 1 day, 20:16:22
epoch [11/50] batch [1460/2000] time 1.998 (2.029) data 0.000 (0.001) loss 2.8358 (1.2546) lr 1.9511e-03 eta 1 day, 20:15:48
epoch [11/50] batch [1480/2000] time 1.992 (2.029) data 0.000 (0.001) loss 2.2908 (1.2517) lr 1.9511e-03 eta 1 day, 20:15:08
epoch [11/50] batch [1500/2000] time 2.046 (2.029) data 0.000 (0.001) loss 0.3933 (1.2514) lr 1.9511e-03 eta 1 day, 20:14:19
epoch [11/50] batch [1520/2000] time 2.047 (2.029) data 0.000 (0.001) loss 2.1163 (1.2502) lr 1.9511e-03 eta 1 day, 20:13:40
epoch [11/50] batch [1540/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.1100 (1.2489) lr 1.9511e-03 eta 1 day, 20:13:03
epoch [11/50] batch [1560/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.2432 (1.2533) lr 1.9511e-03 eta 1 day, 20:12:19
epoch [11/50] batch [1580/2000] time 2.052 (2.029) data 0.000 (0.001) loss 2.4098 (1.2501) lr 1.9511e-03 eta 1 day, 20:11:49
epoch [11/50] batch [1600/2000] time 2.001 (2.029) data 0.000 (0.001) loss 0.1886 (1.2473) lr 1.9511e-03 eta 1 day, 20:11:05
epoch [11/50] batch [1620/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.6766 (1.2437) lr 1.9511e-03 eta 1 day, 20:10:21
epoch [11/50] batch [1640/2000] time 1.997 (2.029) data 0.000 (0.001) loss 2.3025 (1.2449) lr 1.9511e-03 eta 1 day, 20:09:38
epoch [11/50] batch [1660/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.6909 (1.2478) lr 1.9511e-03 eta 1 day, 20:08:49
epoch [11/50] batch [1680/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.1050 (1.2437) lr 1.9511e-03 eta 1 day, 20:08:05
epoch [11/50] batch [1700/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.8560 (1.2457) lr 1.9511e-03 eta 1 day, 20:07:21
epoch [11/50] batch [1720/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.7651 (1.2494) lr 1.9511e-03 eta 1 day, 20:06:38
epoch [11/50] batch [1740/2000] time 2.027 (2.028) data 0.000 (0.000) loss 1.0574 (1.2525) lr 1.9511e-03 eta 1 day, 20:05:47
epoch [11/50] batch [1760/2000] time 2.028 (2.029) data 0.000 (0.000) loss 0.3392 (1.2530) lr 1.9511e-03 eta 1 day, 20:05:13
epoch [11/50] batch [1780/2000] time 2.030 (2.029) data 0.000 (0.000) loss 0.2910 (1.2541) lr 1.9511e-03 eta 1 day, 20:04:35
epoch [11/50] batch [1800/2000] time 2.052 (2.029) data 0.000 (0.000) loss 0.9064 (1.2529) lr 1.9511e-03 eta 1 day, 20:03:52
epoch [11/50] batch [1820/2000] time 2.052 (2.029) data 0.000 (0.000) loss 1.3534 (1.2559) lr 1.9511e-03 eta 1 day, 20:03:14
epoch [11/50] batch [1840/2000] time 2.030 (2.029) data 0.000 (0.000) loss 1.6610 (1.2559) lr 1.9511e-03 eta 1 day, 20:02:32
epoch [11/50] batch [1860/2000] time 2.050 (2.029) data 0.000 (0.000) loss 1.2423 (1.2543) lr 1.9511e-03 eta 1 day, 20:01:51
epoch [11/50] batch [1880/2000] time 2.049 (2.029) data 0.000 (0.000) loss 1.2589 (1.2542) lr 1.9511e-03 eta 1 day, 20:01:09
epoch [11/50] batch [1900/2000] time 1.997 (2.029) data 0.000 (0.000) loss 1.4974 (1.2525) lr 1.9511e-03 eta 1 day, 20:00:30
epoch [11/50] batch [1920/2000] time 1.998 (2.029) data 0.000 (0.000) loss 1.0449 (1.2557) lr 1.9511e-03 eta 1 day, 19:59:51
epoch [11/50] batch [1940/2000] time 2.027 (2.029) data 0.000 (0.000) loss 1.2548 (1.2554) lr 1.9511e-03 eta 1 day, 19:59:12
epoch [11/50] batch [1960/2000] time 2.002 (2.029) data 0.000 (0.000) loss 2.6439 (1.2556) lr 1.9511e-03 eta 1 day, 19:58:28
epoch [11/50] batch [1980/2000] time 2.011 (2.029) data 0.000 (0.000) loss 0.5716 (1.2549) lr 1.9511e-03 eta 1 day, 19:57:50
epoch [11/50] batch [2000/2000] time 2.049 (2.029) data 0.000 (0.000) loss 1.0380 (1.2540) lr 1.9298e-03 eta 1 day, 19:57:11
epoch [12/50] batch [20/2000] time 2.055 (2.055) data 0.000 (0.027) loss 0.8566 (1.0289) lr 1.9298e-03 eta 1 day, 20:30:13
epoch [12/50] batch [40/2000] time 2.051 (2.043) data 0.000 (0.014) loss 1.7521 (1.0622) lr 1.9298e-03 eta 1 day, 20:15:09
epoch [12/50] batch [60/2000] time 2.029 (2.040) data 0.001 (0.009) loss 1.5403 (1.0841) lr 1.9298e-03 eta 1 day, 20:10:11
epoch [12/50] batch [80/2000] time 2.051 (2.037) data 0.000 (0.007) loss 3.9426 (1.2574) lr 1.9298e-03 eta 1 day, 20:05:09
epoch [12/50] batch [100/2000] time 2.053 (2.036) data 0.000 (0.006) loss 0.5090 (1.3133) lr 1.9298e-03 eta 1 day, 20:03:41
epoch [12/50] batch [120/2000] time 2.052 (2.035) data 0.000 (0.005) loss 1.7478 (1.3692) lr 1.9298e-03 eta 1 day, 20:01:26
epoch [12/50] batch [140/2000] time 2.051 (2.035) data 0.000 (0.004) loss 0.6666 (1.3408) lr 1.9298e-03 eta 1 day, 20:00:13
epoch [12/50] batch [160/2000] time 2.053 (2.034) data 0.000 (0.004) loss 1.1217 (1.2903) lr 1.9298e-03 eta 1 day, 19:58:42
epoch [12/50] batch [180/2000] time 2.053 (2.033) data 0.000 (0.003) loss 0.4496 (1.3111) lr 1.9298e-03 eta 1 day, 19:57:05
epoch [12/50] batch [200/2000] time 1.997 (2.033) data 0.000 (0.003) loss 0.1943 (1.3565) lr 1.9298e-03 eta 1 day, 19:56:08
epoch [12/50] batch [220/2000] time 2.049 (2.033) data 0.000 (0.003) loss 0.9198 (1.3441) lr 1.9298e-03 eta 1 day, 19:55:19
epoch [12/50] batch [240/2000] time 2.052 (2.033) data 0.000 (0.002) loss 2.9959 (1.3734) lr 1.9298e-03 eta 1 day, 19:54:13
epoch [12/50] batch [260/2000] time 1.998 (2.033) data 0.000 (0.002) loss 1.1644 (1.3461) lr 1.9298e-03 eta 1 day, 19:53:50
epoch [12/50] batch [280/2000] time 2.026 (2.032) data 0.000 (0.002) loss 1.0618 (1.3658) lr 1.9298e-03 eta 1 day, 19:52:39
epoch [12/50] batch [300/2000] time 2.049 (2.032) data 0.000 (0.002) loss 0.6479 (1.3596) lr 1.9298e-03 eta 1 day, 19:51:31
epoch [12/50] batch [320/2000] time 1.999 (2.032) data 0.000 (0.002) loss 2.7095 (1.3566) lr 1.9298e-03 eta 1 day, 19:50:47
epoch [12/50] batch [340/2000] time 2.030 (2.032) data 0.000 (0.002) loss 0.6505 (1.3416) lr 1.9298e-03 eta 1 day, 19:49:52
epoch [12/50] batch [360/2000] time 2.028 (2.031) data 0.000 (0.002) loss 0.7334 (1.3234) lr 1.9298e-03 eta 1 day, 19:48:44
epoch [12/50] batch [380/2000] time 2.054 (2.031) data 0.000 (0.002) loss 0.2083 (1.3063) lr 1.9298e-03 eta 1 day, 19:47:45
epoch [12/50] batch [400/2000] time 2.051 (2.032) data 0.000 (0.002) loss 2.2619 (1.2948) lr 1.9298e-03 eta 1 day, 19:47:29
epoch [12/50] batch [420/2000] time 2.028 (2.032) data 0.000 (0.001) loss 1.9285 (1.2906) lr 1.9298e-03 eta 1 day, 19:46:51
epoch [12/50] batch [440/2000] time 2.026 (2.031) data 0.000 (0.001) loss 0.3861 (1.2785) lr 1.9298e-03 eta 1 day, 19:45:54
epoch [12/50] batch [460/2000] time 1.975 (2.031) data 0.000 (0.001) loss 0.8399 (1.2933) lr 1.9298e-03 eta 1 day, 19:44:44
epoch [12/50] batch [480/2000] time 2.054 (2.031) data 0.000 (0.001) loss 1.7459 (1.2945) lr 1.9298e-03 eta 1 day, 19:43:55
epoch [12/50] batch [500/2000] time 2.036 (2.031) data 0.000 (0.001) loss 1.7191 (1.2980) lr 1.9298e-03 eta 1 day, 19:43:30
epoch [12/50] batch [520/2000] time 2.027 (2.031) data 0.000 (0.001) loss 2.6685 (1.2847) lr 1.9298e-03 eta 1 day, 19:42:52
epoch [12/50] batch [540/2000] time 1.997 (2.031) data 0.000 (0.001) loss 1.9252 (1.2857) lr 1.9298e-03 eta 1 day, 19:42:09
epoch [12/50] batch [560/2000] time 2.051 (2.031) data 0.000 (0.001) loss 2.4158 (1.3028) lr 1.9298e-03 eta 1 day, 19:41:26
epoch [12/50] batch [580/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.7264 (1.2922) lr 1.9298e-03 eta 1 day, 19:40:14
epoch [12/50] batch [600/2000] time 2.050 (2.031) data 0.001 (0.001) loss 3.6137 (1.2940) lr 1.9298e-03 eta 1 day, 19:39:35
epoch [12/50] batch [620/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.9031 (1.2975) lr 1.9298e-03 eta 1 day, 19:39:10
epoch [12/50] batch [640/2000] time 2.032 (2.031) data 0.000 (0.001) loss 0.4855 (1.3004) lr 1.9298e-03 eta 1 day, 19:38:04
epoch [12/50] batch [660/2000] time 1.974 (2.030) data 0.000 (0.001) loss 1.9826 (1.3018) lr 1.9298e-03 eta 1 day, 19:37:01
epoch [12/50] batch [680/2000] time 2.028 (2.030) data 0.000 (0.001) loss 1.5031 (1.3112) lr 1.9298e-03 eta 1 day, 19:36:22
epoch [12/50] batch [700/2000] time 2.030 (2.030) data 0.000 (0.001) loss 2.2926 (1.3055) lr 1.9298e-03 eta 1 day, 19:35:32
epoch [12/50] batch [720/2000] time 2.048 (2.030) data 0.000 (0.001) loss 0.2284 (1.3001) lr 1.9298e-03 eta 1 day, 19:34:47
epoch [12/50] batch [740/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.1166 (1.3046) lr 1.9298e-03 eta 1 day, 19:33:56
epoch [12/50] batch [760/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.4806 (1.2987) lr 1.9298e-03 eta 1 day, 19:33:13
epoch [12/50] batch [780/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.9213 (1.2989) lr 1.9298e-03 eta 1 day, 19:32:31
epoch [12/50] batch [800/2000] time 2.050 (2.030) data 0.000 (0.001) loss 2.2728 (1.2929) lr 1.9298e-03 eta 1 day, 19:31:46
epoch [12/50] batch [820/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.2964 (1.3042) lr 1.9298e-03 eta 1 day, 19:31:04
epoch [12/50] batch [840/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.4169 (1.2941) lr 1.9298e-03 eta 1 day, 19:30:12
epoch [12/50] batch [860/2000] time 1.997 (2.030) data 0.000 (0.001) loss 2.9946 (1.3014) lr 1.9298e-03 eta 1 day, 19:29:34
epoch [12/50] batch [880/2000] time 2.049 (2.030) data 0.000 (0.001) loss 2.0691 (1.3047) lr 1.9298e-03 eta 1 day, 19:29:01
epoch [12/50] batch [900/2000] time 2.025 (2.030) data 0.000 (0.001) loss 0.1757 (1.3061) lr 1.9298e-03 eta 1 day, 19:28:22
epoch [12/50] batch [920/2000] time 2.026 (2.030) data 0.000 (0.001) loss 0.3138 (1.3044) lr 1.9298e-03 eta 1 day, 19:27:33
epoch [12/50] batch [940/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.1226 (1.3016) lr 1.9298e-03 eta 1 day, 19:26:57
epoch [12/50] batch [960/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.2534 (1.3087) lr 1.9298e-03 eta 1 day, 19:26:17
epoch [12/50] batch [980/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.2328 (1.3012) lr 1.9298e-03 eta 1 day, 19:25:28
epoch [12/50] batch [1000/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.2338 (1.3012) lr 1.9298e-03 eta 1 day, 19:24:43
epoch [12/50] batch [1020/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.9885 (1.2956) lr 1.9298e-03 eta 1 day, 19:24:08
epoch [12/50] batch [1040/2000] time 2.032 (2.030) data 0.000 (0.001) loss 1.4384 (1.2939) lr 1.9298e-03 eta 1 day, 19:23:24
epoch [12/50] batch [1060/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.4861 (1.2914) lr 1.9298e-03 eta 1 day, 19:22:42
epoch [12/50] batch [1080/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.2894 (1.2981) lr 1.9298e-03 eta 1 day, 19:22:03
epoch [12/50] batch [1100/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.3205 (1.2923) lr 1.9298e-03 eta 1 day, 19:21:16
epoch [12/50] batch [1120/2000] time 2.052 (2.030) data 0.000 (0.001) loss 4.3881 (1.2906) lr 1.9298e-03 eta 1 day, 19:20:36
epoch [12/50] batch [1140/2000] time 2.045 (2.030) data 0.001 (0.001) loss 2.3472 (1.2913) lr 1.9298e-03 eta 1 day, 19:19:55
epoch [12/50] batch [1160/2000] time 2.026 (2.030) data 0.000 (0.001) loss 0.2273 (1.2872) lr 1.9298e-03 eta 1 day, 19:19:08
epoch [12/50] batch [1180/2000] time 2.047 (2.029) data 0.000 (0.001) loss 2.0716 (1.2900) lr 1.9298e-03 eta 1 day, 19:18:23
epoch [12/50] batch [1200/2000] time 2.024 (2.029) data 0.000 (0.001) loss 1.5880 (1.2885) lr 1.9298e-03 eta 1 day, 19:17:38
epoch [12/50] batch [1220/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.5928 (1.2846) lr 1.9298e-03 eta 1 day, 19:16:59
epoch [12/50] batch [1240/2000] time 2.001 (2.029) data 0.000 (0.001) loss 0.3024 (1.2867) lr 1.9298e-03 eta 1 day, 19:16:12
epoch [12/50] batch [1260/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.5238 (1.2819) lr 1.9298e-03 eta 1 day, 19:15:27
epoch [12/50] batch [1280/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.8273 (1.2782) lr 1.9298e-03 eta 1 day, 19:14:50
epoch [12/50] batch [1300/2000] time 2.025 (2.029) data 0.000 (0.001) loss 1.0726 (1.2787) lr 1.9298e-03 eta 1 day, 19:13:57
epoch [12/50] batch [1320/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.8026 (1.2798) lr 1.9298e-03 eta 1 day, 19:13:12
epoch [12/50] batch [1340/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.9701 (1.2768) lr 1.9298e-03 eta 1 day, 19:12:20
epoch [12/50] batch [1360/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.4618 (1.2793) lr 1.9298e-03 eta 1 day, 19:11:35
epoch [12/50] batch [1380/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.3427 (1.2781) lr 1.9298e-03 eta 1 day, 19:10:46
epoch [12/50] batch [1400/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.3687 (1.2725) lr 1.9298e-03 eta 1 day, 19:10:03
epoch [12/50] batch [1420/2000] time 2.030 (2.029) data 0.000 (0.001) loss 1.6112 (1.2743) lr 1.9298e-03 eta 1 day, 19:09:34
epoch [12/50] batch [1440/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.9009 (1.2787) lr 1.9298e-03 eta 1 day, 19:08:49
epoch [12/50] batch [1460/2000] time 1.975 (2.029) data 0.000 (0.001) loss 0.3203 (1.2741) lr 1.9298e-03 eta 1 day, 19:08:11
epoch [12/50] batch [1480/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.5211 (1.2734) lr 1.9298e-03 eta 1 day, 19:07:34
epoch [12/50] batch [1500/2000] time 2.061 (2.029) data 0.000 (0.001) loss 2.0966 (1.2775) lr 1.9298e-03 eta 1 day, 19:06:59
epoch [12/50] batch [1520/2000] time 2.034 (2.029) data 0.000 (0.001) loss 0.2855 (1.2793) lr 1.9298e-03 eta 1 day, 19:06:25
epoch [12/50] batch [1540/2000] time 2.032 (2.029) data 0.000 (0.001) loss 1.7065 (1.2812) lr 1.9298e-03 eta 1 day, 19:05:51
epoch [12/50] batch [1560/2000] time 1.976 (2.029) data 0.000 (0.001) loss 0.9261 (1.2787) lr 1.9298e-03 eta 1 day, 19:05:07
epoch [12/50] batch [1580/2000] time 1.995 (2.029) data 0.000 (0.001) loss 0.1910 (1.2827) lr 1.9298e-03 eta 1 day, 19:04:24
epoch [12/50] batch [1600/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.7226 (1.2838) lr 1.9298e-03 eta 1 day, 19:03:45
epoch [12/50] batch [1620/2000] time 2.049 (2.029) data 0.000 (0.001) loss 4.1601 (1.2850) lr 1.9298e-03 eta 1 day, 19:03:01
epoch [12/50] batch [1640/2000] time 1.975 (2.029) data 0.000 (0.001) loss 0.0139 (1.2872) lr 1.9298e-03 eta 1 day, 19:02:21
epoch [12/50] batch [1660/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.1919 (1.2846) lr 1.9298e-03 eta 1 day, 19:01:42
epoch [12/50] batch [1680/2000] time 2.029 (2.029) data 0.001 (0.001) loss 0.7678 (1.2848) lr 1.9298e-03 eta 1 day, 19:01:06
epoch [12/50] batch [1700/2000] time 2.051 (2.029) data 0.000 (0.001) loss 2.0148 (1.2819) lr 1.9298e-03 eta 1 day, 19:00:24
epoch [12/50] batch [1720/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.1824 (1.2827) lr 1.9298e-03 eta 1 day, 18:59:44
epoch [12/50] batch [1740/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.2872 (1.2830) lr 1.9298e-03 eta 1 day, 18:59:10
epoch [12/50] batch [1760/2000] time 1.977 (2.029) data 0.000 (0.001) loss 1.7545 (1.2812) lr 1.9298e-03 eta 1 day, 18:58:27
epoch [12/50] batch [1780/2000] time 2.052 (2.029) data 0.000 (0.000) loss 0.4899 (1.2817) lr 1.9298e-03 eta 1 day, 18:57:49
epoch [12/50] batch [1800/2000] time 2.029 (2.029) data 0.000 (0.000) loss 0.4648 (1.2777) lr 1.9298e-03 eta 1 day, 18:57:06
epoch [12/50] batch [1820/2000] time 2.029 (2.029) data 0.000 (0.000) loss 1.8365 (1.2766) lr 1.9298e-03 eta 1 day, 18:56:24
epoch [12/50] batch [1840/2000] time 1.999 (2.029) data 0.000 (0.000) loss 0.8613 (1.2778) lr 1.9298e-03 eta 1 day, 18:55:46
epoch [12/50] batch [1860/2000] time 2.052 (2.029) data 0.000 (0.000) loss 1.2855 (1.2784) lr 1.9298e-03 eta 1 day, 18:55:05
epoch [12/50] batch [1880/2000] time 1.996 (2.029) data 0.000 (0.000) loss 0.3891 (1.2781) lr 1.9298e-03 eta 1 day, 18:54:31
epoch [12/50] batch [1900/2000] time 2.051 (2.029) data 0.000 (0.000) loss 0.6634 (1.2777) lr 1.9298e-03 eta 1 day, 18:53:52
epoch [12/50] batch [1920/2000] time 1.996 (2.029) data 0.000 (0.000) loss 0.7136 (1.2763) lr 1.9298e-03 eta 1 day, 18:53:08
epoch [12/50] batch [1940/2000] time 2.027 (2.029) data 0.000 (0.000) loss 0.8477 (1.2749) lr 1.9298e-03 eta 1 day, 18:52:25
epoch [12/50] batch [1960/2000] time 2.050 (2.029) data 0.000 (0.000) loss 0.6750 (1.2726) lr 1.9298e-03 eta 1 day, 18:51:48
epoch [12/50] batch [1980/2000] time 2.026 (2.029) data 0.000 (0.000) loss 2.5020 (1.2744) lr 1.9298e-03 eta 1 day, 18:51:08
epoch [12/50] batch [2000/2000] time 2.049 (2.029) data 0.000 (0.000) loss 0.8284 (1.2708) lr 1.9048e-03 eta 1 day, 18:50:23
epoch [13/50] batch [20/2000] time 1.996 (2.048) data 0.000 (0.027) loss 0.4310 (0.9637) lr 1.9048e-03 eta 1 day, 19:14:02
epoch [13/50] batch [40/2000] time 1.974 (2.040) data 0.000 (0.013) loss 1.5191 (1.1684) lr 1.9048e-03 eta 1 day, 19:03:00
epoch [13/50] batch [60/2000] time 2.055 (2.040) data 0.001 (0.009) loss 0.2257 (1.1042) lr 1.9048e-03 eta 1 day, 19:02:00
epoch [13/50] batch [80/2000] time 1.999 (2.037) data 0.000 (0.007) loss 1.4441 (1.1626) lr 1.9048e-03 eta 1 day, 18:57:40
epoch [13/50] batch [100/2000] time 2.000 (2.036) data 0.000 (0.005) loss 0.2023 (1.1353) lr 1.9048e-03 eta 1 day, 18:55:45
epoch [13/50] batch [120/2000] time 1.998 (2.036) data 0.000 (0.005) loss 1.1401 (1.1589) lr 1.9048e-03 eta 1 day, 18:54:53
epoch [13/50] batch [140/2000] time 2.053 (2.036) data 0.000 (0.004) loss 0.5604 (1.2279) lr 1.9048e-03 eta 1 day, 18:54:00
epoch [13/50] batch [160/2000] time 2.032 (2.035) data 0.000 (0.003) loss 0.5072 (1.2639) lr 1.9048e-03 eta 1 day, 18:52:05
epoch [13/50] batch [180/2000] time 1.999 (2.034) data 0.000 (0.003) loss 0.9309 (1.2763) lr 1.9048e-03 eta 1 day, 18:50:06
epoch [13/50] batch [200/2000] time 2.003 (2.033) data 0.000 (0.003) loss 0.8558 (1.2517) lr 1.9048e-03 eta 1 day, 18:48:51
epoch [13/50] batch [220/2000] time 2.058 (2.034) data 0.000 (0.003) loss 1.0547 (1.2750) lr 1.9048e-03 eta 1 day, 18:48:40
epoch [13/50] batch [240/2000] time 2.031 (2.033) data 0.000 (0.002) loss 0.9245 (1.2728) lr 1.9048e-03 eta 1 day, 18:47:34
epoch [13/50] batch [260/2000] time 2.050 (2.033) data 0.000 (0.002) loss 1.7208 (1.2867) lr 1.9048e-03 eta 1 day, 18:46:53
epoch [13/50] batch [280/2000] time 1.996 (2.033) data 0.000 (0.002) loss 2.1687 (1.2843) lr 1.9048e-03 eta 1 day, 18:45:59
epoch [13/50] batch [300/2000] time 2.052 (2.033) data 0.000 (0.002) loss 1.1604 (1.2833) lr 1.9048e-03 eta 1 day, 18:45:12
epoch [13/50] batch [320/2000] time 2.053 (2.033) data 0.000 (0.002) loss 1.6539 (1.2688) lr 1.9048e-03 eta 1 day, 18:44:04
epoch [13/50] batch [340/2000] time 2.052 (2.033) data 0.000 (0.002) loss 0.7859 (1.2722) lr 1.9048e-03 eta 1 day, 18:42:59
epoch [13/50] batch [360/2000] time 2.052 (2.032) data 0.000 (0.002) loss 0.0561 (1.2649) lr 1.9048e-03 eta 1 day, 18:41:55
epoch [13/50] batch [380/2000] time 2.027 (2.032) data 0.000 (0.002) loss 1.6621 (1.2555) lr 1.9048e-03 eta 1 day, 18:41:34
epoch [13/50] batch [400/2000] time 2.052 (2.032) data 0.000 (0.002) loss 1.8596 (1.2553) lr 1.9048e-03 eta 1 day, 18:40:18
epoch [13/50] batch [420/2000] time 2.029 (2.032) data 0.000 (0.001) loss 0.2663 (1.2760) lr 1.9048e-03 eta 1 day, 18:39:35
epoch [13/50] batch [440/2000] time 2.052 (2.032) data 0.000 (0.001) loss 1.4944 (1.2947) lr 1.9048e-03 eta 1 day, 18:38:50
epoch [13/50] batch [460/2000] time 2.000 (2.032) data 0.000 (0.001) loss 1.6836 (1.3041) lr 1.9048e-03 eta 1 day, 18:37:50
epoch [13/50] batch [480/2000] time 2.031 (2.032) data 0.000 (0.001) loss 0.2242 (1.3021) lr 1.9048e-03 eta 1 day, 18:37:05
epoch [13/50] batch [500/2000] time 1.998 (2.031) data 0.000 (0.001) loss 2.3824 (1.2949) lr 1.9048e-03 eta 1 day, 18:36:10
epoch [13/50] batch [520/2000] time 1.997 (2.031) data 0.000 (0.001) loss 1.5219 (1.2857) lr 1.9048e-03 eta 1 day, 18:35:23
epoch [13/50] batch [540/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.6769 (1.2815) lr 1.9048e-03 eta 1 day, 18:34:39
epoch [13/50] batch [560/2000] time 2.031 (2.031) data 0.000 (0.001) loss 2.9430 (1.2808) lr 1.9048e-03 eta 1 day, 18:33:57
epoch [13/50] batch [580/2000] time 2.028 (2.031) data 0.000 (0.001) loss 1.2479 (1.2771) lr 1.9048e-03 eta 1 day, 18:33:23
epoch [13/50] batch [600/2000] time 2.028 (2.031) data 0.001 (0.001) loss 2.8945 (1.2756) lr 1.9048e-03 eta 1 day, 18:32:52
epoch [13/50] batch [620/2000] time 2.055 (2.031) data 0.000 (0.001) loss 2.3902 (1.2720) lr 1.9048e-03 eta 1 day, 18:32:10
epoch [13/50] batch [640/2000] time 2.000 (2.031) data 0.000 (0.001) loss 0.2140 (1.2795) lr 1.9048e-03 eta 1 day, 18:31:12
epoch [13/50] batch [660/2000] time 2.053 (2.031) data 0.000 (0.001) loss 2.4081 (1.2786) lr 1.9048e-03 eta 1 day, 18:30:28
epoch [13/50] batch [680/2000] time 2.028 (2.031) data 0.000 (0.001) loss 2.4427 (1.2888) lr 1.9048e-03 eta 1 day, 18:30:06
epoch [13/50] batch [700/2000] time 1.999 (2.031) data 0.000 (0.001) loss 1.3095 (1.2935) lr 1.9048e-03 eta 1 day, 18:29:06
epoch [13/50] batch [720/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.6385 (1.2868) lr 1.9048e-03 eta 1 day, 18:28:15
epoch [13/50] batch [740/2000] time 2.027 (2.031) data 0.000 (0.001) loss 0.7103 (1.2979) lr 1.9048e-03 eta 1 day, 18:27:47
epoch [13/50] batch [760/2000] time 2.000 (2.031) data 0.000 (0.001) loss 0.5704 (1.2944) lr 1.9048e-03 eta 1 day, 18:27:09
epoch [13/50] batch [780/2000] time 2.028 (2.031) data 0.000 (0.001) loss 2.1993 (1.3052) lr 1.9048e-03 eta 1 day, 18:26:22
epoch [13/50] batch [800/2000] time 2.032 (2.031) data 0.000 (0.001) loss 2.2091 (1.3046) lr 1.9048e-03 eta 1 day, 18:25:24
epoch [13/50] batch [820/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.5809 (1.2923) lr 1.9048e-03 eta 1 day, 18:24:44
epoch [13/50] batch [840/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.3864 (1.2937) lr 1.9048e-03 eta 1 day, 18:24:01
epoch [13/50] batch [860/2000] time 2.027 (2.031) data 0.000 (0.001) loss 2.0259 (1.2932) lr 1.9048e-03 eta 1 day, 18:23:25
epoch [13/50] batch [880/2000] time 1.999 (2.031) data 0.000 (0.001) loss 1.0793 (1.2933) lr 1.9048e-03 eta 1 day, 18:22:41
epoch [13/50] batch [900/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.3867 (1.3000) lr 1.9048e-03 eta 1 day, 18:21:49
epoch [13/50] batch [920/2000] time 1.974 (2.031) data 0.000 (0.001) loss 0.1475 (1.3030) lr 1.9048e-03 eta 1 day, 18:21:01
epoch [13/50] batch [940/2000] time 2.001 (2.031) data 0.000 (0.001) loss 0.6197 (1.3062) lr 1.9048e-03 eta 1 day, 18:20:19
epoch [13/50] batch [960/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.3635 (1.3017) lr 1.9048e-03 eta 1 day, 18:19:28
epoch [13/50] batch [980/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.2925 (1.3035) lr 1.9048e-03 eta 1 day, 18:18:45
epoch [13/50] batch [1000/2000] time 2.028 (2.031) data 0.000 (0.001) loss 1.1345 (1.3118) lr 1.9048e-03 eta 1 day, 18:18:09
epoch [13/50] batch [1020/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.3953 (1.3098) lr 1.9048e-03 eta 1 day, 18:17:29
epoch [13/50] batch [1040/2000] time 1.974 (2.030) data 0.000 (0.001) loss 1.7531 (1.3103) lr 1.9048e-03 eta 1 day, 18:16:43
epoch [13/50] batch [1060/2000] time 2.028 (2.030) data 0.000 (0.001) loss 1.7934 (1.3060) lr 1.9048e-03 eta 1 day, 18:15:53
epoch [13/50] batch [1080/2000] time 2.059 (2.030) data 0.000 (0.001) loss 0.2309 (1.3046) lr 1.9048e-03 eta 1 day, 18:15:08
epoch [13/50] batch [1100/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.1715 (1.3067) lr 1.9048e-03 eta 1 day, 18:14:24
epoch [13/50] batch [1120/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.3020 (1.3040) lr 1.9048e-03 eta 1 day, 18:13:52
epoch [13/50] batch [1140/2000] time 2.032 (2.031) data 0.001 (0.001) loss 1.6750 (1.3072) lr 1.9048e-03 eta 1 day, 18:13:23
epoch [13/50] batch [1160/2000] time 2.004 (2.031) data 0.000 (0.001) loss 1.5061 (1.3075) lr 1.9048e-03 eta 1 day, 18:12:44
epoch [13/50] batch [1180/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.9225 (1.3066) lr 1.9048e-03 eta 1 day, 18:12:03
epoch [13/50] batch [1200/2000] time 2.054 (2.031) data 0.000 (0.001) loss 0.1060 (1.3048) lr 1.9048e-03 eta 1 day, 18:11:29
epoch [13/50] batch [1220/2000] time 2.052 (2.031) data 0.000 (0.001) loss 2.0194 (1.3082) lr 1.9048e-03 eta 1 day, 18:10:41
epoch [13/50] batch [1240/2000] time 2.032 (2.031) data 0.000 (0.001) loss 0.4415 (1.3054) lr 1.9048e-03 eta 1 day, 18:10:01
epoch [13/50] batch [1260/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.4296 (1.3022) lr 1.9048e-03 eta 1 day, 18:09:23
epoch [13/50] batch [1280/2000] time 1.973 (2.030) data 0.000 (0.001) loss 2.8885 (1.2997) lr 1.9048e-03 eta 1 day, 18:08:34
epoch [13/50] batch [1300/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.4779 (1.2996) lr 1.9048e-03 eta 1 day, 18:07:39
epoch [13/50] batch [1320/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.6215 (1.2976) lr 1.9048e-03 eta 1 day, 18:07:00
epoch [13/50] batch [1340/2000] time 2.030 (2.030) data 0.000 (0.001) loss 1.6958 (1.2957) lr 1.9048e-03 eta 1 day, 18:06:18
epoch [13/50] batch [1360/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.8172 (1.2950) lr 1.9048e-03 eta 1 day, 18:05:32
epoch [13/50] batch [1380/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.6435 (1.2921) lr 1.9048e-03 eta 1 day, 18:04:49
epoch [13/50] batch [1400/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.9359 (1.2889) lr 1.9048e-03 eta 1 day, 18:04:07
epoch [13/50] batch [1420/2000] time 2.000 (2.030) data 0.000 (0.001) loss 1.5634 (1.2876) lr 1.9048e-03 eta 1 day, 18:03:31
epoch [13/50] batch [1440/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.3880 (1.2898) lr 1.9048e-03 eta 1 day, 18:02:48
epoch [13/50] batch [1460/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.7109 (1.2925) lr 1.9048e-03 eta 1 day, 18:02:08
epoch [13/50] batch [1480/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.7607 (1.2936) lr 1.9048e-03 eta 1 day, 18:01:34
epoch [13/50] batch [1500/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.3866 (1.2887) lr 1.9048e-03 eta 1 day, 18:00:54
epoch [13/50] batch [1520/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.4091 (1.2879) lr 1.9048e-03 eta 1 day, 18:00:00
epoch [13/50] batch [1540/2000] time 2.052 (2.030) data 0.000 (0.001) loss 3.4288 (1.2923) lr 1.9048e-03 eta 1 day, 17:59:24
epoch [13/50] batch [1560/2000] time 2.028 (2.030) data 0.000 (0.001) loss 2.5180 (1.2943) lr 1.9048e-03 eta 1 day, 17:58:33
epoch [13/50] batch [1580/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.9866 (1.2917) lr 1.9048e-03 eta 1 day, 17:57:50
epoch [13/50] batch [1600/2000] time 2.026 (2.030) data 0.000 (0.001) loss 1.3024 (1.2892) lr 1.9048e-03 eta 1 day, 17:57:09
epoch [13/50] batch [1620/2000] time 2.024 (2.030) data 0.000 (0.001) loss 0.9123 (1.2898) lr 1.9048e-03 eta 1 day, 17:56:29
epoch [13/50] batch [1640/2000] time 2.028 (2.030) data 0.000 (0.001) loss 4.6149 (1.2913) lr 1.9048e-03 eta 1 day, 17:55:49
epoch [13/50] batch [1660/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.5859 (1.2895) lr 1.9048e-03 eta 1 day, 17:55:05
epoch [13/50] batch [1680/2000] time 2.055 (2.030) data 0.001 (0.001) loss 3.7535 (1.2899) lr 1.9048e-03 eta 1 day, 17:54:24
epoch [13/50] batch [1700/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.7574 (1.2890) lr 1.9048e-03 eta 1 day, 17:53:44
epoch [13/50] batch [1720/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.1386 (1.2874) lr 1.9048e-03 eta 1 day, 17:53:02
epoch [13/50] batch [1740/2000] time 2.049 (2.030) data 0.000 (0.001) loss 3.0242 (1.2883) lr 1.9048e-03 eta 1 day, 17:52:19
epoch [13/50] batch [1760/2000] time 2.049 (2.030) data 0.000 (0.001) loss 3.3964 (1.2867) lr 1.9048e-03 eta 1 day, 17:51:39
epoch [13/50] batch [1780/2000] time 2.049 (2.030) data 0.000 (0.000) loss 0.1791 (1.2835) lr 1.9048e-03 eta 1 day, 17:50:59
epoch [13/50] batch [1800/2000] time 1.996 (2.030) data 0.000 (0.000) loss 0.3171 (1.2812) lr 1.9048e-03 eta 1 day, 17:50:15
epoch [13/50] batch [1820/2000] time 2.029 (2.030) data 0.000 (0.000) loss 0.7740 (1.2843) lr 1.9048e-03 eta 1 day, 17:49:36
epoch [13/50] batch [1840/2000] time 2.050 (2.030) data 0.000 (0.000) loss 0.9741 (1.2828) lr 1.9048e-03 eta 1 day, 17:48:57
epoch [13/50] batch [1860/2000] time 1.995 (2.030) data 0.000 (0.000) loss 0.5294 (1.2841) lr 1.9048e-03 eta 1 day, 17:48:17
epoch [13/50] batch [1880/2000] time 2.051 (2.030) data 0.000 (0.000) loss 1.2481 (1.2845) lr 1.9048e-03 eta 1 day, 17:47:38
epoch [13/50] batch [1900/2000] time 1.997 (2.030) data 0.000 (0.000) loss 1.6691 (1.2821) lr 1.9048e-03 eta 1 day, 17:46:52
epoch [13/50] batch [1920/2000] time 2.052 (2.030) data 0.000 (0.000) loss 0.6812 (1.2879) lr 1.9048e-03 eta 1 day, 17:46:09
epoch [13/50] batch [1940/2000] time 2.029 (2.030) data 0.000 (0.000) loss 4.2227 (1.2933) lr 1.9048e-03 eta 1 day, 17:45:20
epoch [13/50] batch [1960/2000] time 2.054 (2.030) data 0.000 (0.000) loss 1.2706 (1.2917) lr 1.9048e-03 eta 1 day, 17:44:42
epoch [13/50] batch [1980/2000] time 2.048 (2.030) data 0.000 (0.000) loss 3.0470 (1.2916) lr 1.9048e-03 eta 1 day, 17:44:00
epoch [13/50] batch [2000/2000] time 2.049 (2.030) data 0.000 (0.000) loss 1.2750 (1.2900) lr 1.8763e-03 eta 1 day, 17:43:14
epoch [14/50] batch [20/2000] time 1.997 (2.059) data 0.000 (0.027) loss 1.3895 (1.2730) lr 1.8763e-03 eta 1 day, 18:18:20
epoch [14/50] batch [40/2000] time 2.026 (2.042) data 0.000 (0.013) loss 0.1707 (1.4370) lr 1.8763e-03 eta 1 day, 17:57:40
epoch [14/50] batch [60/2000] time 2.050 (2.037) data 0.001 (0.009) loss 0.4965 (1.3327) lr 1.8763e-03 eta 1 day, 17:50:20
epoch [14/50] batch [80/2000] time 2.049 (2.035) data 0.000 (0.007) loss 1.8810 (1.3928) lr 1.8763e-03 eta 1 day, 17:46:58
epoch [14/50] batch [100/2000] time 1.998 (2.034) data 0.000 (0.006) loss 0.2493 (1.3250) lr 1.8763e-03 eta 1 day, 17:45:38
epoch [14/50] batch [120/2000] time 2.027 (2.032) data 0.000 (0.005) loss 0.2735 (1.2681) lr 1.8763e-03 eta 1 day, 17:41:48
epoch [14/50] batch [140/2000] time 1.974 (2.032) data 0.000 (0.004) loss 1.2316 (1.2331) lr 1.8763e-03 eta 1 day, 17:41:06
epoch [14/50] batch [160/2000] time 2.029 (2.031) data 0.000 (0.004) loss 0.2064 (1.2191) lr 1.8763e-03 eta 1 day, 17:39:37
epoch [14/50] batch [180/2000] time 2.028 (2.030) data 0.000 (0.003) loss 0.4966 (1.2177) lr 1.8763e-03 eta 1 day, 17:37:59
epoch [14/50] batch [200/2000] time 2.055 (2.030) data 0.000 (0.003) loss 0.8357 (1.2336) lr 1.8763e-03 eta 1 day, 17:37:14
epoch [14/50] batch [220/2000] time 2.001 (2.030) data 0.000 (0.003) loss 2.0769 (1.2312) lr 1.8763e-03 eta 1 day, 17:36:48
epoch [14/50] batch [240/2000] time 2.053 (2.031) data 0.000 (0.002) loss 0.9085 (1.2217) lr 1.8763e-03 eta 1 day, 17:36:16
epoch [14/50] batch [260/2000] time 2.030 (2.030) data 0.000 (0.002) loss 1.8512 (1.2048) lr 1.8763e-03 eta 1 day, 17:34:45
epoch [14/50] batch [280/2000] time 2.054 (2.030) data 0.000 (0.002) loss 1.7132 (1.2099) lr 1.8763e-03 eta 1 day, 17:34:01
epoch [14/50] batch [300/2000] time 2.054 (2.030) data 0.000 (0.002) loss 7.0862 (1.2253) lr 1.8763e-03 eta 1 day, 17:33:36
epoch [14/50] batch [320/2000] time 2.054 (2.030) data 0.000 (0.002) loss 1.2643 (1.2139) lr 1.8763e-03 eta 1 day, 17:32:51
epoch [14/50] batch [340/2000] time 2.003 (2.030) data 0.000 (0.002) loss 1.6975 (1.2047) lr 1.8763e-03 eta 1 day, 17:32:24
epoch [14/50] batch [360/2000] time 2.054 (2.030) data 0.000 (0.002) loss 1.6728 (1.2078) lr 1.8763e-03 eta 1 day, 17:31:27
epoch [14/50] batch [380/2000] time 2.058 (2.030) data 0.000 (0.002) loss 0.2805 (1.1954) lr 1.8763e-03 eta 1 day, 17:30:38
epoch [14/50] batch [400/2000] time 2.054 (2.030) data 0.000 (0.002) loss 1.8169 (1.2045) lr 1.8763e-03 eta 1 day, 17:29:49
epoch [14/50] batch [420/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.8972 (1.2240) lr 1.8763e-03 eta 1 day, 17:29:38
epoch [14/50] batch [440/2000] time 2.052 (2.031) data 0.000 (0.001) loss 2.1584 (1.2215) lr 1.8763e-03 eta 1 day, 17:29:25
epoch [14/50] batch [460/2000] time 2.028 (2.030) data 0.000 (0.001) loss 2.2658 (1.2260) lr 1.8763e-03 eta 1 day, 17:28:12
epoch [14/50] batch [480/2000] time 2.026 (2.030) data 0.000 (0.001) loss 0.9364 (1.2243) lr 1.8763e-03 eta 1 day, 17:27:33
epoch [14/50] batch [500/2000] time 2.024 (2.030) data 0.000 (0.001) loss 0.1520 (1.2109) lr 1.8763e-03 eta 1 day, 17:26:44
epoch [14/50] batch [520/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.4981 (1.2019) lr 1.8763e-03 eta 1 day, 17:25:50
epoch [14/50] batch [540/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.3616 (1.1995) lr 1.8763e-03 eta 1 day, 17:25:00
epoch [14/50] batch [560/2000] time 1.974 (2.030) data 0.000 (0.001) loss 0.2100 (1.1950) lr 1.8763e-03 eta 1 day, 17:24:15
epoch [14/50] batch [580/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.1808 (1.1969) lr 1.8763e-03 eta 1 day, 17:23:30
epoch [14/50] batch [600/2000] time 2.055 (2.030) data 0.001 (0.001) loss 1.8394 (1.2077) lr 1.8763e-03 eta 1 day, 17:22:59
epoch [14/50] batch [620/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.3271 (1.2027) lr 1.8763e-03 eta 1 day, 17:22:17
epoch [14/50] batch [640/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.3420 (1.2096) lr 1.8763e-03 eta 1 day, 17:21:41
epoch [14/50] batch [660/2000] time 1.995 (2.030) data 0.000 (0.001) loss 0.0728 (1.2124) lr 1.8763e-03 eta 1 day, 17:20:50
epoch [14/50] batch [680/2000] time 1.975 (2.030) data 0.000 (0.001) loss 0.4305 (1.2108) lr 1.8763e-03 eta 1 day, 17:20:21
epoch [14/50] batch [700/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.9127 (1.2100) lr 1.8763e-03 eta 1 day, 17:19:39
epoch [14/50] batch [720/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.7253 (1.2116) lr 1.8763e-03 eta 1 day, 17:18:58
epoch [14/50] batch [740/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.3533 (1.2147) lr 1.8763e-03 eta 1 day, 17:18:28
epoch [14/50] batch [760/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.1662 (1.2223) lr 1.8763e-03 eta 1 day, 17:18:01
epoch [14/50] batch [780/2000] time 2.028 (2.030) data 0.000 (0.001) loss 2.5086 (1.2127) lr 1.8763e-03 eta 1 day, 17:17:23
epoch [14/50] batch [800/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.9279 (1.2060) lr 1.8763e-03 eta 1 day, 17:16:43
epoch [14/50] batch [820/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.5862 (1.2120) lr 1.8763e-03 eta 1 day, 17:16:25
epoch [14/50] batch [840/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.5430 (1.2059) lr 1.8763e-03 eta 1 day, 17:15:25
epoch [14/50] batch [860/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.4013 (1.2032) lr 1.8763e-03 eta 1 day, 17:14:54
epoch [14/50] batch [880/2000] time 1.972 (2.030) data 0.000 (0.001) loss 0.4553 (1.2054) lr 1.8763e-03 eta 1 day, 17:14:04
epoch [14/50] batch [900/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.8589 (1.2204) lr 1.8763e-03 eta 1 day, 17:13:23
epoch [14/50] batch [920/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.6324 (1.2212) lr 1.8763e-03 eta 1 day, 17:12:43
epoch [14/50] batch [940/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.9321 (1.2199) lr 1.8763e-03 eta 1 day, 17:11:48
epoch [14/50] batch [960/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.8445 (1.2189) lr 1.8763e-03 eta 1 day, 17:11:05
epoch [14/50] batch [980/2000] time 2.060 (2.030) data 0.000 (0.001) loss 0.0802 (1.2150) lr 1.8763e-03 eta 1 day, 17:10:26
epoch [14/50] batch [1000/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.7355 (1.2244) lr 1.8763e-03 eta 1 day, 17:09:38
epoch [14/50] batch [1020/2000] time 1.998 (2.030) data 0.000 (0.001) loss 2.3400 (1.2236) lr 1.8763e-03 eta 1 day, 17:08:54
epoch [14/50] batch [1040/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.4588 (1.2192) lr 1.8763e-03 eta 1 day, 17:08:06
epoch [14/50] batch [1060/2000] time 1.975 (2.030) data 0.000 (0.001) loss 3.5071 (1.2210) lr 1.8763e-03 eta 1 day, 17:07:32
epoch [14/50] batch [1080/2000] time 2.056 (2.030) data 0.000 (0.001) loss 0.0924 (1.2238) lr 1.8763e-03 eta 1 day, 17:06:47
epoch [14/50] batch [1100/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.1877 (1.2263) lr 1.8763e-03 eta 1 day, 17:06:18
epoch [14/50] batch [1120/2000] time 2.028 (2.030) data 0.000 (0.001) loss 1.2800 (1.2327) lr 1.8763e-03 eta 1 day, 17:05:32
epoch [14/50] batch [1140/2000] time 2.053 (2.030) data 0.001 (0.001) loss 0.1360 (1.2396) lr 1.8763e-03 eta 1 day, 17:04:51
epoch [14/50] batch [1160/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.2092 (1.2460) lr 1.8763e-03 eta 1 day, 17:04:11
epoch [14/50] batch [1180/2000] time 2.055 (2.030) data 0.000 (0.001) loss 2.5595 (1.2445) lr 1.8763e-03 eta 1 day, 17:03:30
epoch [14/50] batch [1200/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.1832 (1.2466) lr 1.8763e-03 eta 1 day, 17:02:49
epoch [14/50] batch [1220/2000] time 1.973 (2.030) data 0.000 (0.001) loss 1.1069 (1.2487) lr 1.8763e-03 eta 1 day, 17:02:10
epoch [14/50] batch [1240/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.2792 (1.2574) lr 1.8763e-03 eta 1 day, 17:01:26
epoch [14/50] batch [1260/2000] time 2.026 (2.030) data 0.000 (0.001) loss 3.5749 (1.2600) lr 1.8763e-03 eta 1 day, 17:00:38
epoch [14/50] batch [1280/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.3154 (1.2591) lr 1.8763e-03 eta 1 day, 16:59:53
epoch [14/50] batch [1300/2000] time 2.051 (2.030) data 0.000 (0.001) loss 2.5193 (1.2589) lr 1.8763e-03 eta 1 day, 16:59:07
epoch [14/50] batch [1320/2000] time 1.974 (2.030) data 0.000 (0.001) loss 0.9315 (1.2620) lr 1.8763e-03 eta 1 day, 16:58:25
epoch [14/50] batch [1340/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.1907 (1.2612) lr 1.8763e-03 eta 1 day, 16:57:48
epoch [14/50] batch [1360/2000] time 2.051 (2.030) data 0.000 (0.001) loss 4.2524 (1.2634) lr 1.8763e-03 eta 1 day, 16:57:09
epoch [14/50] batch [1380/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.6726 (1.2643) lr 1.8763e-03 eta 1 day, 16:56:31
epoch [14/50] batch [1400/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.1350 (1.2658) lr 1.8763e-03 eta 1 day, 16:55:46
epoch [14/50] batch [1420/2000] time 2.046 (2.029) data 0.000 (0.001) loss 2.3334 (1.2655) lr 1.8763e-03 eta 1 day, 16:55:00
epoch [14/50] batch [1440/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.9216 (1.2659) lr 1.8763e-03 eta 1 day, 16:54:20
epoch [14/50] batch [1460/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.5928 (1.2691) lr 1.8763e-03 eta 1 day, 16:53:39
epoch [14/50] batch [1480/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.3302 (1.2700) lr 1.8763e-03 eta 1 day, 16:52:55
epoch [14/50] batch [1500/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.2100 (1.2732) lr 1.8763e-03 eta 1 day, 16:52:13
epoch [14/50] batch [1520/2000] time 2.057 (2.029) data 0.000 (0.001) loss 1.0128 (1.2717) lr 1.8763e-03 eta 1 day, 16:51:31
epoch [14/50] batch [1540/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.6541 (1.2706) lr 1.8763e-03 eta 1 day, 16:50:55
epoch [14/50] batch [1560/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.7066 (1.2698) lr 1.8763e-03 eta 1 day, 16:50:09
epoch [14/50] batch [1580/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.7422 (1.2716) lr 1.8763e-03 eta 1 day, 16:49:27
epoch [14/50] batch [1600/2000] time 1.995 (2.029) data 0.000 (0.001) loss 0.3225 (1.2728) lr 1.8763e-03 eta 1 day, 16:48:47
epoch [14/50] batch [1620/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.3624 (1.2714) lr 1.8763e-03 eta 1 day, 16:48:04
epoch [14/50] batch [1640/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.1530 (1.2758) lr 1.8763e-03 eta 1 day, 16:47:15
epoch [14/50] batch [1660/2000] time 2.027 (2.029) data 0.000 (0.001) loss 2.6431 (1.2772) lr 1.8763e-03 eta 1 day, 16:46:37
epoch [14/50] batch [1680/2000] time 2.027 (2.029) data 0.001 (0.001) loss 1.9790 (1.2769) lr 1.8763e-03 eta 1 day, 16:45:50
epoch [14/50] batch [1700/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.5823 (1.2783) lr 1.8763e-03 eta 1 day, 16:45:07
epoch [14/50] batch [1720/2000] time 1.977 (2.029) data 0.000 (0.001) loss 0.8188 (1.2764) lr 1.8763e-03 eta 1 day, 16:44:24
epoch [14/50] batch [1740/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.7334 (1.2732) lr 1.8763e-03 eta 1 day, 16:43:43
epoch [14/50] batch [1760/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.8741 (1.2719) lr 1.8763e-03 eta 1 day, 16:43:06
epoch [14/50] batch [1780/2000] time 2.052 (2.029) data 0.000 (0.000) loss 0.5100 (1.2720) lr 1.8763e-03 eta 1 day, 16:42:29
epoch [14/50] batch [1800/2000] time 2.051 (2.029) data 0.000 (0.000) loss 0.1146 (1.2703) lr 1.8763e-03 eta 1 day, 16:41:48
epoch [14/50] batch [1820/2000] time 2.048 (2.029) data 0.000 (0.000) loss 0.3919 (1.2717) lr 1.8763e-03 eta 1 day, 16:41:13
epoch [14/50] batch [1840/2000] time 2.025 (2.029) data 0.000 (0.000) loss 1.1484 (1.2704) lr 1.8763e-03 eta 1 day, 16:40:34
epoch [14/50] batch [1860/2000] time 2.050 (2.029) data 0.000 (0.000) loss 1.2898 (1.2744) lr 1.8763e-03 eta 1 day, 16:39:55
epoch [14/50] batch [1880/2000] time 2.050 (2.029) data 0.000 (0.000) loss 0.6065 (1.2719) lr 1.8763e-03 eta 1 day, 16:39:15
epoch [14/50] batch [1900/2000] time 2.050 (2.029) data 0.000 (0.000) loss 0.1778 (1.2726) lr 1.8763e-03 eta 1 day, 16:38:31
epoch [14/50] batch [1920/2000] time 1.973 (2.029) data 0.000 (0.000) loss 0.2694 (1.2706) lr 1.8763e-03 eta 1 day, 16:37:46
epoch [14/50] batch [1940/2000] time 2.026 (2.029) data 0.000 (0.000) loss 0.0904 (1.2728) lr 1.8763e-03 eta 1 day, 16:37:06
epoch [14/50] batch [1960/2000] time 1.995 (2.029) data 0.000 (0.000) loss 2.7718 (1.2744) lr 1.8763e-03 eta 1 day, 16:36:25
epoch [14/50] batch [1980/2000] time 2.027 (2.029) data 0.000 (0.000) loss 0.8536 (1.2722) lr 1.8763e-03 eta 1 day, 16:35:44
epoch [14/50] batch [2000/2000] time 2.050 (2.029) data 0.000 (0.000) loss 1.2662 (1.2756) lr 1.8443e-03 eta 1 day, 16:35:05
epoch [15/50] batch [20/2000] time 2.054 (2.061) data 0.000 (0.026) loss 1.0667 (1.3465) lr 1.8443e-03 eta 1 day, 17:12:00
epoch [15/50] batch [40/2000] time 2.049 (2.047) data 0.000 (0.013) loss 2.7470 (1.3230) lr 1.8443e-03 eta 1 day, 16:54:26
epoch [15/50] batch [60/2000] time 1.997 (2.039) data 0.000 (0.009) loss 0.7983 (1.3314) lr 1.8443e-03 eta 1 day, 16:45:15
epoch [15/50] batch [80/2000] time 1.997 (2.038) data 0.000 (0.007) loss 1.6135 (1.2572) lr 1.8443e-03 eta 1 day, 16:42:30
epoch [15/50] batch [100/2000] time 1.999 (2.036) data 0.000 (0.005) loss 0.7364 (1.2142) lr 1.8443e-03 eta 1 day, 16:39:15
epoch [15/50] batch [120/2000] time 1.999 (2.035) data 0.000 (0.005) loss 1.2289 (1.2127) lr 1.8443e-03 eta 1 day, 16:37:23
epoch [15/50] batch [140/2000] time 2.049 (2.034) data 0.000 (0.004) loss 1.7776 (1.2464) lr 1.8443e-03 eta 1 day, 16:35:41
epoch [15/50] batch [160/2000] time 2.049 (2.033) data 0.000 (0.003) loss 1.9254 (1.2511) lr 1.8443e-03 eta 1 day, 16:34:15
epoch [15/50] batch [180/2000] time 2.052 (2.033) data 0.000 (0.003) loss 1.0476 (1.2225) lr 1.8443e-03 eta 1 day, 16:33:52
epoch [15/50] batch [200/2000] time 1.997 (2.033) data 0.000 (0.003) loss 1.7319 (1.2176) lr 1.8443e-03 eta 1 day, 16:32:53
epoch [15/50] batch [220/2000] time 2.030 (2.033) data 0.000 (0.003) loss 0.5101 (1.2297) lr 1.8443e-03 eta 1 day, 16:32:22
epoch [15/50] batch [240/2000] time 2.028 (2.033) data 0.000 (0.002) loss 1.3861 (1.2420) lr 1.8443e-03 eta 1 day, 16:31:25
epoch [15/50] batch [260/2000] time 2.052 (2.033) data 0.000 (0.002) loss 2.9016 (1.2678) lr 1.8443e-03 eta 1 day, 16:31:08
epoch [15/50] batch [280/2000] time 2.002 (2.033) data 0.000 (0.002) loss 2.7279 (1.2851) lr 1.8443e-03 eta 1 day, 16:30:04
epoch [15/50] batch [300/2000] time 2.057 (2.033) data 0.000 (0.002) loss 1.9188 (1.2734) lr 1.8443e-03 eta 1 day, 16:29:30
epoch [15/50] batch [320/2000] time 2.002 (2.033) data 0.000 (0.002) loss 3.6447 (1.2688) lr 1.8443e-03 eta 1 day, 16:28:49
epoch [15/50] batch [340/2000] time 2.004 (2.033) data 0.000 (0.002) loss 1.7459 (1.2768) lr 1.8443e-03 eta 1 day, 16:28:00
epoch [15/50] batch [360/2000] time 2.059 (2.033) data 0.000 (0.002) loss 1.2450 (1.2910) lr 1.8443e-03 eta 1 day, 16:27:46
epoch [15/50] batch [380/2000] time 2.036 (2.033) data 0.000 (0.002) loss 1.3032 (1.2911) lr 1.8443e-03 eta 1 day, 16:27:09
epoch [15/50] batch [400/2000] time 2.052 (2.033) data 0.000 (0.002) loss 1.2215 (1.3131) lr 1.8443e-03 eta 1 day, 16:26:04
epoch [15/50] batch [420/2000] time 1.998 (2.033) data 0.000 (0.001) loss 1.2704 (1.2836) lr 1.8443e-03 eta 1 day, 16:24:55
epoch [15/50] batch [440/2000] time 2.030 (2.033) data 0.000 (0.001) loss 2.1793 (1.2785) lr 1.8443e-03 eta 1 day, 16:24:05
epoch [15/50] batch [460/2000] time 2.054 (2.032) data 0.000 (0.001) loss 1.3068 (1.2894) lr 1.8443e-03 eta 1 day, 16:23:21
epoch [15/50] batch [480/2000] time 2.053 (2.032) data 0.000 (0.001) loss 2.3115 (1.2930) lr 1.8443e-03 eta 1 day, 16:21:56
epoch [15/50] batch [500/2000] time 1.974 (2.032) data 0.000 (0.001) loss 0.9638 (1.2873) lr 1.8443e-03 eta 1 day, 16:21:09
epoch [15/50] batch [520/2000] time 1.998 (2.032) data 0.000 (0.001) loss 2.3328 (1.2817) lr 1.8443e-03 eta 1 day, 16:20:26
epoch [15/50] batch [540/2000] time 2.030 (2.032) data 0.000 (0.001) loss 0.9864 (1.2912) lr 1.8443e-03 eta 1 day, 16:19:40
epoch [15/50] batch [560/2000] time 2.027 (2.032) data 0.000 (0.001) loss 0.4103 (1.2945) lr 1.8443e-03 eta 1 day, 16:19:04
epoch [15/50] batch [580/2000] time 1.975 (2.032) data 0.000 (0.001) loss 1.8780 (1.2951) lr 1.8443e-03 eta 1 day, 16:18:22
epoch [15/50] batch [600/2000] time 1.998 (2.031) data 0.001 (0.001) loss 1.3826 (1.2956) lr 1.8443e-03 eta 1 day, 16:17:28
epoch [15/50] batch [620/2000] time 2.050 (2.031) data 0.000 (0.001) loss 1.2182 (1.3027) lr 1.8443e-03 eta 1 day, 16:16:30
epoch [15/50] batch [640/2000] time 2.025 (2.031) data 0.000 (0.001) loss 0.1738 (1.3018) lr 1.8443e-03 eta 1 day, 16:15:43
epoch [15/50] batch [660/2000] time 1.996 (2.031) data 0.000 (0.001) loss 0.6210 (1.2973) lr 1.8443e-03 eta 1 day, 16:14:49
epoch [15/50] batch [680/2000] time 2.025 (2.031) data 0.000 (0.001) loss 1.5356 (1.2981) lr 1.8443e-03 eta 1 day, 16:14:12
epoch [15/50] batch [700/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.8852 (1.3008) lr 1.8443e-03 eta 1 day, 16:13:24
epoch [15/50] batch [720/2000] time 2.049 (2.031) data 0.000 (0.001) loss 1.2245 (1.3002) lr 1.8443e-03 eta 1 day, 16:12:28
epoch [15/50] batch [740/2000] time 2.027 (2.031) data 0.000 (0.001) loss 1.9988 (1.3020) lr 1.8443e-03 eta 1 day, 16:11:38
epoch [15/50] batch [760/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.6018 (1.2935) lr 1.8443e-03 eta 1 day, 16:10:44
epoch [15/50] batch [780/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.4687 (1.3025) lr 1.8443e-03 eta 1 day, 16:10:09
epoch [15/50] batch [800/2000] time 2.050 (2.030) data 0.000 (0.001) loss 2.3770 (1.3024) lr 1.8443e-03 eta 1 day, 16:09:24
epoch [15/50] batch [820/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.1027 (1.2979) lr 1.8443e-03 eta 1 day, 16:08:26
epoch [15/50] batch [840/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.9727 (1.3016) lr 1.8443e-03 eta 1 day, 16:07:47
epoch [15/50] batch [860/2000] time 2.030 (2.030) data 0.000 (0.001) loss 1.5830 (1.2953) lr 1.8443e-03 eta 1 day, 16:07:06
epoch [15/50] batch [880/2000] time 1.973 (2.030) data 0.000 (0.001) loss 1.1418 (1.2954) lr 1.8443e-03 eta 1 day, 16:06:24
epoch [15/50] batch [900/2000] time 2.029 (2.030) data 0.000 (0.001) loss 2.1677 (1.2932) lr 1.8443e-03 eta 1 day, 16:05:42
epoch [15/50] batch [920/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.1818 (1.2963) lr 1.8443e-03 eta 1 day, 16:05:07
epoch [15/50] batch [940/2000] time 1.995 (2.030) data 0.000 (0.001) loss 0.4966 (1.2977) lr 1.8443e-03 eta 1 day, 16:04:14
epoch [15/50] batch [960/2000] time 2.001 (2.030) data 0.000 (0.001) loss 2.3417 (1.2984) lr 1.8443e-03 eta 1 day, 16:03:33
epoch [15/50] batch [980/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.7208 (1.2940) lr 1.8443e-03 eta 1 day, 16:02:57
epoch [15/50] batch [1000/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.5486 (1.3022) lr 1.8443e-03 eta 1 day, 16:02:19
epoch [15/50] batch [1020/2000] time 2.025 (2.030) data 0.000 (0.001) loss 1.0005 (1.3049) lr 1.8443e-03 eta 1 day, 16:01:34
epoch [15/50] batch [1040/2000] time 2.050 (2.030) data 0.000 (0.001) loss 3.7148 (1.3017) lr 1.8443e-03 eta 1 day, 16:00:54
epoch [15/50] batch [1060/2000] time 2.052 (2.030) data 0.000 (0.001) loss 2.1083 (1.2978) lr 1.8443e-03 eta 1 day, 16:00:11
epoch [15/50] batch [1080/2000] time 2.051 (2.030) data 0.000 (0.001) loss 3.6739 (1.3006) lr 1.8443e-03 eta 1 day, 15:59:33
epoch [15/50] batch [1100/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.9974 (1.2973) lr 1.8443e-03 eta 1 day, 15:58:51
epoch [15/50] batch [1120/2000] time 2.026 (2.030) data 0.000 (0.001) loss 1.0997 (1.2975) lr 1.8443e-03 eta 1 day, 15:58:07
epoch [15/50] batch [1140/2000] time 1.999 (2.030) data 0.001 (0.001) loss 0.5965 (1.2982) lr 1.8443e-03 eta 1 day, 15:57:16
epoch [15/50] batch [1160/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.1293 (1.3002) lr 1.8443e-03 eta 1 day, 15:56:43
epoch [15/50] batch [1180/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.7693 (1.2939) lr 1.8443e-03 eta 1 day, 15:56:00
epoch [15/50] batch [1200/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.9221 (1.2892) lr 1.8443e-03 eta 1 day, 15:55:20
epoch [15/50] batch [1220/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.2824 (1.2910) lr 1.8443e-03 eta 1 day, 15:54:36
epoch [15/50] batch [1240/2000] time 2.052 (2.030) data 0.000 (0.001) loss 3.2620 (1.2892) lr 1.8443e-03 eta 1 day, 15:53:54
epoch [15/50] batch [1260/2000] time 1.975 (2.030) data 0.000 (0.001) loss 1.8968 (1.2867) lr 1.8443e-03 eta 1 day, 15:53:11
epoch [15/50] batch [1280/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.6203 (1.2867) lr 1.8443e-03 eta 1 day, 15:52:32
epoch [15/50] batch [1300/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.7505 (1.2856) lr 1.8443e-03 eta 1 day, 15:51:52
epoch [15/50] batch [1320/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.1737 (1.2810) lr 1.8443e-03 eta 1 day, 15:51:11
epoch [15/50] batch [1340/2000] time 2.025 (2.030) data 0.000 (0.001) loss 0.7106 (1.2817) lr 1.8443e-03 eta 1 day, 15:50:24
epoch [15/50] batch [1360/2000] time 2.026 (2.030) data 0.000 (0.001) loss 0.8909 (1.2846) lr 1.8443e-03 eta 1 day, 15:49:38
epoch [15/50] batch [1380/2000] time 2.026 (2.030) data 0.000 (0.001) loss 1.1227 (1.2847) lr 1.8443e-03 eta 1 day, 15:48:57
epoch [15/50] batch [1400/2000] time 2.031 (2.030) data 0.000 (0.001) loss 2.5543 (1.2834) lr 1.8443e-03 eta 1 day, 15:48:19
epoch [15/50] batch [1420/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.9408 (1.2858) lr 1.8443e-03 eta 1 day, 15:47:35
epoch [15/50] batch [1440/2000] time 2.002 (2.030) data 0.000 (0.001) loss 0.6621 (1.2836) lr 1.8443e-03 eta 1 day, 15:47:02
epoch [15/50] batch [1460/2000] time 2.029 (2.030) data 0.000 (0.001) loss 2.7386 (1.2859) lr 1.8443e-03 eta 1 day, 15:46:24
epoch [15/50] batch [1480/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.6525 (1.2867) lr 1.8443e-03 eta 1 day, 15:45:45
epoch [15/50] batch [1500/2000] time 1.975 (2.030) data 0.000 (0.001) loss 0.0601 (1.2883) lr 1.8443e-03 eta 1 day, 15:45:03
epoch [15/50] batch [1520/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.7391 (1.2869) lr 1.8443e-03 eta 1 day, 15:44:25
epoch [15/50] batch [1540/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.3445 (1.2889) lr 1.8443e-03 eta 1 day, 15:43:42
epoch [15/50] batch [1560/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.6406 (1.2911) lr 1.8443e-03 eta 1 day, 15:43:00
epoch [15/50] batch [1580/2000] time 2.001 (2.030) data 0.000 (0.001) loss 1.4091 (1.2899) lr 1.8443e-03 eta 1 day, 15:42:14
epoch [15/50] batch [1600/2000] time 2.058 (2.030) data 0.000 (0.001) loss 0.4738 (1.2872) lr 1.8443e-03 eta 1 day, 15:41:36
epoch [15/50] batch [1620/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.5261 (1.2853) lr 1.8443e-03 eta 1 day, 15:41:01
epoch [15/50] batch [1640/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.1880 (1.2827) lr 1.8443e-03 eta 1 day, 15:40:17
epoch [15/50] batch [1660/2000] time 1.973 (2.030) data 0.000 (0.001) loss 1.2329 (1.2782) lr 1.8443e-03 eta 1 day, 15:39:36
epoch [15/50] batch [1680/2000] time 2.030 (2.030) data 0.001 (0.001) loss 0.7486 (1.2759) lr 1.8443e-03 eta 1 day, 15:38:53
epoch [15/50] batch [1700/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.6424 (1.2763) lr 1.8443e-03 eta 1 day, 15:38:09
epoch [15/50] batch [1720/2000] time 2.054 (2.030) data 0.000 (0.001) loss 3.0000 (1.2780) lr 1.8443e-03 eta 1 day, 15:37:26
epoch [15/50] batch [1740/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.9786 (1.2778) lr 1.8443e-03 eta 1 day, 15:36:46
epoch [15/50] batch [1760/2000] time 1.973 (2.030) data 0.000 (0.001) loss 2.6353 (1.2753) lr 1.8443e-03 eta 1 day, 15:36:01
epoch [15/50] batch [1780/2000] time 2.000 (2.030) data 0.000 (0.001) loss 1.7563 (1.2771) lr 1.8443e-03 eta 1 day, 15:35:19
epoch [15/50] batch [1800/2000] time 2.027 (2.030) data 0.000 (0.000) loss 0.9027 (1.2774) lr 1.8443e-03 eta 1 day, 15:34:37
epoch [15/50] batch [1820/2000] time 2.049 (2.029) data 0.000 (0.000) loss 0.4118 (1.2785) lr 1.8443e-03 eta 1 day, 15:33:49
epoch [15/50] batch [1840/2000] time 2.053 (2.029) data 0.000 (0.000) loss 1.8629 (1.2830) lr 1.8443e-03 eta 1 day, 15:33:01
epoch [15/50] batch [1860/2000] time 2.001 (2.029) data 0.000 (0.000) loss 3.1908 (1.2857) lr 1.8443e-03 eta 1 day, 15:32:16
epoch [15/50] batch [1880/2000] time 2.026 (2.029) data 0.000 (0.000) loss 0.8991 (1.2839) lr 1.8443e-03 eta 1 day, 15:31:32
epoch [15/50] batch [1900/2000] time 2.028 (2.029) data 0.000 (0.000) loss 1.7812 (1.2825) lr 1.8443e-03 eta 1 day, 15:30:51
epoch [15/50] batch [1920/2000] time 2.000 (2.029) data 0.000 (0.000) loss 0.4564 (1.2803) lr 1.8443e-03 eta 1 day, 15:30:08
epoch [15/50] batch [1940/2000] time 1.997 (2.029) data 0.000 (0.000) loss 0.2756 (1.2798) lr 1.8443e-03 eta 1 day, 15:29:29
epoch [15/50] batch [1960/2000] time 2.028 (2.029) data 0.000 (0.000) loss 2.0924 (1.2813) lr 1.8443e-03 eta 1 day, 15:28:41
epoch [15/50] batch [1980/2000] time 2.051 (2.029) data 0.000 (0.000) loss 0.5271 (1.2804) lr 1.8443e-03 eta 1 day, 15:28:02
epoch [15/50] batch [2000/2000] time 2.049 (2.029) data 0.000 (0.000) loss 0.3860 (1.2801) lr 1.8090e-03 eta 1 day, 15:27:23
epoch [16/50] batch [20/2000] time 2.054 (2.063) data 0.000 (0.026) loss 0.4852 (0.8244) lr 1.8090e-03 eta 1 day, 16:05:41
epoch [16/50] batch [40/2000] time 2.049 (2.044) data 0.000 (0.013) loss 1.7289 (1.0550) lr 1.8090e-03 eta 1 day, 15:43:43
epoch [16/50] batch [60/2000] time 2.001 (2.040) data 0.001 (0.009) loss 1.4301 (1.0814) lr 1.8090e-03 eta 1 day, 15:37:58
epoch [16/50] batch [80/2000] time 1.999 (2.037) data 0.000 (0.007) loss 0.1153 (1.0453) lr 1.8090e-03 eta 1 day, 15:33:56
epoch [16/50] batch [100/2000] time 1.997 (2.036) data 0.000 (0.005) loss 2.5555 (1.1617) lr 1.8090e-03 eta 1 day, 15:31:30
epoch [16/50] batch [120/2000] time 2.028 (2.034) data 0.000 (0.005) loss 0.7793 (1.1686) lr 1.8090e-03 eta 1 day, 15:28:25
epoch [16/50] batch [140/2000] time 2.028 (2.032) data 0.000 (0.004) loss 1.2368 (1.1807) lr 1.8090e-03 eta 1 day, 15:26:06
epoch [16/50] batch [160/2000] time 2.049 (2.033) data 0.000 (0.003) loss 0.8395 (1.2171) lr 1.8090e-03 eta 1 day, 15:25:52
epoch [16/50] batch [180/2000] time 2.027 (2.033) data 0.000 (0.003) loss 1.1897 (1.2103) lr 1.8090e-03 eta 1 day, 15:25:10
epoch [16/50] batch [200/2000] time 2.028 (2.031) data 0.000 (0.003) loss 0.2871 (1.1966) lr 1.8090e-03 eta 1 day, 15:22:42
epoch [16/50] batch [220/2000] time 2.027 (2.031) data 0.000 (0.003) loss 0.3513 (1.2132) lr 1.8090e-03 eta 1 day, 15:22:10
epoch [16/50] batch [240/2000] time 2.030 (2.031) data 0.000 (0.002) loss 1.9269 (1.2266) lr 1.8090e-03 eta 1 day, 15:21:26
epoch [16/50] batch [260/2000] time 2.052 (2.031) data 0.000 (0.002) loss 1.9673 (1.2051) lr 1.8090e-03 eta 1 day, 15:20:42
epoch [16/50] batch [280/2000] time 2.051 (2.031) data 0.000 (0.002) loss 2.4892 (1.1953) lr 1.8090e-03 eta 1 day, 15:20:13
epoch [16/50] batch [300/2000] time 2.026 (2.031) data 0.000 (0.002) loss 0.4626 (1.2242) lr 1.8090e-03 eta 1 day, 15:19:44
epoch [16/50] batch [320/2000] time 2.050 (2.031) data 0.000 (0.002) loss 1.1831 (1.2069) lr 1.8090e-03 eta 1 day, 15:18:45
epoch [16/50] batch [340/2000] time 1.996 (2.031) data 0.000 (0.002) loss 2.3534 (1.2041) lr 1.8090e-03 eta 1 day, 15:17:47
epoch [16/50] batch [360/2000] time 2.027 (2.031) data 0.000 (0.002) loss 1.5561 (1.2044) lr 1.8090e-03 eta 1 day, 15:17:00
epoch [16/50] batch [380/2000] time 2.051 (2.031) data 0.000 (0.002) loss 1.1884 (1.2276) lr 1.8090e-03 eta 1 day, 15:16:20
epoch [16/50] batch [400/2000] time 2.055 (2.031) data 0.000 (0.002) loss 0.9039 (1.2335) lr 1.8090e-03 eta 1 day, 15:15:43
epoch [16/50] batch [420/2000] time 2.030 (2.030) data 0.000 (0.001) loss 1.3831 (1.2449) lr 1.8090e-03 eta 1 day, 15:14:34
epoch [16/50] batch [440/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.8085 (1.2403) lr 1.8090e-03 eta 1 day, 15:14:00
epoch [16/50] batch [460/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.6811 (1.2539) lr 1.8090e-03 eta 1 day, 15:12:50
epoch [16/50] batch [480/2000] time 2.058 (2.030) data 0.000 (0.001) loss 1.1137 (1.2450) lr 1.8090e-03 eta 1 day, 15:11:58
epoch [16/50] batch [500/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.4361 (1.2510) lr 1.8090e-03 eta 1 day, 15:10:57
epoch [16/50] batch [520/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.4469 (1.2644) lr 1.8090e-03 eta 1 day, 15:10:18
epoch [16/50] batch [540/2000] time 2.052 (2.030) data 0.000 (0.001) loss 3.7172 (1.2700) lr 1.8090e-03 eta 1 day, 15:09:32
epoch [16/50] batch [560/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.6022 (1.2743) lr 1.8090e-03 eta 1 day, 15:08:33
epoch [16/50] batch [580/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.8598 (1.2777) lr 1.8090e-03 eta 1 day, 15:07:40
epoch [16/50] batch [600/2000] time 2.031 (2.029) data 0.001 (0.001) loss 1.7203 (1.2841) lr 1.8090e-03 eta 1 day, 15:07:12
epoch [16/50] batch [620/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.6016 (1.2724) lr 1.8090e-03 eta 1 day, 15:06:31
epoch [16/50] batch [640/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.3429 (1.2619) lr 1.8090e-03 eta 1 day, 15:05:55
epoch [16/50] batch [660/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.4658 (1.2700) lr 1.8090e-03 eta 1 day, 15:05:03
epoch [16/50] batch [680/2000] time 2.029 (2.029) data 0.000 (0.001) loss 3.4298 (1.2644) lr 1.8090e-03 eta 1 day, 15:04:16
epoch [16/50] batch [700/2000] time 1.975 (2.029) data 0.000 (0.001) loss 2.2371 (1.2650) lr 1.8090e-03 eta 1 day, 15:03:25
epoch [16/50] batch [720/2000] time 2.056 (2.029) data 0.000 (0.001) loss 1.8851 (1.2711) lr 1.8090e-03 eta 1 day, 15:02:57
epoch [16/50] batch [740/2000] time 2.006 (2.029) data 0.000 (0.001) loss 2.6911 (1.2682) lr 1.8090e-03 eta 1 day, 15:02:17
epoch [16/50] batch [760/2000] time 2.032 (2.029) data 0.000 (0.001) loss 2.5227 (1.2760) lr 1.8090e-03 eta 1 day, 15:01:43
epoch [16/50] batch [780/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.8750 (1.2690) lr 1.8090e-03 eta 1 day, 15:01:13
epoch [16/50] batch [800/2000] time 1.996 (2.029) data 0.000 (0.001) loss 2.6344 (1.2766) lr 1.8090e-03 eta 1 day, 15:00:23
epoch [16/50] batch [820/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.1857 (1.2747) lr 1.8090e-03 eta 1 day, 14:59:38
epoch [16/50] batch [840/2000] time 2.002 (2.029) data 0.000 (0.001) loss 0.5595 (1.2740) lr 1.8090e-03 eta 1 day, 14:58:54
epoch [16/50] batch [860/2000] time 2.051 (2.029) data 0.000 (0.001) loss 2.7845 (1.2748) lr 1.8090e-03 eta 1 day, 14:58:17
epoch [16/50] batch [880/2000] time 1.994 (2.029) data 0.000 (0.001) loss 1.1725 (1.2750) lr 1.8090e-03 eta 1 day, 14:57:34
epoch [16/50] batch [900/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.7167 (1.2793) lr 1.8090e-03 eta 1 day, 14:56:52
epoch [16/50] batch [920/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.0690 (1.2743) lr 1.8090e-03 eta 1 day, 14:56:11
epoch [16/50] batch [940/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.1669 (1.2743) lr 1.8090e-03 eta 1 day, 14:55:30
epoch [16/50] batch [960/2000] time 2.051 (2.029) data 0.000 (0.001) loss 2.5902 (1.2793) lr 1.8090e-03 eta 1 day, 14:54:51
epoch [16/50] batch [980/2000] time 2.053 (2.029) data 0.000 (0.001) loss 2.3597 (1.2785) lr 1.8090e-03 eta 1 day, 14:54:13
epoch [16/50] batch [1000/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.7222 (1.2798) lr 1.8090e-03 eta 1 day, 14:53:40
epoch [16/50] batch [1020/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.2600 (1.2827) lr 1.8090e-03 eta 1 day, 14:53:04
epoch [16/50] batch [1040/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.6087 (1.2901) lr 1.8090e-03 eta 1 day, 14:52:21
epoch [16/50] batch [1060/2000] time 2.056 (2.029) data 0.000 (0.001) loss 0.3454 (1.2878) lr 1.8090e-03 eta 1 day, 14:51:39
epoch [16/50] batch [1080/2000] time 2.005 (2.029) data 0.000 (0.001) loss 1.8159 (1.2776) lr 1.8090e-03 eta 1 day, 14:51:03
epoch [16/50] batch [1100/2000] time 2.001 (2.030) data 0.000 (0.001) loss 1.4083 (1.2767) lr 1.8090e-03 eta 1 day, 14:50:33
epoch [16/50] batch [1120/2000] time 2.030 (2.030) data 0.000 (0.001) loss 1.3251 (1.2753) lr 1.8090e-03 eta 1 day, 14:49:57
epoch [16/50] batch [1140/2000] time 2.001 (2.030) data 0.001 (0.001) loss 1.9078 (1.2781) lr 1.8090e-03 eta 1 day, 14:49:16
epoch [16/50] batch [1160/2000] time 1.976 (2.030) data 0.000 (0.001) loss 0.2061 (1.2764) lr 1.8090e-03 eta 1 day, 14:48:39
epoch [16/50] batch [1180/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.6412 (1.2711) lr 1.8090e-03 eta 1 day, 14:47:49
epoch [16/50] batch [1200/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.1748 (1.2632) lr 1.8090e-03 eta 1 day, 14:47:05
epoch [16/50] batch [1220/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.4783 (1.2600) lr 1.8090e-03 eta 1 day, 14:46:15
epoch [16/50] batch [1240/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.1923 (1.2621) lr 1.8090e-03 eta 1 day, 14:45:35
epoch [16/50] batch [1260/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.3169 (1.2609) lr 1.8090e-03 eta 1 day, 14:44:50
epoch [16/50] batch [1280/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.1834 (1.2577) lr 1.8090e-03 eta 1 day, 14:44:09
epoch [16/50] batch [1300/2000] time 1.998 (2.029) data 0.000 (0.001) loss 2.2112 (1.2554) lr 1.8090e-03 eta 1 day, 14:43:24
epoch [16/50] batch [1320/2000] time 2.033 (2.029) data 0.000 (0.001) loss 0.6555 (1.2586) lr 1.8090e-03 eta 1 day, 14:42:35
epoch [16/50] batch [1340/2000] time 2.057 (2.029) data 0.000 (0.001) loss 1.6992 (1.2573) lr 1.8090e-03 eta 1 day, 14:42:02
epoch [16/50] batch [1360/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.5316 (1.2561) lr 1.8090e-03 eta 1 day, 14:41:20
epoch [16/50] batch [1380/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.1380 (1.2532) lr 1.8090e-03 eta 1 day, 14:40:42
epoch [16/50] batch [1400/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.6834 (1.2512) lr 1.8090e-03 eta 1 day, 14:40:07
epoch [16/50] batch [1420/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.9786 (1.2491) lr 1.8090e-03 eta 1 day, 14:39:26
epoch [16/50] batch [1440/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.8230 (1.2517) lr 1.8090e-03 eta 1 day, 14:38:40
epoch [16/50] batch [1460/2000] time 2.049 (2.029) data 0.000 (0.001) loss 2.0410 (1.2529) lr 1.8090e-03 eta 1 day, 14:38:04
epoch [16/50] batch [1480/2000] time 2.053 (2.029) data 0.000 (0.001) loss 2.9011 (1.2528) lr 1.8090e-03 eta 1 day, 14:37:22
epoch [16/50] batch [1500/2000] time 2.003 (2.029) data 0.000 (0.001) loss 0.9974 (1.2543) lr 1.8090e-03 eta 1 day, 14:36:43
epoch [16/50] batch [1520/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.9220 (1.2512) lr 1.8090e-03 eta 1 day, 14:36:03
epoch [16/50] batch [1540/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.9627 (1.2549) lr 1.8090e-03 eta 1 day, 14:35:31
epoch [16/50] batch [1560/2000] time 2.055 (2.029) data 0.000 (0.001) loss 2.3884 (1.2516) lr 1.8090e-03 eta 1 day, 14:34:54
epoch [16/50] batch [1580/2000] time 2.001 (2.029) data 0.000 (0.001) loss 2.1357 (1.2548) lr 1.8090e-03 eta 1 day, 14:34:10
epoch [16/50] batch [1600/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.2049 (1.2560) lr 1.8090e-03 eta 1 day, 14:33:24
epoch [16/50] batch [1620/2000] time 2.001 (2.029) data 0.000 (0.001) loss 0.2734 (1.2559) lr 1.8090e-03 eta 1 day, 14:32:48
epoch [16/50] batch [1640/2000] time 1.999 (2.029) data 0.000 (0.001) loss 1.5474 (1.2542) lr 1.8090e-03 eta 1 day, 14:32:03
epoch [16/50] batch [1660/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.8238 (1.2546) lr 1.8090e-03 eta 1 day, 14:31:24
epoch [16/50] batch [1680/2000] time 1.999 (2.029) data 0.001 (0.001) loss 0.3570 (1.2500) lr 1.8090e-03 eta 1 day, 14:30:50
epoch [16/50] batch [1700/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.6701 (1.2527) lr 1.8090e-03 eta 1 day, 14:30:10
epoch [16/50] batch [1720/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.2810 (1.2519) lr 1.8090e-03 eta 1 day, 14:29:28
epoch [16/50] batch [1740/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.8320 (1.2531) lr 1.8090e-03 eta 1 day, 14:28:52
epoch [16/50] batch [1760/2000] time 2.055 (2.029) data 0.000 (0.001) loss 2.2522 (1.2491) lr 1.8090e-03 eta 1 day, 14:28:09
epoch [16/50] batch [1780/2000] time 2.028 (2.029) data 0.000 (0.001) loss 2.4986 (1.2526) lr 1.8090e-03 eta 1 day, 14:27:30
epoch [16/50] batch [1800/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.2982 (1.2488) lr 1.8090e-03 eta 1 day, 14:26:55
epoch [16/50] batch [1820/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.4030 (1.2486) lr 1.8090e-03 eta 1 day, 14:26:19
epoch [16/50] batch [1840/2000] time 2.050 (2.030) data 0.000 (0.000) loss 0.9363 (1.2478) lr 1.8090e-03 eta 1 day, 14:25:42
epoch [16/50] batch [1860/2000] time 2.048 (2.030) data 0.000 (0.000) loss 0.5200 (1.2475) lr 1.8090e-03 eta 1 day, 14:25:00
epoch [16/50] batch [1880/2000] time 2.053 (2.030) data 0.000 (0.000) loss 0.3327 (1.2502) lr 1.8090e-03 eta 1 day, 14:24:15
epoch [16/50] batch [1900/2000] time 1.997 (2.030) data 0.000 (0.000) loss 1.7337 (1.2482) lr 1.8090e-03 eta 1 day, 14:23:35
epoch [16/50] batch [1920/2000] time 2.030 (2.030) data 0.000 (0.000) loss 0.6861 (1.2499) lr 1.8090e-03 eta 1 day, 14:22:53
epoch [16/50] batch [1940/2000] time 2.051 (2.030) data 0.000 (0.000) loss 2.2510 (1.2499) lr 1.8090e-03 eta 1 day, 14:22:18
epoch [16/50] batch [1960/2000] time 2.051 (2.030) data 0.000 (0.000) loss 0.6749 (1.2534) lr 1.8090e-03 eta 1 day, 14:21:34
epoch [16/50] batch [1980/2000] time 1.997 (2.030) data 0.000 (0.000) loss 1.4206 (1.2532) lr 1.8090e-03 eta 1 day, 14:20:54
epoch [16/50] batch [2000/2000] time 2.047 (2.030) data 0.000 (0.000) loss 1.1344 (1.2499) lr 1.7705e-03 eta 1 day, 14:20:12
epoch [17/50] batch [20/2000] time 2.053 (2.064) data 0.000 (0.026) loss 1.6704 (1.1072) lr 1.7705e-03 eta 1 day, 14:58:05
epoch [17/50] batch [40/2000] time 2.052 (2.043) data 0.000 (0.013) loss 2.1453 (1.3343) lr 1.7705e-03 eta 1 day, 14:34:04
epoch [17/50] batch [60/2000] time 2.000 (2.039) data 0.000 (0.009) loss 0.0362 (1.2757) lr 1.7705e-03 eta 1 day, 14:29:12
epoch [17/50] batch [80/2000] time 2.030 (2.035) data 0.000 (0.007) loss 1.8974 (1.3537) lr 1.7705e-03 eta 1 day, 14:23:32
epoch [17/50] batch [100/2000] time 2.054 (2.033) data 0.000 (0.005) loss 1.9101 (1.3711) lr 1.7705e-03 eta 1 day, 14:20:28
epoch [17/50] batch [120/2000] time 2.057 (2.033) data 0.000 (0.005) loss 0.7768 (1.3389) lr 1.7705e-03 eta 1 day, 14:20:08
epoch [17/50] batch [140/2000] time 2.058 (2.033) data 0.000 (0.004) loss 1.7750 (1.2958) lr 1.7705e-03 eta 1 day, 14:18:53
epoch [17/50] batch [160/2000] time 2.033 (2.033) data 0.000 (0.003) loss 2.3063 (1.2939) lr 1.7705e-03 eta 1 day, 14:18:56
epoch [17/50] batch [180/2000] time 1.977 (2.033) data 0.000 (0.003) loss 1.4203 (1.3187) lr 1.7705e-03 eta 1 day, 14:18:30
epoch [17/50] batch [200/2000] time 2.032 (2.033) data 0.000 (0.003) loss 0.3804 (1.3111) lr 1.7705e-03 eta 1 day, 14:17:31
epoch [17/50] batch [220/2000] time 2.029 (2.033) data 0.000 (0.003) loss 1.0780 (1.3084) lr 1.7705e-03 eta 1 day, 14:16:37
epoch [17/50] batch [240/2000] time 2.027 (2.032) data 0.000 (0.002) loss 2.5400 (1.3053) lr 1.7705e-03 eta 1 day, 14:15:08
epoch [17/50] batch [260/2000] time 2.029 (2.032) data 0.000 (0.002) loss 0.9188 (1.2954) lr 1.7705e-03 eta 1 day, 14:14:20
epoch [17/50] batch [280/2000] time 2.052 (2.032) data 0.000 (0.002) loss 2.0277 (1.2633) lr 1.7705e-03 eta 1 day, 14:13:24
epoch [17/50] batch [300/2000] time 2.027 (2.032) data 0.000 (0.002) loss 0.6937 (1.2652) lr 1.7705e-03 eta 1 day, 14:12:18
epoch [17/50] batch [320/2000] time 1.998 (2.031) data 0.000 (0.002) loss 0.8825 (1.2699) lr 1.7705e-03 eta 1 day, 14:11:11
epoch [17/50] batch [340/2000] time 2.056 (2.031) data 0.000 (0.002) loss 0.1606 (1.2707) lr 1.7705e-03 eta 1 day, 14:10:34
epoch [17/50] batch [360/2000] time 2.000 (2.031) data 0.000 (0.002) loss 1.1950 (1.2742) lr 1.7705e-03 eta 1 day, 14:09:37
epoch [17/50] batch [380/2000] time 2.031 (2.031) data 0.000 (0.002) loss 1.1197 (1.2858) lr 1.7705e-03 eta 1 day, 14:08:54
epoch [17/50] batch [400/2000] time 1.998 (2.031) data 0.000 (0.002) loss 1.4464 (1.2791) lr 1.7705e-03 eta 1 day, 14:07:52
epoch [17/50] batch [420/2000] time 1.995 (2.031) data 0.000 (0.001) loss 0.8331 (1.2829) lr 1.7705e-03 eta 1 day, 14:07:06
epoch [17/50] batch [440/2000] time 2.028 (2.031) data 0.000 (0.001) loss 1.2083 (1.2768) lr 1.7705e-03 eta 1 day, 14:06:36
epoch [17/50] batch [460/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.9023 (1.2755) lr 1.7705e-03 eta 1 day, 14:05:39
epoch [17/50] batch [480/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.7561 (1.2705) lr 1.7705e-03 eta 1 day, 14:04:50
epoch [17/50] batch [500/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.2584 (1.2595) lr 1.7705e-03 eta 1 day, 14:03:53
epoch [17/50] batch [520/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.4000 (1.2543) lr 1.7705e-03 eta 1 day, 14:03:20
epoch [17/50] batch [540/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.8116 (1.2554) lr 1.7705e-03 eta 1 day, 14:02:27
epoch [17/50] batch [560/2000] time 2.052 (2.030) data 0.000 (0.001) loss 3.9790 (1.2537) lr 1.7705e-03 eta 1 day, 14:01:42
epoch [17/50] batch [580/2000] time 1.998 (2.030) data 0.000 (0.001) loss 2.2263 (1.2504) lr 1.7705e-03 eta 1 day, 14:01:02
epoch [17/50] batch [600/2000] time 2.027 (2.030) data 0.001 (0.001) loss 1.3019 (1.2536) lr 1.7705e-03 eta 1 day, 14:00:22
epoch [17/50] batch [620/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.8206 (1.2523) lr 1.7705e-03 eta 1 day, 13:59:39
epoch [17/50] batch [640/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.4479 (1.2498) lr 1.7705e-03 eta 1 day, 13:58:55
epoch [17/50] batch [660/2000] time 2.027 (2.030) data 0.000 (0.001) loss 2.3387 (1.2550) lr 1.7705e-03 eta 1 day, 13:58:03
epoch [17/50] batch [680/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.3086 (1.2433) lr 1.7705e-03 eta 1 day, 13:57:16
epoch [17/50] batch [700/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.1582 (1.2447) lr 1.7705e-03 eta 1 day, 13:56:32
epoch [17/50] batch [720/2000] time 1.997 (2.030) data 0.000 (0.001) loss 2.4893 (1.2525) lr 1.7705e-03 eta 1 day, 13:55:45
epoch [17/50] batch [740/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.4811 (1.2516) lr 1.7705e-03 eta 1 day, 13:55:05
epoch [17/50] batch [760/2000] time 2.051 (2.030) data 0.000 (0.001) loss 2.4335 (1.2738) lr 1.7705e-03 eta 1 day, 13:54:33
epoch [17/50] batch [780/2000] time 2.054 (2.030) data 0.000 (0.001) loss 2.3639 (1.2813) lr 1.7705e-03 eta 1 day, 13:53:54
epoch [17/50] batch [800/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.1485 (1.2800) lr 1.7705e-03 eta 1 day, 13:53:20
epoch [17/50] batch [820/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.2388 (1.2748) lr 1.7705e-03 eta 1 day, 13:52:34
epoch [17/50] batch [840/2000] time 2.025 (2.030) data 0.000 (0.001) loss 1.0670 (1.2739) lr 1.7705e-03 eta 1 day, 13:51:52
epoch [17/50] batch [860/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.6120 (1.2665) lr 1.7705e-03 eta 1 day, 13:51:08
epoch [17/50] batch [880/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.4335 (1.2638) lr 1.7705e-03 eta 1 day, 13:50:27
epoch [17/50] batch [900/2000] time 1.995 (2.030) data 0.000 (0.001) loss 0.8218 (1.2648) lr 1.7705e-03 eta 1 day, 13:49:46
epoch [17/50] batch [920/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.4244 (1.2607) lr 1.7705e-03 eta 1 day, 13:49:08
epoch [17/50] batch [940/2000] time 1.995 (2.030) data 0.000 (0.001) loss 2.0440 (1.2566) lr 1.7705e-03 eta 1 day, 13:48:26
epoch [17/50] batch [960/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.1905 (1.2582) lr 1.7705e-03 eta 1 day, 13:47:52
epoch [17/50] batch [980/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.6446 (1.2565) lr 1.7705e-03 eta 1 day, 13:47:11
epoch [17/50] batch [1000/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.5874 (1.2585) lr 1.7705e-03 eta 1 day, 13:46:16
epoch [17/50] batch [1020/2000] time 1.975 (2.029) data 0.000 (0.001) loss 1.1403 (1.2641) lr 1.7705e-03 eta 1 day, 13:45:31
epoch [17/50] batch [1040/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.8636 (1.2638) lr 1.7705e-03 eta 1 day, 13:44:50
epoch [17/50] batch [1060/2000] time 2.030 (2.029) data 0.000 (0.001) loss 1.6219 (1.2618) lr 1.7705e-03 eta 1 day, 13:44:05
epoch [17/50] batch [1080/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.3109 (1.2556) lr 1.7705e-03 eta 1 day, 13:43:23
epoch [17/50] batch [1100/2000] time 1.999 (2.029) data 0.000 (0.001) loss 1.6004 (1.2587) lr 1.7705e-03 eta 1 day, 13:42:41
epoch [17/50] batch [1120/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.0913 (1.2559) lr 1.7705e-03 eta 1 day, 13:41:59
epoch [17/50] batch [1140/2000] time 2.000 (2.029) data 0.001 (0.001) loss 1.4063 (1.2524) lr 1.7705e-03 eta 1 day, 13:41:13
epoch [17/50] batch [1160/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.6734 (1.2569) lr 1.7705e-03 eta 1 day, 13:40:33
epoch [17/50] batch [1180/2000] time 2.052 (2.029) data 0.000 (0.001) loss 2.9801 (1.2595) lr 1.7705e-03 eta 1 day, 13:39:48
epoch [17/50] batch [1200/2000] time 2.029 (2.029) data 0.000 (0.001) loss 2.5846 (1.2576) lr 1.7705e-03 eta 1 day, 13:39:12
epoch [17/50] batch [1220/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.2472 (1.2608) lr 1.7705e-03 eta 1 day, 13:38:30
epoch [17/50] batch [1240/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.2399 (1.2618) lr 1.7705e-03 eta 1 day, 13:37:55
epoch [17/50] batch [1260/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.8721 (1.2599) lr 1.7705e-03 eta 1 day, 13:37:16
epoch [17/50] batch [1280/2000] time 1.996 (2.029) data 0.000 (0.001) loss 2.6552 (1.2639) lr 1.7705e-03 eta 1 day, 13:36:35
epoch [17/50] batch [1300/2000] time 2.052 (2.029) data 0.000 (0.001) loss 2.1206 (1.2677) lr 1.7705e-03 eta 1 day, 13:35:43
epoch [17/50] batch [1320/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.2714 (1.2643) lr 1.7705e-03 eta 1 day, 13:35:05
epoch [17/50] batch [1340/2000] time 2.053 (2.029) data 0.000 (0.001) loss 2.3575 (1.2618) lr 1.7705e-03 eta 1 day, 13:34:26
epoch [17/50] batch [1360/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.2658 (1.2628) lr 1.7705e-03 eta 1 day, 13:33:40
epoch [17/50] batch [1380/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.6501 (1.2638) lr 1.7705e-03 eta 1 day, 13:33:06
epoch [17/50] batch [1400/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.5182 (1.2579) lr 1.7705e-03 eta 1 day, 13:32:29
epoch [17/50] batch [1420/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.4555 (1.2552) lr 1.7705e-03 eta 1 day, 13:31:46
epoch [17/50] batch [1440/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.6954 (1.2556) lr 1.7705e-03 eta 1 day, 13:31:05
epoch [17/50] batch [1460/2000] time 1.995 (2.029) data 0.000 (0.001) loss 4.8957 (1.2562) lr 1.7705e-03 eta 1 day, 13:30:18
epoch [17/50] batch [1480/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.8687 (1.2558) lr 1.7705e-03 eta 1 day, 13:29:32
epoch [17/50] batch [1500/2000] time 2.030 (2.029) data 0.000 (0.001) loss 1.6411 (1.2551) lr 1.7705e-03 eta 1 day, 13:28:55
epoch [17/50] batch [1520/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.1481 (1.2562) lr 1.7705e-03 eta 1 day, 13:28:21
epoch [17/50] batch [1540/2000] time 1.995 (2.029) data 0.000 (0.001) loss 1.3627 (1.2591) lr 1.7705e-03 eta 1 day, 13:27:34
epoch [17/50] batch [1560/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.7461 (1.2565) lr 1.7705e-03 eta 1 day, 13:26:48
epoch [17/50] batch [1580/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.1667 (1.2588) lr 1.7705e-03 eta 1 day, 13:26:08
epoch [17/50] batch [1600/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.5474 (1.2566) lr 1.7705e-03 eta 1 day, 13:25:25
epoch [17/50] batch [1620/2000] time 1.996 (2.029) data 0.000 (0.001) loss 2.1174 (1.2576) lr 1.7705e-03 eta 1 day, 13:24:39
epoch [17/50] batch [1640/2000] time 2.048 (2.029) data 0.000 (0.001) loss 2.2309 (1.2575) lr 1.7705e-03 eta 1 day, 13:23:55
epoch [17/50] batch [1660/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.0625 (1.2616) lr 1.7705e-03 eta 1 day, 13:23:18
epoch [17/50] batch [1680/2000] time 2.030 (2.029) data 0.001 (0.001) loss 0.2014 (1.2635) lr 1.7705e-03 eta 1 day, 13:22:35
epoch [17/50] batch [1700/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.5006 (1.2655) lr 1.7705e-03 eta 1 day, 13:21:54
epoch [17/50] batch [1720/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.6864 (1.2646) lr 1.7705e-03 eta 1 day, 13:21:22
epoch [17/50] batch [1740/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.1469 (1.2666) lr 1.7705e-03 eta 1 day, 13:20:40
epoch [17/50] batch [1760/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.9827 (1.2686) lr 1.7705e-03 eta 1 day, 13:20:01
epoch [17/50] batch [1780/2000] time 1.999 (2.029) data 0.000 (0.000) loss 1.3673 (1.2665) lr 1.7705e-03 eta 1 day, 13:19:17
epoch [17/50] batch [1800/2000] time 2.001 (2.029) data 0.000 (0.000) loss 0.1172 (1.2651) lr 1.7705e-03 eta 1 day, 13:18:35
epoch [17/50] batch [1820/2000] time 1.998 (2.029) data 0.000 (0.000) loss 1.3232 (1.2640) lr 1.7705e-03 eta 1 day, 13:17:48
epoch [17/50] batch [1840/2000] time 2.000 (2.029) data 0.000 (0.000) loss 0.8471 (1.2664) lr 1.7705e-03 eta 1 day, 13:17:10
epoch [17/50] batch [1860/2000] time 2.049 (2.029) data 0.000 (0.000) loss 0.4097 (1.2689) lr 1.7705e-03 eta 1 day, 13:16:30
epoch [17/50] batch [1880/2000] time 2.050 (2.029) data 0.000 (0.000) loss 0.6137 (1.2684) lr 1.7705e-03 eta 1 day, 13:15:53
epoch [17/50] batch [1900/2000] time 1.994 (2.029) data 0.000 (0.000) loss 2.2433 (1.2661) lr 1.7705e-03 eta 1 day, 13:15:09
epoch [17/50] batch [1920/2000] time 2.050 (2.029) data 0.000 (0.000) loss 1.7265 (1.2658) lr 1.7705e-03 eta 1 day, 13:14:26
epoch [17/50] batch [1940/2000] time 2.026 (2.029) data 0.000 (0.000) loss 0.8217 (1.2666) lr 1.7705e-03 eta 1 day, 13:13:45
epoch [17/50] batch [1960/2000] time 2.052 (2.029) data 0.000 (0.000) loss 0.6415 (1.2621) lr 1.7705e-03 eta 1 day, 13:13:04
epoch [17/50] batch [1980/2000] time 2.052 (2.029) data 0.000 (0.000) loss 0.4414 (1.2585) lr 1.7705e-03 eta 1 day, 13:12:24
epoch [17/50] batch [2000/2000] time 2.050 (2.029) data 0.000 (0.000) loss 1.8099 (1.2622) lr 1.7290e-03 eta 1 day, 13:11:44
epoch [18/50] batch [20/2000] time 2.023 (2.055) data 0.000 (0.028) loss 0.8352 (1.6352) lr 1.7290e-03 eta 1 day, 13:40:19
epoch [18/50] batch [40/2000] time 1.970 (2.041) data 0.000 (0.014) loss 0.5196 (1.5307) lr 1.7290e-03 eta 1 day, 13:23:39
epoch [18/50] batch [60/2000] time 2.047 (2.036) data 0.000 (0.009) loss 1.5208 (1.3967) lr 1.7290e-03 eta 1 day, 13:17:09
epoch [18/50] batch [80/2000] time 1.971 (2.034) data 0.000 (0.007) loss 0.4098 (1.4361) lr 1.7290e-03 eta 1 day, 13:14:40
epoch [18/50] batch [100/2000] time 2.048 (2.032) data 0.000 (0.006) loss 0.5105 (1.4488) lr 1.7290e-03 eta 1 day, 13:12:12
epoch [18/50] batch [120/2000] time 2.049 (2.031) data 0.000 (0.005) loss 0.1015 (1.3901) lr 1.7290e-03 eta 1 day, 13:09:49
epoch [18/50] batch [140/2000] time 2.050 (2.031) data 0.000 (0.004) loss 1.3845 (1.3676) lr 1.7290e-03 eta 1 day, 13:09:51
epoch [18/50] batch [160/2000] time 1.994 (2.030) data 0.000 (0.004) loss 1.1559 (1.3446) lr 1.7290e-03 eta 1 day, 13:07:37
epoch [18/50] batch [180/2000] time 1.998 (2.029) data 0.000 (0.003) loss 0.7575 (1.3825) lr 1.7290e-03 eta 1 day, 13:06:12
epoch [18/50] batch [200/2000] time 2.057 (2.030) data 0.000 (0.003) loss 3.2675 (1.3779) lr 1.7290e-03 eta 1 day, 13:05:42
epoch [18/50] batch [220/2000] time 1.976 (2.029) data 0.000 (0.003) loss 1.7614 (1.3577) lr 1.7290e-03 eta 1 day, 13:04:21
epoch [18/50] batch [240/2000] time 2.029 (2.029) data 0.000 (0.002) loss 0.9892 (1.3535) lr 1.7290e-03 eta 1 day, 13:03:29
epoch [18/50] batch [260/2000] time 1.999 (2.028) data 0.000 (0.002) loss 1.8503 (1.3612) lr 1.7290e-03 eta 1 day, 13:02:24
epoch [18/50] batch [280/2000] time 2.032 (2.028) data 0.000 (0.002) loss 3.8691 (1.3761) lr 1.7290e-03 eta 1 day, 13:01:12
epoch [18/50] batch [300/2000] time 2.055 (2.028) data 0.000 (0.002) loss 1.2920 (1.3856) lr 1.7290e-03 eta 1 day, 13:01:04
epoch [18/50] batch [320/2000] time 2.027 (2.028) data 0.000 (0.002) loss 0.3805 (1.3672) lr 1.7290e-03 eta 1 day, 13:00:28
epoch [18/50] batch [340/2000] time 2.031 (2.029) data 0.000 (0.002) loss 1.5615 (1.3822) lr 1.7290e-03 eta 1 day, 13:00:08
epoch [18/50] batch [360/2000] time 2.048 (2.029) data 0.000 (0.002) loss 0.0860 (1.3761) lr 1.7290e-03 eta 1 day, 12:59:25
epoch [18/50] batch [380/2000] time 2.026 (2.029) data 0.000 (0.002) loss 1.1734 (1.3597) lr 1.7290e-03 eta 1 day, 12:58:37
epoch [18/50] batch [400/2000] time 2.056 (2.028) data 0.000 (0.002) loss 2.8686 (1.3446) lr 1.7290e-03 eta 1 day, 12:57:46
epoch [18/50] batch [420/2000] time 2.029 (2.029) data 0.000 (0.002) loss 0.5735 (1.3423) lr 1.7290e-03 eta 1 day, 12:57:26
epoch [18/50] batch [440/2000] time 2.028 (2.028) data 0.000 (0.001) loss 2.6449 (1.3533) lr 1.7290e-03 eta 1 day, 12:56:20
epoch [18/50] batch [460/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.7973 (1.3414) lr 1.7290e-03 eta 1 day, 12:55:26
epoch [18/50] batch [480/2000] time 1.996 (2.028) data 0.000 (0.001) loss 2.4432 (1.3347) lr 1.7290e-03 eta 1 day, 12:54:34
epoch [18/50] batch [500/2000] time 2.052 (2.028) data 0.000 (0.001) loss 4.2112 (1.3361) lr 1.7290e-03 eta 1 day, 12:54:04
epoch [18/50] batch [520/2000] time 2.031 (2.028) data 0.000 (0.001) loss 0.2439 (1.3307) lr 1.7290e-03 eta 1 day, 12:53:34
epoch [18/50] batch [540/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.6728 (1.3227) lr 1.7290e-03 eta 1 day, 12:53:00
epoch [18/50] batch [560/2000] time 2.028 (2.029) data 0.000 (0.001) loss 2.0229 (1.3241) lr 1.7290e-03 eta 1 day, 12:52:30
epoch [18/50] batch [580/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.5503 (1.3185) lr 1.7290e-03 eta 1 day, 12:51:30
epoch [18/50] batch [600/2000] time 2.050 (2.028) data 0.001 (0.001) loss 0.2466 (1.3095) lr 1.7290e-03 eta 1 day, 12:50:48
epoch [18/50] batch [620/2000] time 1.997 (2.028) data 0.000 (0.001) loss 0.3899 (1.3005) lr 1.7290e-03 eta 1 day, 12:50:07
epoch [18/50] batch [640/2000] time 2.030 (2.028) data 0.000 (0.001) loss 1.0892 (1.3089) lr 1.7290e-03 eta 1 day, 12:49:23
epoch [18/50] batch [660/2000] time 2.052 (2.028) data 0.000 (0.001) loss 2.0791 (1.3179) lr 1.7290e-03 eta 1 day, 12:48:51
epoch [18/50] batch [680/2000] time 2.053 (2.028) data 0.000 (0.001) loss 0.9597 (1.3111) lr 1.7290e-03 eta 1 day, 12:48:04
epoch [18/50] batch [700/2000] time 1.997 (2.028) data 0.000 (0.001) loss 2.8551 (1.3046) lr 1.7290e-03 eta 1 day, 12:47:32
epoch [18/50] batch [720/2000] time 2.052 (2.028) data 0.000 (0.001) loss 2.0105 (1.3035) lr 1.7290e-03 eta 1 day, 12:46:51
epoch [18/50] batch [740/2000] time 2.029 (2.028) data 0.000 (0.001) loss 2.0650 (1.2996) lr 1.7290e-03 eta 1 day, 12:46:07
epoch [18/50] batch [760/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.2002 (1.2938) lr 1.7290e-03 eta 1 day, 12:45:32
epoch [18/50] batch [780/2000] time 1.996 (2.028) data 0.000 (0.001) loss 1.8391 (1.2914) lr 1.7290e-03 eta 1 day, 12:44:44
epoch [18/50] batch [800/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.3477 (1.2832) lr 1.7290e-03 eta 1 day, 12:44:09
epoch [18/50] batch [820/2000] time 2.026 (2.028) data 0.000 (0.001) loss 1.4671 (1.2834) lr 1.7290e-03 eta 1 day, 12:43:25
epoch [18/50] batch [840/2000] time 2.026 (2.028) data 0.000 (0.001) loss 1.3628 (1.2845) lr 1.7290e-03 eta 1 day, 12:42:47
epoch [18/50] batch [860/2000] time 2.052 (2.028) data 0.000 (0.001) loss 1.9799 (1.2842) lr 1.7290e-03 eta 1 day, 12:42:00
epoch [18/50] batch [880/2000] time 1.973 (2.028) data 0.000 (0.001) loss 0.3741 (1.2853) lr 1.7290e-03 eta 1 day, 12:41:22
epoch [18/50] batch [900/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.3331 (1.2898) lr 1.7290e-03 eta 1 day, 12:40:35
epoch [18/50] batch [920/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.5301 (1.2870) lr 1.7290e-03 eta 1 day, 12:39:57
epoch [18/50] batch [940/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.6265 (1.2866) lr 1.7290e-03 eta 1 day, 12:39:20
epoch [18/50] batch [960/2000] time 1.996 (2.028) data 0.000 (0.001) loss 0.6754 (1.2830) lr 1.7290e-03 eta 1 day, 12:38:34
epoch [18/50] batch [980/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.4369 (1.2892) lr 1.7290e-03 eta 1 day, 12:37:43
epoch [18/50] batch [1000/2000] time 2.026 (2.028) data 0.000 (0.001) loss 1.7327 (1.2861) lr 1.7290e-03 eta 1 day, 12:37:07
epoch [18/50] batch [1020/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.0555 (1.2797) lr 1.7290e-03 eta 1 day, 12:36:33
epoch [18/50] batch [1040/2000] time 1.995 (2.028) data 0.000 (0.001) loss 4.0074 (1.2782) lr 1.7290e-03 eta 1 day, 12:35:46
epoch [18/50] batch [1060/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.3111 (1.2765) lr 1.7290e-03 eta 1 day, 12:35:02
epoch [18/50] batch [1080/2000] time 2.027 (2.028) data 0.000 (0.001) loss 2.1685 (1.2793) lr 1.7290e-03 eta 1 day, 12:34:26
epoch [18/50] batch [1100/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.9644 (1.2773) lr 1.7290e-03 eta 1 day, 12:33:46
epoch [18/50] batch [1120/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.0176 (1.2743) lr 1.7290e-03 eta 1 day, 12:33:06
epoch [18/50] batch [1140/2000] time 1.997 (2.028) data 0.001 (0.001) loss 0.4409 (1.2712) lr 1.7290e-03 eta 1 day, 12:32:22
epoch [18/50] batch [1160/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.3505 (1.2681) lr 1.7290e-03 eta 1 day, 12:31:47
epoch [18/50] batch [1180/2000] time 1.999 (2.028) data 0.000 (0.001) loss 1.3006 (1.2742) lr 1.7290e-03 eta 1 day, 12:31:07
epoch [18/50] batch [1200/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.9224 (1.2734) lr 1.7290e-03 eta 1 day, 12:30:26
epoch [18/50] batch [1220/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.6318 (1.2680) lr 1.7290e-03 eta 1 day, 12:29:49
epoch [18/50] batch [1240/2000] time 1.998 (2.028) data 0.000 (0.001) loss 1.9257 (1.2672) lr 1.7290e-03 eta 1 day, 12:29:10
epoch [18/50] batch [1260/2000] time 2.030 (2.028) data 0.000 (0.001) loss 0.2403 (1.2684) lr 1.7290e-03 eta 1 day, 12:28:32
epoch [18/50] batch [1280/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.1351 (1.2720) lr 1.7290e-03 eta 1 day, 12:27:46
epoch [18/50] batch [1300/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.4727 (1.2704) lr 1.7290e-03 eta 1 day, 12:27:01
epoch [18/50] batch [1320/2000] time 1.999 (2.028) data 0.000 (0.001) loss 0.4871 (1.2688) lr 1.7290e-03 eta 1 day, 12:26:20
epoch [18/50] batch [1340/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.2583 (1.2673) lr 1.7290e-03 eta 1 day, 12:25:41
epoch [18/50] batch [1360/2000] time 2.001 (2.028) data 0.000 (0.001) loss 0.5926 (1.2675) lr 1.7290e-03 eta 1 day, 12:25:06
epoch [18/50] batch [1380/2000] time 2.055 (2.028) data 0.000 (0.001) loss 1.2705 (1.2656) lr 1.7290e-03 eta 1 day, 12:24:27
epoch [18/50] batch [1400/2000] time 2.028 (2.028) data 0.000 (0.001) loss 2.4848 (1.2661) lr 1.7290e-03 eta 1 day, 12:23:48
epoch [18/50] batch [1420/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.8623 (1.2653) lr 1.7290e-03 eta 1 day, 12:23:12
epoch [18/50] batch [1440/2000] time 2.029 (2.028) data 0.000 (0.001) loss 0.7657 (1.2627) lr 1.7290e-03 eta 1 day, 12:22:31
epoch [18/50] batch [1460/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.9477 (1.2625) lr 1.7290e-03 eta 1 day, 12:21:46
epoch [18/50] batch [1480/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.3726 (1.2603) lr 1.7290e-03 eta 1 day, 12:21:07
epoch [18/50] batch [1500/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.0834 (1.2593) lr 1.7290e-03 eta 1 day, 12:20:31
epoch [18/50] batch [1520/2000] time 2.030 (2.028) data 0.000 (0.001) loss 0.1580 (1.2529) lr 1.7290e-03 eta 1 day, 12:19:51
epoch [18/50] batch [1540/2000] time 2.029 (2.028) data 0.000 (0.001) loss 0.5129 (1.2562) lr 1.7290e-03 eta 1 day, 12:19:15
epoch [18/50] batch [1560/2000] time 1.997 (2.028) data 0.000 (0.001) loss 1.8351 (1.2588) lr 1.7290e-03 eta 1 day, 12:18:35
epoch [18/50] batch [1580/2000] time 2.050 (2.029) data 0.000 (0.001) loss 3.1948 (1.2566) lr 1.7290e-03 eta 1 day, 12:17:56
epoch [18/50] batch [1600/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.3017 (1.2593) lr 1.7290e-03 eta 1 day, 12:17:18
epoch [18/50] batch [1620/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.3560 (1.2565) lr 1.7290e-03 eta 1 day, 12:16:36
epoch [18/50] batch [1640/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.4644 (1.2524) lr 1.7290e-03 eta 1 day, 12:15:55
epoch [18/50] batch [1660/2000] time 2.028 (2.029) data 0.000 (0.001) loss 3.3782 (1.2517) lr 1.7290e-03 eta 1 day, 12:15:17
epoch [18/50] batch [1680/2000] time 1.976 (2.029) data 0.001 (0.001) loss 3.3815 (1.2560) lr 1.7290e-03 eta 1 day, 12:14:39
epoch [18/50] batch [1700/2000] time 1.995 (2.029) data 0.000 (0.001) loss 2.5740 (1.2575) lr 1.7290e-03 eta 1 day, 12:14:01
epoch [18/50] batch [1720/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.0553 (1.2573) lr 1.7290e-03 eta 1 day, 12:13:18
epoch [18/50] batch [1740/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.7444 (1.2606) lr 1.7290e-03 eta 1 day, 12:12:37
epoch [18/50] batch [1760/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.4423 (1.2603) lr 1.7290e-03 eta 1 day, 12:11:54
epoch [18/50] batch [1780/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.1765 (1.2597) lr 1.7290e-03 eta 1 day, 12:11:16
epoch [18/50] batch [1800/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.4167 (1.2570) lr 1.7290e-03 eta 1 day, 12:10:34
epoch [18/50] batch [1820/2000] time 2.030 (2.029) data 0.000 (0.000) loss 2.2035 (1.2531) lr 1.7290e-03 eta 1 day, 12:09:54
epoch [18/50] batch [1840/2000] time 2.051 (2.029) data 0.000 (0.000) loss 2.5075 (1.2556) lr 1.7290e-03 eta 1 day, 12:09:19
epoch [18/50] batch [1860/2000] time 2.027 (2.029) data 0.000 (0.000) loss 2.1574 (1.2580) lr 1.7290e-03 eta 1 day, 12:08:38
epoch [18/50] batch [1880/2000] time 1.994 (2.029) data 0.000 (0.000) loss 0.5941 (1.2569) lr 1.7290e-03 eta 1 day, 12:07:55
epoch [18/50] batch [1900/2000] time 2.049 (2.029) data 0.000 (0.000) loss 2.1664 (1.2550) lr 1.7290e-03 eta 1 day, 12:07:13
epoch [18/50] batch [1920/2000] time 1.999 (2.029) data 0.000 (0.000) loss 1.8208 (1.2597) lr 1.7290e-03 eta 1 day, 12:06:29
epoch [18/50] batch [1940/2000] time 2.055 (2.029) data 0.000 (0.000) loss 4.2934 (1.2610) lr 1.7290e-03 eta 1 day, 12:05:53
epoch [18/50] batch [1960/2000] time 2.049 (2.029) data 0.000 (0.000) loss 0.2869 (1.2590) lr 1.7290e-03 eta 1 day, 12:05:07
epoch [18/50] batch [1980/2000] time 2.027 (2.029) data 0.000 (0.000) loss 2.1559 (1.2589) lr 1.7290e-03 eta 1 day, 12:04:26
epoch [18/50] batch [2000/2000] time 1.996 (2.029) data 0.000 (0.000) loss 2.2368 (1.2578) lr 1.6845e-03 eta 1 day, 12:03:44
epoch [19/50] batch [20/2000] time 2.000 (2.057) data 0.000 (0.028) loss 1.1516 (1.3584) lr 1.6845e-03 eta 1 day, 12:33:56
epoch [19/50] batch [40/2000] time 2.055 (2.048) data 0.000 (0.014) loss 1.5983 (1.6327) lr 1.6845e-03 eta 1 day, 12:23:40
epoch [19/50] batch [60/2000] time 2.051 (2.043) data 0.001 (0.009) loss 0.5161 (1.3921) lr 1.6845e-03 eta 1 day, 12:17:35
epoch [19/50] batch [80/2000] time 2.050 (2.039) data 0.000 (0.007) loss 2.2238 (1.3846) lr 1.6845e-03 eta 1 day, 12:12:04
epoch [19/50] batch [100/2000] time 1.998 (2.035) data 0.000 (0.006) loss 1.4525 (1.3490) lr 1.6845e-03 eta 1 day, 12:07:37
epoch [19/50] batch [120/2000] time 2.029 (2.035) data 0.000 (0.005) loss 0.4049 (1.3022) lr 1.6845e-03 eta 1 day, 12:06:10
epoch [19/50] batch [140/2000] time 2.026 (2.033) data 0.000 (0.004) loss 2.9394 (1.3121) lr 1.6845e-03 eta 1 day, 12:04:02
epoch [19/50] batch [160/2000] time 1.996 (2.033) data 0.000 (0.004) loss 1.0348 (1.2731) lr 1.6845e-03 eta 1 day, 12:03:02
epoch [19/50] batch [180/2000] time 1.972 (2.032) data 0.000 (0.003) loss 0.5457 (1.3169) lr 1.6845e-03 eta 1 day, 12:01:19
epoch [19/50] batch [200/2000] time 1.996 (2.032) data 0.000 (0.003) loss 1.7597 (1.3081) lr 1.6845e-03 eta 1 day, 12:00:31
epoch [19/50] batch [220/2000] time 1.998 (2.031) data 0.000 (0.003) loss 1.4108 (1.3029) lr 1.6845e-03 eta 1 day, 11:59:24
epoch [19/50] batch [240/2000] time 2.050 (2.032) data 0.000 (0.002) loss 0.4264 (1.3029) lr 1.6845e-03 eta 1 day, 11:58:48
epoch [19/50] batch [260/2000] time 1.997 (2.031) data 0.000 (0.002) loss 1.0456 (1.2976) lr 1.6845e-03 eta 1 day, 11:57:13
epoch [19/50] batch [280/2000] time 2.031 (2.031) data 0.000 (0.002) loss 1.5224 (1.2817) lr 1.6845e-03 eta 1 day, 11:56:54
epoch [19/50] batch [300/2000] time 1.997 (2.031) data 0.000 (0.002) loss 1.2254 (1.2807) lr 1.6845e-03 eta 1 day, 11:56:27
epoch [19/50] batch [320/2000] time 2.000 (2.031) data 0.000 (0.002) loss 0.2833 (1.2894) lr 1.6845e-03 eta 1 day, 11:55:50
epoch [19/50] batch [340/2000] time 2.029 (2.031) data 0.000 (0.002) loss 0.0986 (1.2950) lr 1.6845e-03 eta 1 day, 11:55:09
epoch [19/50] batch [360/2000] time 2.049 (2.031) data 0.000 (0.002) loss 0.6140 (1.2941) lr 1.6845e-03 eta 1 day, 11:54:37
epoch [19/50] batch [380/2000] time 2.049 (2.031) data 0.000 (0.002) loss 1.7163 (1.2859) lr 1.6845e-03 eta 1 day, 11:53:46
epoch [19/50] batch [400/2000] time 2.054 (2.031) data 0.000 (0.002) loss 0.4140 (1.2867) lr 1.6845e-03 eta 1 day, 11:52:54
epoch [19/50] batch [420/2000] time 2.055 (2.031) data 0.000 (0.002) loss 2.1478 (1.2900) lr 1.6845e-03 eta 1 day, 11:52:14
epoch [19/50] batch [440/2000] time 2.032 (2.031) data 0.000 (0.001) loss 0.5337 (1.2918) lr 1.6845e-03 eta 1 day, 11:51:30
epoch [19/50] batch [460/2000] time 2.060 (2.031) data 0.000 (0.001) loss 1.2571 (1.2810) lr 1.6845e-03 eta 1 day, 11:51:19
epoch [19/50] batch [480/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.6845 (1.2819) lr 1.6845e-03 eta 1 day, 11:50:30
epoch [19/50] batch [500/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.1432 (1.2940) lr 1.6845e-03 eta 1 day, 11:49:38
epoch [19/50] batch [520/2000] time 2.030 (2.031) data 0.000 (0.001) loss 0.3780 (1.2891) lr 1.6845e-03 eta 1 day, 11:48:31
epoch [19/50] batch [540/2000] time 2.059 (2.031) data 0.000 (0.001) loss 0.9103 (1.2767) lr 1.6845e-03 eta 1 day, 11:47:46
epoch [19/50] batch [560/2000] time 2.028 (2.031) data 0.000 (0.001) loss 2.1065 (1.2816) lr 1.6845e-03 eta 1 day, 11:47:07
epoch [19/50] batch [580/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.5681 (1.2814) lr 1.6845e-03 eta 1 day, 11:46:07
epoch [19/50] batch [600/2000] time 2.026 (2.030) data 0.001 (0.001) loss 0.3310 (1.2857) lr 1.6845e-03 eta 1 day, 11:45:29
epoch [19/50] batch [620/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.4321 (1.2738) lr 1.6845e-03 eta 1 day, 11:44:43
epoch [19/50] batch [640/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.3580 (1.2646) lr 1.6845e-03 eta 1 day, 11:43:53
epoch [19/50] batch [660/2000] time 2.029 (2.030) data 0.000 (0.001) loss 2.9915 (1.2666) lr 1.6845e-03 eta 1 day, 11:43:19
epoch [19/50] batch [680/2000] time 1.994 (2.030) data 0.000 (0.001) loss 1.9349 (1.2667) lr 1.6845e-03 eta 1 day, 11:42:27
epoch [19/50] batch [700/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.1903 (1.2699) lr 1.6845e-03 eta 1 day, 11:41:47
epoch [19/50] batch [720/2000] time 2.048 (2.030) data 0.000 (0.001) loss 0.8745 (1.2649) lr 1.6845e-03 eta 1 day, 11:40:59
epoch [19/50] batch [740/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.1953 (1.2717) lr 1.6845e-03 eta 1 day, 11:40:28
epoch [19/50] batch [760/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.4029 (1.2659) lr 1.6845e-03 eta 1 day, 11:39:38
epoch [19/50] batch [780/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.2707 (1.2676) lr 1.6845e-03 eta 1 day, 11:38:57
epoch [19/50] batch [800/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.2086 (1.2680) lr 1.6845e-03 eta 1 day, 11:38:20
epoch [19/50] batch [820/2000] time 1.995 (2.030) data 0.000 (0.001) loss 0.2452 (1.2647) lr 1.6845e-03 eta 1 day, 11:37:26
epoch [19/50] batch [840/2000] time 2.051 (2.030) data 0.000 (0.001) loss 2.9025 (1.2609) lr 1.6845e-03 eta 1 day, 11:36:49
epoch [19/50] batch [860/2000] time 2.052 (2.030) data 0.000 (0.001) loss 2.2117 (1.2614) lr 1.6845e-03 eta 1 day, 11:36:08
epoch [19/50] batch [880/2000] time 2.001 (2.030) data 0.000 (0.001) loss 0.0781 (1.2610) lr 1.6845e-03 eta 1 day, 11:35:30
epoch [19/50] batch [900/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.1464 (1.2632) lr 1.6845e-03 eta 1 day, 11:34:52
epoch [19/50] batch [920/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.3791 (1.2624) lr 1.6845e-03 eta 1 day, 11:34:15
epoch [19/50] batch [940/2000] time 2.052 (2.030) data 0.000 (0.001) loss 3.4980 (1.2626) lr 1.6845e-03 eta 1 day, 11:33:35
epoch [19/50] batch [960/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.3040 (1.2596) lr 1.6845e-03 eta 1 day, 11:33:00
epoch [19/50] batch [980/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.8147 (1.2567) lr 1.6845e-03 eta 1 day, 11:32:28
epoch [19/50] batch [1000/2000] time 2.056 (2.030) data 0.000 (0.001) loss 2.3176 (1.2519) lr 1.6845e-03 eta 1 day, 11:31:40
epoch [19/50] batch [1020/2000] time 1.996 (2.030) data 0.000 (0.001) loss 2.3861 (1.2538) lr 1.6845e-03 eta 1 day, 11:30:54
epoch [19/50] batch [1040/2000] time 2.051 (2.030) data 0.000 (0.001) loss 2.5340 (1.2601) lr 1.6845e-03 eta 1 day, 11:30:14
epoch [19/50] batch [1060/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.5048 (1.2618) lr 1.6845e-03 eta 1 day, 11:29:28
epoch [19/50] batch [1080/2000] time 2.057 (2.030) data 0.000 (0.001) loss 0.3375 (1.2624) lr 1.6845e-03 eta 1 day, 11:28:49
epoch [19/50] batch [1100/2000] time 2.059 (2.030) data 0.000 (0.001) loss 2.0729 (1.2643) lr 1.6845e-03 eta 1 day, 11:28:22
epoch [19/50] batch [1120/2000] time 2.032 (2.030) data 0.000 (0.001) loss 2.3060 (1.2628) lr 1.6845e-03 eta 1 day, 11:27:49
epoch [19/50] batch [1140/2000] time 2.059 (2.030) data 0.001 (0.001) loss 2.3143 (1.2664) lr 1.6845e-03 eta 1 day, 11:27:10
epoch [19/50] batch [1160/2000] time 2.054 (2.031) data 0.000 (0.001) loss 0.5531 (1.2643) lr 1.6845e-03 eta 1 day, 11:26:37
epoch [19/50] batch [1180/2000] time 2.026 (2.031) data 0.000 (0.001) loss 2.2540 (1.2664) lr 1.6845e-03 eta 1 day, 11:26:00
epoch [19/50] batch [1200/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.1031 (1.2686) lr 1.6845e-03 eta 1 day, 11:25:19
epoch [19/50] batch [1220/2000] time 2.026 (2.031) data 0.000 (0.001) loss 1.7901 (1.2745) lr 1.6845e-03 eta 1 day, 11:24:38
epoch [19/50] batch [1240/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.9022 (1.2748) lr 1.6845e-03 eta 1 day, 11:23:50
epoch [19/50] batch [1260/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.7958 (1.2732) lr 1.6845e-03 eta 1 day, 11:23:12
epoch [19/50] batch [1280/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.0662 (1.2736) lr 1.6845e-03 eta 1 day, 11:22:22
epoch [19/50] batch [1300/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.3992 (1.2694) lr 1.6845e-03 eta 1 day, 11:21:38
epoch [19/50] batch [1320/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.2449 (1.2662) lr 1.6845e-03 eta 1 day, 11:20:54
epoch [19/50] batch [1340/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.6037 (1.2635) lr 1.6845e-03 eta 1 day, 11:20:12
epoch [19/50] batch [1360/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.4696 (1.2638) lr 1.6845e-03 eta 1 day, 11:19:38
epoch [19/50] batch [1380/2000] time 2.052 (2.030) data 0.000 (0.001) loss 2.4169 (1.2672) lr 1.6845e-03 eta 1 day, 11:18:59
epoch [19/50] batch [1400/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.2007 (1.2627) lr 1.6845e-03 eta 1 day, 11:18:21
epoch [19/50] batch [1420/2000] time 1.995 (2.030) data 0.000 (0.001) loss 0.4776 (1.2579) lr 1.6845e-03 eta 1 day, 11:17:41
epoch [19/50] batch [1440/2000] time 2.024 (2.030) data 0.000 (0.001) loss 0.4336 (1.2577) lr 1.6845e-03 eta 1 day, 11:16:54
epoch [19/50] batch [1460/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.0500 (1.2591) lr 1.6845e-03 eta 1 day, 11:16:10
epoch [19/50] batch [1480/2000] time 1.973 (2.030) data 0.000 (0.001) loss 0.9089 (1.2610) lr 1.6845e-03 eta 1 day, 11:15:27
epoch [19/50] batch [1500/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.7347 (1.2606) lr 1.6845e-03 eta 1 day, 11:14:45
epoch [19/50] batch [1520/2000] time 2.026 (2.030) data 0.000 (0.001) loss 1.5187 (1.2640) lr 1.6845e-03 eta 1 day, 11:14:00
epoch [19/50] batch [1540/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.6161 (1.2622) lr 1.6845e-03 eta 1 day, 11:13:21
epoch [19/50] batch [1560/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.5987 (1.2659) lr 1.6845e-03 eta 1 day, 11:12:41
epoch [19/50] batch [1580/2000] time 2.028 (2.030) data 0.000 (0.001) loss 2.8750 (1.2621) lr 1.6845e-03 eta 1 day, 11:12:02
epoch [19/50] batch [1600/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.7060 (1.2639) lr 1.6845e-03 eta 1 day, 11:11:22
epoch [19/50] batch [1620/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.3989 (1.2622) lr 1.6845e-03 eta 1 day, 11:10:44
epoch [19/50] batch [1640/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.2409 (1.2643) lr 1.6845e-03 eta 1 day, 11:10:08
epoch [19/50] batch [1660/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.7392 (1.2667) lr 1.6845e-03 eta 1 day, 11:09:29
epoch [19/50] batch [1680/2000] time 2.050 (2.030) data 0.001 (0.001) loss 1.2703 (1.2677) lr 1.6845e-03 eta 1 day, 11:08:44
epoch [19/50] batch [1700/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.9818 (1.2695) lr 1.6845e-03 eta 1 day, 11:08:01
epoch [19/50] batch [1720/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.3002 (1.2694) lr 1.6845e-03 eta 1 day, 11:07:21
epoch [19/50] batch [1740/2000] time 2.058 (2.030) data 0.000 (0.001) loss 2.9394 (1.2713) lr 1.6845e-03 eta 1 day, 11:06:47
epoch [19/50] batch [1760/2000] time 2.034 (2.030) data 0.000 (0.001) loss 2.4256 (1.2718) lr 1.6845e-03 eta 1 day, 11:05:59
epoch [19/50] batch [1780/2000] time 2.056 (2.030) data 0.000 (0.001) loss 2.3404 (1.2724) lr 1.6845e-03 eta 1 day, 11:05:22
epoch [19/50] batch [1800/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.3925 (1.2707) lr 1.6845e-03 eta 1 day, 11:04:41
epoch [19/50] batch [1820/2000] time 1.972 (2.030) data 0.000 (0.001) loss 2.0536 (1.2694) lr 1.6845e-03 eta 1 day, 11:03:54
epoch [19/50] batch [1840/2000] time 1.997 (2.030) data 0.000 (0.000) loss 1.4183 (1.2675) lr 1.6845e-03 eta 1 day, 11:03:11
epoch [19/50] batch [1860/2000] time 1.973 (2.030) data 0.000 (0.000) loss 0.8617 (1.2629) lr 1.6845e-03 eta 1 day, 11:02:28
epoch [19/50] batch [1880/2000] time 2.049 (2.030) data 0.000 (0.000) loss 1.1773 (1.2633) lr 1.6845e-03 eta 1 day, 11:01:43
epoch [19/50] batch [1900/2000] time 2.026 (2.030) data 0.000 (0.000) loss 1.8401 (1.2638) lr 1.6845e-03 eta 1 day, 11:00:58
epoch [19/50] batch [1920/2000] time 2.053 (2.030) data 0.000 (0.000) loss 2.5931 (1.2635) lr 1.6845e-03 eta 1 day, 11:00:22
epoch [19/50] batch [1940/2000] time 2.057 (2.030) data 0.000 (0.000) loss 0.3554 (1.2633) lr 1.6845e-03 eta 1 day, 10:59:44
epoch [19/50] batch [1960/2000] time 2.053 (2.030) data 0.000 (0.000) loss 2.2340 (1.2677) lr 1.6845e-03 eta 1 day, 10:59:08
epoch [19/50] batch [1980/2000] time 2.028 (2.030) data 0.000 (0.000) loss 0.3760 (1.2675) lr 1.6845e-03 eta 1 day, 10:58:26
epoch [19/50] batch [2000/2000] time 2.024 (2.030) data 0.000 (0.000) loss 0.2804 (1.2665) lr 1.6374e-03 eta 1 day, 10:57:42
epoch [20/50] batch [20/2000] time 2.053 (2.062) data 0.000 (0.027) loss 0.3929 (0.9999) lr 1.6374e-03 eta 1 day, 11:29:46
epoch [20/50] batch [40/2000] time 2.054 (2.045) data 0.000 (0.014) loss 1.3309 (1.1612) lr 1.6374e-03 eta 1 day, 11:11:34
epoch [20/50] batch [60/2000] time 2.029 (2.038) data 0.001 (0.009) loss 1.2667 (1.2849) lr 1.6374e-03 eta 1 day, 11:03:39
epoch [20/50] batch [80/2000] time 2.050 (2.037) data 0.000 (0.007) loss 1.0272 (1.3596) lr 1.6374e-03 eta 1 day, 11:02:07
epoch [20/50] batch [100/2000] time 2.027 (2.037) data 0.000 (0.006) loss 0.8591 (1.3830) lr 1.6374e-03 eta 1 day, 11:01:08
epoch [20/50] batch [120/2000] time 1.999 (2.036) data 0.000 (0.005) loss 1.2098 (1.4045) lr 1.6374e-03 eta 1 day, 10:59:21
epoch [20/50] batch [140/2000] time 2.050 (2.034) data 0.000 (0.004) loss 0.5093 (1.3773) lr 1.6374e-03 eta 1 day, 10:57:23
epoch [20/50] batch [160/2000] time 1.999 (2.034) data 0.000 (0.004) loss 0.0893 (1.4003) lr 1.6374e-03 eta 1 day, 10:55:53
epoch [20/50] batch [180/2000] time 2.055 (2.034) data 0.000 (0.003) loss 2.5412 (1.3683) lr 1.6374e-03 eta 1 day, 10:55:49
epoch [20/50] batch [200/2000] time 1.999 (2.034) data 0.000 (0.003) loss 2.2132 (1.3882) lr 1.6374e-03 eta 1 day, 10:54:54
epoch [20/50] batch [220/2000] time 2.052 (2.033) data 0.000 (0.003) loss 0.0829 (1.3542) lr 1.6374e-03 eta 1 day, 10:53:48
epoch [20/50] batch [240/2000] time 2.026 (2.033) data 0.000 (0.002) loss 0.4635 (1.3479) lr 1.6374e-03 eta 1 day, 10:52:52
epoch [20/50] batch [260/2000] time 2.001 (2.033) data 0.000 (0.002) loss 0.5929 (1.3505) lr 1.6374e-03 eta 1 day, 10:51:34
epoch [20/50] batch [280/2000] time 2.028 (2.033) data 0.000 (0.002) loss 0.9339 (1.3363) lr 1.6374e-03 eta 1 day, 10:50:58
epoch [20/50] batch [300/2000] time 2.030 (2.032) data 0.000 (0.002) loss 0.3850 (1.3195) lr 1.6374e-03 eta 1 day, 10:50:01
epoch [20/50] batch [320/2000] time 2.054 (2.032) data 0.000 (0.002) loss 2.9918 (1.3194) lr 1.6374e-03 eta 1 day, 10:49:06
epoch [20/50] batch [340/2000] time 2.029 (2.032) data 0.000 (0.002) loss 0.8090 (1.3220) lr 1.6374e-03 eta 1 day, 10:48:35
epoch [20/50] batch [360/2000] time 2.002 (2.032) data 0.000 (0.002) loss 4.1274 (1.3381) lr 1.6374e-03 eta 1 day, 10:47:58
epoch [20/50] batch [380/2000] time 2.051 (2.032) data 0.000 (0.002) loss 1.9752 (1.3482) lr 1.6374e-03 eta 1 day, 10:47:16
epoch [20/50] batch [400/2000] time 1.998 (2.032) data 0.000 (0.002) loss 0.8472 (1.3366) lr 1.6374e-03 eta 1 day, 10:46:09
epoch [20/50] batch [420/2000] time 2.032 (2.032) data 0.000 (0.001) loss 0.2583 (1.3216) lr 1.6374e-03 eta 1 day, 10:45:40
epoch [20/50] batch [440/2000] time 2.054 (2.032) data 0.000 (0.001) loss 0.3307 (1.3250) lr 1.6374e-03 eta 1 day, 10:45:10
epoch [20/50] batch [460/2000] time 2.054 (2.032) data 0.000 (0.001) loss 1.8186 (1.3243) lr 1.6374e-03 eta 1 day, 10:44:30
epoch [20/50] batch [480/2000] time 2.057 (2.033) data 0.000 (0.001) loss 2.4019 (1.3170) lr 1.6374e-03 eta 1 day, 10:44:04
epoch [20/50] batch [500/2000] time 2.000 (2.033) data 0.000 (0.001) loss 1.3100 (1.3205) lr 1.6374e-03 eta 1 day, 10:43:21
epoch [20/50] batch [520/2000] time 2.058 (2.033) data 0.000 (0.001) loss 0.8223 (1.3155) lr 1.6374e-03 eta 1 day, 10:42:40
epoch [20/50] batch [540/2000] time 2.000 (2.032) data 0.000 (0.001) loss 1.4651 (1.3170) lr 1.6374e-03 eta 1 day, 10:41:51
epoch [20/50] batch [560/2000] time 2.049 (2.032) data 0.000 (0.001) loss 1.1191 (1.3093) lr 1.6374e-03 eta 1 day, 10:41:04
epoch [20/50] batch [580/2000] time 2.051 (2.032) data 0.000 (0.001) loss 1.5583 (1.2995) lr 1.6374e-03 eta 1 day, 10:40:20
epoch [20/50] batch [600/2000] time 1.997 (2.032) data 0.001 (0.001) loss 0.1398 (1.3013) lr 1.6374e-03 eta 1 day, 10:39:41
epoch [20/50] batch [620/2000] time 2.052 (2.032) data 0.000 (0.001) loss 0.8914 (1.3043) lr 1.6374e-03 eta 1 day, 10:38:48
epoch [20/50] batch [640/2000] time 2.029 (2.032) data 0.000 (0.001) loss 0.7488 (1.3024) lr 1.6374e-03 eta 1 day, 10:37:54
epoch [20/50] batch [660/2000] time 2.001 (2.031) data 0.000 (0.001) loss 1.2543 (1.3037) lr 1.6374e-03 eta 1 day, 10:36:49
epoch [20/50] batch [680/2000] time 2.055 (2.032) data 0.000 (0.001) loss 2.2334 (1.2940) lr 1.6374e-03 eta 1 day, 10:36:22
epoch [20/50] batch [700/2000] time 2.029 (2.032) data 0.000 (0.001) loss 4.0954 (1.2952) lr 1.6374e-03 eta 1 day, 10:35:37
epoch [20/50] batch [720/2000] time 2.000 (2.031) data 0.000 (0.001) loss 1.3172 (1.2967) lr 1.6374e-03 eta 1 day, 10:34:45
epoch [20/50] batch [740/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.8497 (1.2963) lr 1.6374e-03 eta 1 day, 10:33:56
epoch [20/50] batch [760/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.7254 (1.2943) lr 1.6374e-03 eta 1 day, 10:33:07
epoch [20/50] batch [780/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.6442 (1.2963) lr 1.6374e-03 eta 1 day, 10:32:16
epoch [20/50] batch [800/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.5978 (1.2980) lr 1.6374e-03 eta 1 day, 10:31:40
epoch [20/50] batch [820/2000] time 2.000 (2.031) data 0.000 (0.001) loss 2.0192 (1.3028) lr 1.6374e-03 eta 1 day, 10:30:45
epoch [20/50] batch [840/2000] time 2.054 (2.031) data 0.000 (0.001) loss 0.2872 (1.3010) lr 1.6374e-03 eta 1 day, 10:30:06
epoch [20/50] batch [860/2000] time 2.055 (2.031) data 0.000 (0.001) loss 1.9904 (1.3062) lr 1.6374e-03 eta 1 day, 10:29:21
epoch [20/50] batch [880/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.0494 (1.3016) lr 1.6374e-03 eta 1 day, 10:28:33
epoch [20/50] batch [900/2000] time 2.030 (2.031) data 0.000 (0.001) loss 0.9476 (1.2927) lr 1.6374e-03 eta 1 day, 10:27:47
epoch [20/50] batch [920/2000] time 2.030 (2.031) data 0.000 (0.001) loss 0.4776 (1.2891) lr 1.6374e-03 eta 1 day, 10:27:09
epoch [20/50] batch [940/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.9889 (1.2866) lr 1.6374e-03 eta 1 day, 10:26:26
epoch [20/50] batch [960/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.4937 (1.2823) lr 1.6374e-03 eta 1 day, 10:25:47
epoch [20/50] batch [980/2000] time 2.032 (2.031) data 0.000 (0.001) loss 0.3504 (1.2755) lr 1.6374e-03 eta 1 day, 10:25:01
epoch [20/50] batch [1000/2000] time 2.026 (2.030) data 0.000 (0.001) loss 3.9835 (1.2855) lr 1.6374e-03 eta 1 day, 10:24:14
epoch [20/50] batch [1020/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.0563 (1.2824) lr 1.6374e-03 eta 1 day, 10:23:27
epoch [20/50] batch [1040/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.7129 (1.2798) lr 1.6374e-03 eta 1 day, 10:22:40
epoch [20/50] batch [1060/2000] time 2.048 (2.030) data 0.000 (0.001) loss 0.6546 (1.2749) lr 1.6374e-03 eta 1 day, 10:21:57
epoch [20/50] batch [1080/2000] time 2.046 (2.030) data 0.000 (0.001) loss 1.6542 (1.2730) lr 1.6374e-03 eta 1 day, 10:21:13
epoch [20/50] batch [1100/2000] time 1.994 (2.030) data 0.000 (0.001) loss 0.1236 (1.2767) lr 1.6374e-03 eta 1 day, 10:20:27
epoch [20/50] batch [1120/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.6599 (1.2746) lr 1.6374e-03 eta 1 day, 10:19:48
epoch [20/50] batch [1140/2000] time 2.055 (2.030) data 0.001 (0.001) loss 1.7180 (1.2702) lr 1.6374e-03 eta 1 day, 10:19:06
epoch [20/50] batch [1160/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.9894 (1.2769) lr 1.6374e-03 eta 1 day, 10:18:33
epoch [20/50] batch [1180/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.9619 (1.2727) lr 1.6374e-03 eta 1 day, 10:17:41
epoch [20/50] batch [1200/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.5076 (1.2692) lr 1.6374e-03 eta 1 day, 10:17:07
epoch [20/50] batch [1220/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.3875 (1.2649) lr 1.6374e-03 eta 1 day, 10:16:23
epoch [20/50] batch [1240/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.9654 (1.2691) lr 1.6374e-03 eta 1 day, 10:15:46
epoch [20/50] batch [1260/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.6553 (1.2622) lr 1.6374e-03 eta 1 day, 10:15:01
epoch [20/50] batch [1280/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.4073 (1.2611) lr 1.6374e-03 eta 1 day, 10:14:24
epoch [20/50] batch [1300/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.8419 (1.2566) lr 1.6374e-03 eta 1 day, 10:13:50
epoch [20/50] batch [1320/2000] time 2.050 (2.030) data 0.000 (0.001) loss 2.8437 (1.2595) lr 1.6374e-03 eta 1 day, 10:13:06
epoch [20/50] batch [1340/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.0933 (1.2586) lr 1.6374e-03 eta 1 day, 10:12:24
epoch [20/50] batch [1360/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.5246 (1.2542) lr 1.6374e-03 eta 1 day, 10:11:45
epoch [20/50] batch [1380/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.6726 (1.2548) lr 1.6374e-03 eta 1 day, 10:11:00
epoch [20/50] batch [1400/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.3532 (1.2564) lr 1.6374e-03 eta 1 day, 10:10:19
epoch [20/50] batch [1420/2000] time 2.056 (2.030) data 0.000 (0.001) loss 1.3304 (1.2597) lr 1.6374e-03 eta 1 day, 10:09:39
epoch [20/50] batch [1440/2000] time 2.004 (2.030) data 0.000 (0.001) loss 0.9688 (1.2586) lr 1.6374e-03 eta 1 day, 10:08:58
epoch [20/50] batch [1460/2000] time 2.002 (2.030) data 0.000 (0.001) loss 1.9804 (1.2585) lr 1.6374e-03 eta 1 day, 10:08:18
epoch [20/50] batch [1480/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.4401 (1.2588) lr 1.6374e-03 eta 1 day, 10:07:43
epoch [20/50] batch [1500/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.3826 (1.2558) lr 1.6374e-03 eta 1 day, 10:07:02
epoch [20/50] batch [1520/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.0664 (1.2543) lr 1.6374e-03 eta 1 day, 10:06:27
epoch [20/50] batch [1540/2000] time 1.999 (2.030) data 0.000 (0.001) loss 2.6527 (1.2588) lr 1.6374e-03 eta 1 day, 10:05:44
epoch [20/50] batch [1560/2000] time 2.052 (2.030) data 0.000 (0.001) loss 3.0645 (1.2611) lr 1.6374e-03 eta 1 day, 10:05:06
epoch [20/50] batch [1580/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.2200 (1.2556) lr 1.6374e-03 eta 1 day, 10:04:28
epoch [20/50] batch [1600/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.0036 (1.2517) lr 1.6374e-03 eta 1 day, 10:03:46
epoch [20/50] batch [1620/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.3443 (1.2520) lr 1.6374e-03 eta 1 day, 10:03:02
epoch [20/50] batch [1640/2000] time 2.032 (2.030) data 0.000 (0.001) loss 0.7064 (1.2491) lr 1.6374e-03 eta 1 day, 10:02:19
epoch [20/50] batch [1660/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.7500 (1.2520) lr 1.6374e-03 eta 1 day, 10:01:42
epoch [20/50] batch [1680/2000] time 2.054 (2.030) data 0.001 (0.001) loss 1.7620 (1.2503) lr 1.6374e-03 eta 1 day, 10:01:00
epoch [20/50] batch [1700/2000] time 2.056 (2.030) data 0.000 (0.001) loss 1.6785 (1.2502) lr 1.6374e-03 eta 1 day, 10:00:24
epoch [20/50] batch [1720/2000] time 2.034 (2.030) data 0.000 (0.001) loss 1.2518 (1.2510) lr 1.6374e-03 eta 1 day, 9:59:47
epoch [20/50] batch [1740/2000] time 2.058 (2.030) data 0.000 (0.001) loss 0.6764 (1.2521) lr 1.6374e-03 eta 1 day, 9:59:12
epoch [20/50] batch [1760/2000] time 2.052 (2.030) data 0.000 (0.001) loss 2.3476 (1.2513) lr 1.6374e-03 eta 1 day, 9:58:32
epoch [20/50] batch [1780/2000] time 2.004 (2.030) data 0.000 (0.001) loss 1.3915 (1.2503) lr 1.6374e-03 eta 1 day, 9:57:53
epoch [20/50] batch [1800/2000] time 2.054 (2.030) data 0.000 (0.001) loss 2.0378 (1.2497) lr 1.6374e-03 eta 1 day, 9:57:12
epoch [20/50] batch [1820/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.3597 (1.2534) lr 1.6374e-03 eta 1 day, 9:56:29
epoch [20/50] batch [1840/2000] time 2.029 (2.030) data 0.000 (0.000) loss 1.6338 (1.2522) lr 1.6374e-03 eta 1 day, 9:55:49
epoch [20/50] batch [1860/2000] time 2.027 (2.030) data 0.000 (0.000) loss 0.2677 (1.2560) lr 1.6374e-03 eta 1 day, 9:55:03
epoch [20/50] batch [1880/2000] time 2.050 (2.030) data 0.000 (0.000) loss 1.1427 (1.2565) lr 1.6374e-03 eta 1 day, 9:54:21
epoch [20/50] batch [1900/2000] time 2.024 (2.030) data 0.000 (0.000) loss 0.7431 (1.2593) lr 1.6374e-03 eta 1 day, 9:53:41
epoch [20/50] batch [1920/2000] time 1.994 (2.030) data 0.000 (0.000) loss 1.0041 (1.2579) lr 1.6374e-03 eta 1 day, 9:52:52
epoch [20/50] batch [1940/2000] time 2.050 (2.030) data 0.000 (0.000) loss 2.9682 (1.2563) lr 1.6374e-03 eta 1 day, 9:52:11
epoch [20/50] batch [1960/2000] time 2.032 (2.030) data 0.000 (0.000) loss 1.4289 (1.2553) lr 1.6374e-03 eta 1 day, 9:51:29
epoch [20/50] batch [1980/2000] time 2.049 (2.030) data 0.000 (0.000) loss 1.3229 (1.2586) lr 1.6374e-03 eta 1 day, 9:50:50
epoch [20/50] batch [2000/2000] time 2.029 (2.030) data 0.000 (0.000) loss 0.2695 (1.2558) lr 1.5878e-03 eta 1 day, 9:50:07
epoch [21/50] batch [20/2000] time 1.997 (2.047) data 0.000 (0.027) loss 1.3580 (1.2656) lr 1.5878e-03 eta 1 day, 10:06:21
epoch [21/50] batch [40/2000] time 2.052 (2.035) data 0.000 (0.014) loss 0.6224 (1.2000) lr 1.5878e-03 eta 1 day, 9:53:18
epoch [21/50] batch [60/2000] time 2.027 (2.033) data 0.001 (0.009) loss 0.7869 (1.2047) lr 1.5878e-03 eta 1 day, 9:50:47
epoch [21/50] batch [80/2000] time 1.997 (2.031) data 0.000 (0.007) loss 0.2764 (1.2597) lr 1.5878e-03 eta 1 day, 9:48:14
epoch [21/50] batch [100/2000] time 2.027 (2.032) data 0.000 (0.006) loss 0.0665 (1.2013) lr 1.5878e-03 eta 1 day, 9:48:55
epoch [21/50] batch [120/2000] time 2.053 (2.031) data 0.000 (0.005) loss 0.2502 (1.1894) lr 1.5878e-03 eta 1 day, 9:46:48
epoch [21/50] batch [140/2000] time 2.028 (2.031) data 0.000 (0.004) loss 0.9652 (1.2042) lr 1.5878e-03 eta 1 day, 9:46:13
epoch [21/50] batch [160/2000] time 2.051 (2.031) data 0.000 (0.004) loss 1.4955 (1.1928) lr 1.5878e-03 eta 1 day, 9:45:25
epoch [21/50] batch [180/2000] time 2.027 (2.031) data 0.000 (0.003) loss 2.0191 (1.1807) lr 1.5878e-03 eta 1 day, 9:44:41
epoch [21/50] batch [200/2000] time 2.030 (2.030) data 0.000 (0.003) loss 2.6986 (1.1785) lr 1.5878e-03 eta 1 day, 9:43:40
epoch [21/50] batch [220/2000] time 2.036 (2.031) data 0.000 (0.003) loss 0.4625 (1.1841) lr 1.5878e-03 eta 1 day, 9:43:06
epoch [21/50] batch [240/2000] time 2.057 (2.031) data 0.000 (0.002) loss 1.8333 (1.1820) lr 1.5878e-03 eta 1 day, 9:43:02
epoch [21/50] batch [260/2000] time 2.054 (2.031) data 0.000 (0.002) loss 1.2916 (1.1724) lr 1.5878e-03 eta 1 day, 9:42:26
epoch [21/50] batch [280/2000] time 2.027 (2.031) data 0.000 (0.002) loss 1.1746 (1.1796) lr 1.5878e-03 eta 1 day, 9:41:12
epoch [21/50] batch [300/2000] time 2.051 (2.030) data 0.000 (0.002) loss 1.0753 (1.1861) lr 1.5878e-03 eta 1 day, 9:39:52
epoch [21/50] batch [320/2000] time 1.999 (2.030) data 0.000 (0.002) loss 1.8146 (1.1817) lr 1.5878e-03 eta 1 day, 9:39:12
epoch [21/50] batch [340/2000] time 2.050 (2.030) data 0.000 (0.002) loss 1.0778 (1.1836) lr 1.5878e-03 eta 1 day, 9:38:31
epoch [21/50] batch [360/2000] time 2.054 (2.030) data 0.000 (0.002) loss 0.3538 (1.1790) lr 1.5878e-03 eta 1 day, 9:37:44
epoch [21/50] batch [380/2000] time 2.028 (2.030) data 0.000 (0.002) loss 2.5778 (1.1969) lr 1.5878e-03 eta 1 day, 9:37:29
epoch [21/50] batch [400/2000] time 2.051 (2.030) data 0.000 (0.002) loss 0.2977 (1.2054) lr 1.5878e-03 eta 1 day, 9:36:51
epoch [21/50] batch [420/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.8355 (1.1976) lr 1.5878e-03 eta 1 day, 9:35:54
epoch [21/50] batch [440/2000] time 2.029 (2.030) data 0.000 (0.001) loss 2.4314 (1.1935) lr 1.5878e-03 eta 1 day, 9:35:13
epoch [21/50] batch [460/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.4945 (1.1875) lr 1.5878e-03 eta 1 day, 9:34:20
epoch [21/50] batch [480/2000] time 2.053 (2.030) data 0.001 (0.001) loss 2.1957 (1.2029) lr 1.5878e-03 eta 1 day, 9:33:46
epoch [21/50] batch [500/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.8713 (1.2186) lr 1.5878e-03 eta 1 day, 9:32:56
epoch [21/50] batch [520/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.6954 (1.2251) lr 1.5878e-03 eta 1 day, 9:31:50
epoch [21/50] batch [540/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.3517 (1.2302) lr 1.5878e-03 eta 1 day, 9:31:04
epoch [21/50] batch [560/2000] time 2.058 (2.030) data 0.000 (0.001) loss 0.0728 (1.2333) lr 1.5878e-03 eta 1 day, 9:30:34
epoch [21/50] batch [580/2000] time 2.060 (2.030) data 0.000 (0.001) loss 2.5466 (1.2357) lr 1.5878e-03 eta 1 day, 9:30:13
epoch [21/50] batch [600/2000] time 2.061 (2.030) data 0.001 (0.001) loss 1.0274 (1.2383) lr 1.5878e-03 eta 1 day, 9:29:42
epoch [21/50] batch [620/2000] time 2.052 (2.030) data 0.000 (0.001) loss 3.1934 (1.2512) lr 1.5878e-03 eta 1 day, 9:29:06
epoch [21/50] batch [640/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.1522 (1.2418) lr 1.5878e-03 eta 1 day, 9:28:13
epoch [21/50] batch [660/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.5957 (1.2462) lr 1.5878e-03 eta 1 day, 9:27:20
epoch [21/50] batch [680/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.3983 (1.2348) lr 1.5878e-03 eta 1 day, 9:26:52
epoch [21/50] batch [700/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.1010 (1.2350) lr 1.5878e-03 eta 1 day, 9:26:15
epoch [21/50] batch [720/2000] time 1.974 (2.030) data 0.000 (0.001) loss 0.1045 (1.2264) lr 1.5878e-03 eta 1 day, 9:25:32
epoch [21/50] batch [740/2000] time 2.053 (2.030) data 0.000 (0.001) loss 2.8333 (1.2333) lr 1.5878e-03 eta 1 day, 9:25:01
epoch [21/50] batch [760/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.8188 (1.2276) lr 1.5878e-03 eta 1 day, 9:24:00
epoch [21/50] batch [780/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.6012 (1.2334) lr 1.5878e-03 eta 1 day, 9:23:13
epoch [21/50] batch [800/2000] time 2.025 (2.030) data 0.000 (0.001) loss 0.8143 (1.2404) lr 1.5878e-03 eta 1 day, 9:22:34
epoch [21/50] batch [820/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.4704 (1.2438) lr 1.5878e-03 eta 1 day, 9:21:51
epoch [21/50] batch [840/2000] time 2.033 (2.030) data 0.000 (0.001) loss 0.8036 (1.2460) lr 1.5878e-03 eta 1 day, 9:21:07
epoch [21/50] batch [860/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.1597 (1.2496) lr 1.5878e-03 eta 1 day, 9:20:25
epoch [21/50] batch [880/2000] time 2.051 (2.030) data 0.000 (0.001) loss 3.5493 (1.2616) lr 1.5878e-03 eta 1 day, 9:19:45
epoch [21/50] batch [900/2000] time 2.056 (2.030) data 0.000 (0.001) loss 0.3513 (1.2616) lr 1.5878e-03 eta 1 day, 9:19:06
epoch [21/50] batch [920/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.4110 (1.2551) lr 1.5878e-03 eta 1 day, 9:18:22
epoch [21/50] batch [940/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.2029 (1.2542) lr 1.5878e-03 eta 1 day, 9:17:45
epoch [21/50] batch [960/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.9853 (1.2539) lr 1.5878e-03 eta 1 day, 9:17:03
epoch [21/50] batch [980/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.2153 (1.2547) lr 1.5878e-03 eta 1 day, 9:16:20
epoch [21/50] batch [1000/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.0710 (1.2516) lr 1.5878e-03 eta 1 day, 9:15:38
epoch [21/50] batch [1020/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.7261 (1.2526) lr 1.5878e-03 eta 1 day, 9:14:54
epoch [21/50] batch [1040/2000] time 2.054 (2.029) data 0.000 (0.001) loss 3.2401 (1.2589) lr 1.5878e-03 eta 1 day, 9:14:15
epoch [21/50] batch [1060/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.0237 (1.2578) lr 1.5878e-03 eta 1 day, 9:13:36
epoch [21/50] batch [1080/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.0185 (1.2550) lr 1.5878e-03 eta 1 day, 9:12:56
epoch [21/50] batch [1100/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.6936 (1.2557) lr 1.5878e-03 eta 1 day, 9:12:13
epoch [21/50] batch [1120/2000] time 2.054 (2.030) data 0.000 (0.001) loss 4.7903 (1.2563) lr 1.5878e-03 eta 1 day, 9:11:42
epoch [21/50] batch [1140/2000] time 2.031 (2.030) data 0.001 (0.001) loss 1.6622 (1.2515) lr 1.5878e-03 eta 1 day, 9:10:58
epoch [21/50] batch [1160/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.5608 (1.2617) lr 1.5878e-03 eta 1 day, 9:10:18
epoch [21/50] batch [1180/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.3860 (1.2587) lr 1.5878e-03 eta 1 day, 9:09:34
epoch [21/50] batch [1200/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.1059 (1.2575) lr 1.5878e-03 eta 1 day, 9:08:59
epoch [21/50] batch [1220/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.5188 (1.2594) lr 1.5878e-03 eta 1 day, 9:08:23
epoch [21/50] batch [1240/2000] time 1.979 (2.030) data 0.000 (0.001) loss 0.3381 (1.2578) lr 1.5878e-03 eta 1 day, 9:07:39
epoch [21/50] batch [1260/2000] time 1.975 (2.030) data 0.000 (0.001) loss 1.5027 (1.2541) lr 1.5878e-03 eta 1 day, 9:07:01
epoch [21/50] batch [1280/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.3355 (1.2525) lr 1.5878e-03 eta 1 day, 9:06:21
epoch [21/50] batch [1300/2000] time 1.974 (2.030) data 0.000 (0.001) loss 1.5198 (1.2462) lr 1.5878e-03 eta 1 day, 9:05:35
epoch [21/50] batch [1320/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.3383 (1.2434) lr 1.5878e-03 eta 1 day, 9:04:46
epoch [21/50] batch [1340/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.7900 (1.2394) lr 1.5878e-03 eta 1 day, 9:04:04
epoch [21/50] batch [1360/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.1817 (1.2406) lr 1.5878e-03 eta 1 day, 9:03:23
epoch [21/50] batch [1380/2000] time 2.026 (2.029) data 0.000 (0.001) loss 4.5237 (1.2470) lr 1.5878e-03 eta 1 day, 9:02:38
epoch [21/50] batch [1400/2000] time 1.994 (2.029) data 0.000 (0.001) loss 3.4512 (1.2481) lr 1.5878e-03 eta 1 day, 9:01:56
epoch [21/50] batch [1420/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.8570 (1.2554) lr 1.5878e-03 eta 1 day, 9:01:14
epoch [21/50] batch [1440/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.4622 (1.2538) lr 1.5878e-03 eta 1 day, 9:00:33
epoch [21/50] batch [1460/2000] time 1.975 (2.029) data 0.000 (0.001) loss 1.1104 (1.2564) lr 1.5878e-03 eta 1 day, 8:59:45
epoch [21/50] batch [1480/2000] time 2.001 (2.029) data 0.000 (0.001) loss 1.5397 (1.2529) lr 1.5878e-03 eta 1 day, 8:59:08
epoch [21/50] batch [1500/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.8230 (1.2549) lr 1.5878e-03 eta 1 day, 8:58:23
epoch [21/50] batch [1520/2000] time 2.000 (2.029) data 0.000 (0.001) loss 1.2720 (1.2541) lr 1.5878e-03 eta 1 day, 8:57:50
epoch [21/50] batch [1540/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.8645 (1.2563) lr 1.5878e-03 eta 1 day, 8:57:12
epoch [21/50] batch [1560/2000] time 2.027 (2.029) data 0.000 (0.001) loss 2.7097 (1.2590) lr 1.5878e-03 eta 1 day, 8:56:25
epoch [21/50] batch [1580/2000] time 1.994 (2.029) data 0.000 (0.001) loss 1.1147 (1.2615) lr 1.5878e-03 eta 1 day, 8:55:41
epoch [21/50] batch [1600/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.3058 (1.2585) lr 1.5878e-03 eta 1 day, 8:55:02
epoch [21/50] batch [1620/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.9726 (1.2608) lr 1.5878e-03 eta 1 day, 8:54:27
epoch [21/50] batch [1640/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.5573 (1.2628) lr 1.5878e-03 eta 1 day, 8:53:46
epoch [21/50] batch [1660/2000] time 1.999 (2.029) data 0.000 (0.001) loss 2.2447 (1.2609) lr 1.5878e-03 eta 1 day, 8:53:07
epoch [21/50] batch [1680/2000] time 1.999 (2.029) data 0.001 (0.001) loss 1.5441 (1.2581) lr 1.5878e-03 eta 1 day, 8:52:23
epoch [21/50] batch [1700/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.0355 (1.2561) lr 1.5878e-03 eta 1 day, 8:51:43
epoch [21/50] batch [1720/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.0658 (1.2568) lr 1.5878e-03 eta 1 day, 8:51:03
epoch [21/50] batch [1740/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.5845 (1.2578) lr 1.5878e-03 eta 1 day, 8:50:24
epoch [21/50] batch [1760/2000] time 1.971 (2.029) data 0.000 (0.001) loss 1.7104 (1.2579) lr 1.5878e-03 eta 1 day, 8:49:44
epoch [21/50] batch [1780/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.8280 (1.2559) lr 1.5878e-03 eta 1 day, 8:48:59
epoch [21/50] batch [1800/2000] time 2.049 (2.029) data 0.000 (0.000) loss 2.3600 (1.2583) lr 1.5878e-03 eta 1 day, 8:48:15
epoch [21/50] batch [1820/2000] time 2.048 (2.029) data 0.000 (0.000) loss 4.2526 (1.2596) lr 1.5878e-03 eta 1 day, 8:47:34
epoch [21/50] batch [1840/2000] time 2.052 (2.029) data 0.000 (0.000) loss 3.2337 (1.2594) lr 1.5878e-03 eta 1 day, 8:46:51
epoch [21/50] batch [1860/2000] time 2.029 (2.029) data 0.000 (0.000) loss 0.5534 (1.2555) lr 1.5878e-03 eta 1 day, 8:46:11
epoch [21/50] batch [1880/2000] time 2.048 (2.029) data 0.000 (0.000) loss 0.2435 (1.2577) lr 1.5878e-03 eta 1 day, 8:45:31
epoch [21/50] batch [1900/2000] time 2.030 (2.029) data 0.000 (0.000) loss 0.6840 (1.2548) lr 1.5878e-03 eta 1 day, 8:44:47
epoch [21/50] batch [1920/2000] time 2.051 (2.029) data 0.000 (0.000) loss 0.2759 (1.2557) lr 1.5878e-03 eta 1 day, 8:44:04
epoch [21/50] batch [1940/2000] time 2.001 (2.029) data 0.000 (0.000) loss 1.0568 (1.2531) lr 1.5878e-03 eta 1 day, 8:43:22
epoch [21/50] batch [1960/2000] time 1.998 (2.029) data 0.000 (0.000) loss 0.5322 (1.2517) lr 1.5878e-03 eta 1 day, 8:42:39
epoch [21/50] batch [1980/2000] time 2.031 (2.029) data 0.000 (0.000) loss 0.1646 (1.2484) lr 1.5878e-03 eta 1 day, 8:41:59
epoch [21/50] batch [2000/2000] time 2.052 (2.029) data 0.000 (0.000) loss 0.8034 (1.2451) lr 1.5358e-03 eta 1 day, 8:41:18
epoch [22/50] batch [20/2000] time 2.030 (2.061) data 0.000 (0.027) loss 0.6088 (1.0958) lr 1.5358e-03 eta 1 day, 9:11:36
epoch [22/50] batch [40/2000] time 2.050 (2.043) data 0.000 (0.013) loss 3.1335 (1.2508) lr 1.5358e-03 eta 1 day, 8:53:58
epoch [22/50] batch [60/2000] time 2.056 (2.041) data 0.001 (0.009) loss 0.8238 (1.4355) lr 1.5358e-03 eta 1 day, 8:50:59
epoch [22/50] batch [80/2000] time 2.054 (2.037) data 0.000 (0.007) loss 1.1219 (1.3971) lr 1.5358e-03 eta 1 day, 8:46:41
epoch [22/50] batch [100/2000] time 1.999 (2.035) data 0.000 (0.005) loss 0.1810 (1.3245) lr 1.5358e-03 eta 1 day, 8:43:29
epoch [22/50] batch [120/2000] time 2.050 (2.034) data 0.000 (0.005) loss 0.4642 (1.3694) lr 1.5358e-03 eta 1 day, 8:42:04
epoch [22/50] batch [140/2000] time 2.050 (2.035) data 0.000 (0.004) loss 0.2742 (1.3392) lr 1.5358e-03 eta 1 day, 8:42:15
epoch [22/50] batch [160/2000] time 1.999 (2.034) data 0.000 (0.003) loss 0.2684 (1.3014) lr 1.5358e-03 eta 1 day, 8:40:32
epoch [22/50] batch [180/2000] time 1.998 (2.034) data 0.000 (0.003) loss 0.4997 (1.2796) lr 1.5358e-03 eta 1 day, 8:40:10
epoch [22/50] batch [200/2000] time 2.049 (2.034) data 0.000 (0.003) loss 2.3341 (1.2933) lr 1.5358e-03 eta 1 day, 8:39:15
epoch [22/50] batch [220/2000] time 2.051 (2.033) data 0.000 (0.003) loss 2.3723 (1.2946) lr 1.5358e-03 eta 1 day, 8:37:38
epoch [22/50] batch [240/2000] time 2.049 (2.033) data 0.000 (0.002) loss 0.4558 (1.2844) lr 1.5358e-03 eta 1 day, 8:37:00
epoch [22/50] batch [260/2000] time 2.028 (2.032) data 0.000 (0.002) loss 2.1281 (1.2870) lr 1.5358e-03 eta 1 day, 8:35:53
epoch [22/50] batch [280/2000] time 2.049 (2.032) data 0.000 (0.002) loss 0.5396 (1.2879) lr 1.5358e-03 eta 1 day, 8:35:00
epoch [22/50] batch [300/2000] time 1.996 (2.032) data 0.000 (0.002) loss 1.1352 (1.2876) lr 1.5358e-03 eta 1 day, 8:34:07
epoch [22/50] batch [320/2000] time 2.029 (2.032) data 0.000 (0.002) loss 1.0958 (1.2777) lr 1.5358e-03 eta 1 day, 8:33:22
epoch [22/50] batch [340/2000] time 2.049 (2.032) data 0.000 (0.002) loss 1.2891 (1.2802) lr 1.5358e-03 eta 1 day, 8:32:47
epoch [22/50] batch [360/2000] time 2.052 (2.032) data 0.000 (0.002) loss 0.2612 (1.2677) lr 1.5358e-03 eta 1 day, 8:31:58
epoch [22/50] batch [380/2000] time 2.055 (2.032) data 0.000 (0.002) loss 0.2874 (1.2592) lr 1.5358e-03 eta 1 day, 8:30:58
epoch [22/50] batch [400/2000] time 2.053 (2.031) data 0.000 (0.002) loss 1.5982 (1.2675) lr 1.5358e-03 eta 1 day, 8:30:12
epoch [22/50] batch [420/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.5809 (1.2700) lr 1.5358e-03 eta 1 day, 8:29:33
epoch [22/50] batch [440/2000] time 2.052 (2.031) data 0.000 (0.001) loss 2.3353 (1.2791) lr 1.5358e-03 eta 1 day, 8:28:41
epoch [22/50] batch [460/2000] time 2.052 (2.031) data 0.000 (0.001) loss 6.1864 (1.3078) lr 1.5358e-03 eta 1 day, 8:28:06
epoch [22/50] batch [480/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.2885 (1.2965) lr 1.5358e-03 eta 1 day, 8:27:19
epoch [22/50] batch [500/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.3517 (1.2888) lr 1.5358e-03 eta 1 day, 8:26:40
epoch [22/50] batch [520/2000] time 2.000 (2.031) data 0.000 (0.001) loss 3.8241 (1.2864) lr 1.5358e-03 eta 1 day, 8:26:02
epoch [22/50] batch [540/2000] time 2.026 (2.032) data 0.000 (0.001) loss 0.6101 (1.2683) lr 1.5358e-03 eta 1 day, 8:25:31
epoch [22/50] batch [560/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.5480 (1.2502) lr 1.5358e-03 eta 1 day, 8:24:42
epoch [22/50] batch [580/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.4634 (1.2435) lr 1.5358e-03 eta 1 day, 8:23:55
epoch [22/50] batch [600/2000] time 2.052 (2.031) data 0.001 (0.001) loss 2.0507 (1.2381) lr 1.5358e-03 eta 1 day, 8:22:53
epoch [22/50] batch [620/2000] time 1.974 (2.031) data 0.000 (0.001) loss 1.7671 (1.2353) lr 1.5358e-03 eta 1 day, 8:22:12
epoch [22/50] batch [640/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.0559 (1.2218) lr 1.5358e-03 eta 1 day, 8:21:25
epoch [22/50] batch [660/2000] time 1.994 (2.031) data 0.000 (0.001) loss 1.5423 (1.2282) lr 1.5358e-03 eta 1 day, 8:20:41
epoch [22/50] batch [680/2000] time 2.049 (2.031) data 0.000 (0.001) loss 0.5335 (1.2348) lr 1.5358e-03 eta 1 day, 8:20:00
epoch [22/50] batch [700/2000] time 1.994 (2.031) data 0.000 (0.001) loss 0.7493 (1.2296) lr 1.5358e-03 eta 1 day, 8:19:12
epoch [22/50] batch [720/2000] time 2.052 (2.031) data 0.000 (0.001) loss 2.7133 (1.2317) lr 1.5358e-03 eta 1 day, 8:18:37
epoch [22/50] batch [740/2000] time 2.053 (2.031) data 0.000 (0.001) loss 2.4358 (1.2280) lr 1.5358e-03 eta 1 day, 8:18:00
epoch [22/50] batch [760/2000] time 2.049 (2.031) data 0.000 (0.001) loss 0.7298 (1.2343) lr 1.5358e-03 eta 1 day, 8:17:11
epoch [22/50] batch [780/2000] time 2.031 (2.031) data 0.000 (0.001) loss 0.6283 (1.2358) lr 1.5358e-03 eta 1 day, 8:16:33
epoch [22/50] batch [800/2000] time 2.003 (2.031) data 0.000 (0.001) loss 0.6398 (1.2370) lr 1.5358e-03 eta 1 day, 8:15:56
epoch [22/50] batch [820/2000] time 2.002 (2.031) data 0.000 (0.001) loss 0.6668 (1.2321) lr 1.5358e-03 eta 1 day, 8:15:18
epoch [22/50] batch [840/2000] time 1.996 (2.031) data 0.000 (0.001) loss 0.5734 (1.2382) lr 1.5358e-03 eta 1 day, 8:14:36
epoch [22/50] batch [860/2000] time 2.025 (2.031) data 0.000 (0.001) loss 0.8124 (1.2447) lr 1.5358e-03 eta 1 day, 8:13:47
epoch [22/50] batch [880/2000] time 2.050 (2.031) data 0.000 (0.001) loss 2.0059 (1.2448) lr 1.5358e-03 eta 1 day, 8:13:07
epoch [22/50] batch [900/2000] time 1.997 (2.031) data 0.000 (0.001) loss 1.5256 (1.2395) lr 1.5358e-03 eta 1 day, 8:12:29
epoch [22/50] batch [920/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.0709 (1.2468) lr 1.5358e-03 eta 1 day, 8:11:38
epoch [22/50] batch [940/2000] time 2.051 (2.030) data 0.000 (0.001) loss 2.7746 (1.2535) lr 1.5358e-03 eta 1 day, 8:10:48
epoch [22/50] batch [960/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.6810 (1.2529) lr 1.5358e-03 eta 1 day, 8:10:08
epoch [22/50] batch [980/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.7919 (1.2492) lr 1.5358e-03 eta 1 day, 8:09:25
epoch [22/50] batch [1000/2000] time 2.050 (2.030) data 0.000 (0.001) loss 2.1992 (1.2535) lr 1.5358e-03 eta 1 day, 8:08:43
epoch [22/50] batch [1020/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.7034 (1.2492) lr 1.5358e-03 eta 1 day, 8:07:58
epoch [22/50] batch [1040/2000] time 2.029 (2.030) data 0.000 (0.001) loss 2.4727 (1.2524) lr 1.5358e-03 eta 1 day, 8:07:09
epoch [22/50] batch [1060/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.1149 (1.2492) lr 1.5358e-03 eta 1 day, 8:06:26
epoch [22/50] batch [1080/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.4191 (1.2520) lr 1.5358e-03 eta 1 day, 8:05:45
epoch [22/50] batch [1100/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.6108 (1.2497) lr 1.5358e-03 eta 1 day, 8:05:07
epoch [22/50] batch [1120/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.8176 (1.2464) lr 1.5358e-03 eta 1 day, 8:04:25
epoch [22/50] batch [1140/2000] time 2.001 (2.030) data 0.001 (0.001) loss 3.2678 (1.2433) lr 1.5358e-03 eta 1 day, 8:03:44
epoch [22/50] batch [1160/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.9369 (1.2454) lr 1.5358e-03 eta 1 day, 8:03:00
epoch [22/50] batch [1180/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.5349 (1.2458) lr 1.5358e-03 eta 1 day, 8:02:12
epoch [22/50] batch [1200/2000] time 1.974 (2.030) data 0.000 (0.001) loss 1.8091 (1.2435) lr 1.5358e-03 eta 1 day, 8:01:25
epoch [22/50] batch [1220/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.4074 (1.2459) lr 1.5358e-03 eta 1 day, 8:00:44
epoch [22/50] batch [1240/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.6289 (1.2489) lr 1.5358e-03 eta 1 day, 7:59:59
epoch [22/50] batch [1260/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.2692 (1.2488) lr 1.5358e-03 eta 1 day, 7:59:11
epoch [22/50] batch [1280/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.7504 (1.2512) lr 1.5358e-03 eta 1 day, 7:58:31
epoch [22/50] batch [1300/2000] time 1.997 (2.029) data 0.000 (0.001) loss 2.3503 (1.2482) lr 1.5358e-03 eta 1 day, 7:57:47
epoch [22/50] batch [1320/2000] time 1.975 (2.029) data 0.000 (0.001) loss 0.8045 (1.2486) lr 1.5358e-03 eta 1 day, 7:57:05
epoch [22/50] batch [1340/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.4083 (1.2457) lr 1.5358e-03 eta 1 day, 7:56:22
epoch [22/50] batch [1360/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.1293 (1.2452) lr 1.5358e-03 eta 1 day, 7:55:37
epoch [22/50] batch [1380/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.5342 (1.2444) lr 1.5358e-03 eta 1 day, 7:54:58
epoch [22/50] batch [1400/2000] time 2.051 (2.029) data 0.000 (0.001) loss 2.8857 (1.2461) lr 1.5358e-03 eta 1 day, 7:54:17
epoch [22/50] batch [1420/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.5678 (1.2474) lr 1.5358e-03 eta 1 day, 7:53:35
epoch [22/50] batch [1440/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.8506 (1.2458) lr 1.5358e-03 eta 1 day, 7:52:56
epoch [22/50] batch [1460/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.3780 (1.2452) lr 1.5358e-03 eta 1 day, 7:52:14
epoch [22/50] batch [1480/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.6841 (1.2478) lr 1.5358e-03 eta 1 day, 7:51:32
epoch [22/50] batch [1500/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.9383 (1.2468) lr 1.5358e-03 eta 1 day, 7:50:52
epoch [22/50] batch [1520/2000] time 2.003 (2.029) data 0.000 (0.001) loss 1.8348 (1.2468) lr 1.5358e-03 eta 1 day, 7:50:07
epoch [22/50] batch [1540/2000] time 2.003 (2.029) data 0.000 (0.001) loss 1.2687 (1.2491) lr 1.5358e-03 eta 1 day, 7:49:30
epoch [22/50] batch [1560/2000] time 2.000 (2.029) data 0.000 (0.001) loss 2.4991 (1.2525) lr 1.5358e-03 eta 1 day, 7:48:52
epoch [22/50] batch [1580/2000] time 2.052 (2.029) data 0.000 (0.001) loss 3.0106 (1.2544) lr 1.5358e-03 eta 1 day, 7:48:13
epoch [22/50] batch [1600/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.4280 (1.2486) lr 1.5358e-03 eta 1 day, 7:47:30
epoch [22/50] batch [1620/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.9251 (1.2472) lr 1.5358e-03 eta 1 day, 7:46:49
epoch [22/50] batch [1640/2000] time 2.025 (2.029) data 0.000 (0.001) loss 3.0386 (1.2517) lr 1.5358e-03 eta 1 day, 7:46:05
epoch [22/50] batch [1660/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.9032 (1.2540) lr 1.5358e-03 eta 1 day, 7:45:24
epoch [22/50] batch [1680/2000] time 2.027 (2.029) data 0.001 (0.001) loss 1.2001 (1.2537) lr 1.5358e-03 eta 1 day, 7:44:40
epoch [22/50] batch [1700/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.4963 (1.2522) lr 1.5358e-03 eta 1 day, 7:43:55
epoch [22/50] batch [1720/2000] time 2.049 (2.029) data 0.000 (0.001) loss 2.9987 (1.2545) lr 1.5358e-03 eta 1 day, 7:43:22
epoch [22/50] batch [1740/2000] time 2.026 (2.029) data 0.000 (0.001) loss 3.6071 (1.2566) lr 1.5358e-03 eta 1 day, 7:42:34
epoch [22/50] batch [1760/2000] time 1.973 (2.029) data 0.000 (0.000) loss 1.1373 (1.2549) lr 1.5358e-03 eta 1 day, 7:41:57
epoch [22/50] batch [1780/2000] time 1.999 (2.029) data 0.000 (0.000) loss 1.8100 (1.2571) lr 1.5358e-03 eta 1 day, 7:41:18
epoch [22/50] batch [1800/2000] time 2.031 (2.029) data 0.000 (0.000) loss 0.6786 (1.2586) lr 1.5358e-03 eta 1 day, 7:40:40
epoch [22/50] batch [1820/2000] time 2.051 (2.029) data 0.000 (0.000) loss 1.2947 (1.2589) lr 1.5358e-03 eta 1 day, 7:39:58
epoch [22/50] batch [1840/2000] time 2.053 (2.029) data 0.000 (0.000) loss 1.7974 (1.2560) lr 1.5358e-03 eta 1 day, 7:39:20
epoch [22/50] batch [1860/2000] time 2.052 (2.029) data 0.000 (0.000) loss 0.4643 (1.2595) lr 1.5358e-03 eta 1 day, 7:38:43
epoch [22/50] batch [1880/2000] time 2.050 (2.029) data 0.000 (0.000) loss 3.2749 (1.2635) lr 1.5358e-03 eta 1 day, 7:37:58
epoch [22/50] batch [1900/2000] time 2.052 (2.029) data 0.000 (0.000) loss 0.1303 (1.2616) lr 1.5358e-03 eta 1 day, 7:37:20
epoch [22/50] batch [1920/2000] time 2.050 (2.029) data 0.000 (0.000) loss 1.3452 (1.2640) lr 1.5358e-03 eta 1 day, 7:36:38
epoch [22/50] batch [1940/2000] time 1.998 (2.029) data 0.000 (0.000) loss 0.4998 (1.2652) lr 1.5358e-03 eta 1 day, 7:36:00
epoch [22/50] batch [1960/2000] time 2.027 (2.029) data 0.000 (0.000) loss 0.4490 (1.2637) lr 1.5358e-03 eta 1 day, 7:35:19
epoch [22/50] batch [1980/2000] time 2.026 (2.029) data 0.000 (0.000) loss 1.2378 (1.2667) lr 1.5358e-03 eta 1 day, 7:34:39
epoch [22/50] batch [2000/2000] time 2.050 (2.029) data 0.000 (0.000) loss 0.0771 (1.2634) lr 1.4818e-03 eta 1 day, 7:33:57
epoch [23/50] batch [20/2000] time 1.999 (2.056) data 0.000 (0.026) loss 2.6476 (1.3726) lr 1.4818e-03 eta 1 day, 7:58:19
epoch [23/50] batch [40/2000] time 1.996 (2.041) data 0.000 (0.013) loss 1.0955 (1.4136) lr 1.4818e-03 eta 1 day, 7:43:26
epoch [23/50] batch [60/2000] time 2.051 (2.037) data 0.001 (0.009) loss 1.1459 (1.4313) lr 1.4818e-03 eta 1 day, 7:39:12
epoch [23/50] batch [80/2000] time 1.997 (2.032) data 0.000 (0.007) loss 0.3955 (1.3429) lr 1.4818e-03 eta 1 day, 7:34:12
epoch [23/50] batch [100/2000] time 2.001 (2.031) data 0.000 (0.005) loss 0.9818 (1.2585) lr 1.4818e-03 eta 1 day, 7:32:26
epoch [23/50] batch [120/2000] time 2.054 (2.030) data 0.000 (0.005) loss 3.0210 (1.2574) lr 1.4818e-03 eta 1 day, 7:30:51
epoch [23/50] batch [140/2000] time 2.000 (2.030) data 0.000 (0.004) loss 1.3488 (1.2603) lr 1.4818e-03 eta 1 day, 7:30:10
epoch [23/50] batch [160/2000] time 2.026 (2.031) data 0.000 (0.003) loss 2.2053 (1.2413) lr 1.4818e-03 eta 1 day, 7:30:03
epoch [23/50] batch [180/2000] time 1.998 (2.030) data 0.000 (0.003) loss 1.5821 (1.2109) lr 1.4818e-03 eta 1 day, 7:28:53
epoch [23/50] batch [200/2000] time 2.028 (2.029) data 0.000 (0.003) loss 1.5585 (1.2046) lr 1.4818e-03 eta 1 day, 7:27:20
epoch [23/50] batch [220/2000] time 1.997 (2.029) data 0.000 (0.003) loss 1.6039 (1.2435) lr 1.4818e-03 eta 1 day, 7:26:39
epoch [23/50] batch [240/2000] time 2.029 (2.030) data 0.000 (0.002) loss 2.5319 (1.2716) lr 1.4818e-03 eta 1 day, 7:26:16
epoch [23/50] batch [260/2000] time 2.051 (2.030) data 0.000 (0.002) loss 2.8083 (1.2639) lr 1.4818e-03 eta 1 day, 7:25:38
epoch [23/50] batch [280/2000] time 2.053 (2.030) data 0.000 (0.002) loss 0.2694 (1.2533) lr 1.4818e-03 eta 1 day, 7:24:46
epoch [23/50] batch [300/2000] time 2.028 (2.029) data 0.000 (0.002) loss 2.7630 (1.2501) lr 1.4818e-03 eta 1 day, 7:23:54
epoch [23/50] batch [320/2000] time 2.029 (2.029) data 0.000 (0.002) loss 1.2075 (1.2474) lr 1.4818e-03 eta 1 day, 7:23:15
epoch [23/50] batch [340/2000] time 2.029 (2.029) data 0.000 (0.002) loss 2.5983 (1.2502) lr 1.4818e-03 eta 1 day, 7:22:16
epoch [23/50] batch [360/2000] time 2.054 (2.029) data 0.000 (0.002) loss 2.0579 (1.2512) lr 1.4818e-03 eta 1 day, 7:21:50
epoch [23/50] batch [380/2000] time 2.000 (2.029) data 0.000 (0.002) loss 0.1941 (1.2450) lr 1.4818e-03 eta 1 day, 7:21:02
epoch [23/50] batch [400/2000] time 2.053 (2.029) data 0.000 (0.002) loss 3.1692 (1.2572) lr 1.4818e-03 eta 1 day, 7:20:26
epoch [23/50] batch [420/2000] time 2.025 (2.030) data 0.000 (0.001) loss 0.2348 (1.2391) lr 1.4818e-03 eta 1 day, 7:20:02
epoch [23/50] batch [440/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.2502 (1.2374) lr 1.4818e-03 eta 1 day, 7:19:11
epoch [23/50] batch [460/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.8762 (1.2350) lr 1.4818e-03 eta 1 day, 7:18:45
epoch [23/50] batch [480/2000] time 2.057 (2.030) data 0.000 (0.001) loss 1.1165 (1.2359) lr 1.4818e-03 eta 1 day, 7:18:04
epoch [23/50] batch [500/2000] time 2.032 (2.030) data 0.000 (0.001) loss 0.9573 (1.2447) lr 1.4818e-03 eta 1 day, 7:17:39
epoch [23/50] batch [520/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.5863 (1.2535) lr 1.4818e-03 eta 1 day, 7:16:47
epoch [23/50] batch [540/2000] time 2.029 (2.030) data 0.000 (0.001) loss 2.6236 (1.2457) lr 1.4818e-03 eta 1 day, 7:16:12
epoch [23/50] batch [560/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.5036 (1.2417) lr 1.4818e-03 eta 1 day, 7:15:22
epoch [23/50] batch [580/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.8558 (1.2319) lr 1.4818e-03 eta 1 day, 7:14:21
epoch [23/50] batch [600/2000] time 1.995 (2.029) data 0.001 (0.001) loss 1.9670 (1.2407) lr 1.4818e-03 eta 1 day, 7:13:41
epoch [23/50] batch [620/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.5040 (1.2369) lr 1.4818e-03 eta 1 day, 7:12:52
epoch [23/50] batch [640/2000] time 1.999 (2.029) data 0.000 (0.001) loss 1.3593 (1.2318) lr 1.4818e-03 eta 1 day, 7:12:11
epoch [23/50] batch [660/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.5545 (1.2365) lr 1.4818e-03 eta 1 day, 7:11:29
epoch [23/50] batch [680/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.0842 (1.2447) lr 1.4818e-03 eta 1 day, 7:10:57
epoch [23/50] batch [700/2000] time 1.994 (2.029) data 0.000 (0.001) loss 1.0028 (1.2417) lr 1.4818e-03 eta 1 day, 7:10:08
epoch [23/50] batch [720/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.6258 (1.2456) lr 1.4818e-03 eta 1 day, 7:09:35
epoch [23/50] batch [740/2000] time 2.025 (2.029) data 0.000 (0.001) loss 1.5473 (1.2462) lr 1.4818e-03 eta 1 day, 7:08:53
epoch [23/50] batch [760/2000] time 2.051 (2.029) data 0.000 (0.001) loss 2.2099 (1.2444) lr 1.4818e-03 eta 1 day, 7:08:12
epoch [23/50] batch [780/2000] time 2.027 (2.029) data 0.000 (0.001) loss 2.1551 (1.2330) lr 1.4818e-03 eta 1 day, 7:07:29
epoch [23/50] batch [800/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.7052 (1.2341) lr 1.4818e-03 eta 1 day, 7:06:38
epoch [23/50] batch [820/2000] time 1.998 (2.029) data 0.000 (0.001) loss 2.1323 (1.2307) lr 1.4818e-03 eta 1 day, 7:05:59
epoch [23/50] batch [840/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.4638 (1.2339) lr 1.4818e-03 eta 1 day, 7:05:25
epoch [23/50] batch [860/2000] time 1.999 (2.029) data 0.000 (0.001) loss 1.0986 (1.2334) lr 1.4818e-03 eta 1 day, 7:04:41
epoch [23/50] batch [880/2000] time 2.031 (2.029) data 0.000 (0.001) loss 3.1441 (1.2360) lr 1.4818e-03 eta 1 day, 7:03:46
epoch [23/50] batch [900/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.6149 (1.2377) lr 1.4818e-03 eta 1 day, 7:03:07
epoch [23/50] batch [920/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.1135 (1.2399) lr 1.4818e-03 eta 1 day, 7:02:16
epoch [23/50] batch [940/2000] time 1.997 (2.029) data 0.000 (0.001) loss 3.2192 (1.2392) lr 1.4818e-03 eta 1 day, 7:01:40
epoch [23/50] batch [960/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.7700 (1.2405) lr 1.4818e-03 eta 1 day, 7:01:00
epoch [23/50] batch [980/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.3789 (1.2381) lr 1.4818e-03 eta 1 day, 7:00:16
epoch [23/50] batch [1000/2000] time 1.971 (2.029) data 0.000 (0.001) loss 2.3040 (1.2358) lr 1.4818e-03 eta 1 day, 6:59:28
epoch [23/50] batch [1020/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.8393 (1.2325) lr 1.4818e-03 eta 1 day, 6:58:49
epoch [23/50] batch [1040/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.0855 (1.2350) lr 1.4818e-03 eta 1 day, 6:58:17
epoch [23/50] batch [1060/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.6136 (1.2355) lr 1.4818e-03 eta 1 day, 6:57:38
epoch [23/50] batch [1080/2000] time 2.053 (2.029) data 0.000 (0.001) loss 4.9291 (1.2343) lr 1.4818e-03 eta 1 day, 6:56:51
epoch [23/50] batch [1100/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.0292 (1.2306) lr 1.4818e-03 eta 1 day, 6:56:14
epoch [23/50] batch [1120/2000] time 2.047 (2.029) data 0.000 (0.001) loss 0.1327 (1.2327) lr 1.4818e-03 eta 1 day, 6:55:37
epoch [23/50] batch [1140/2000] time 1.973 (2.029) data 0.001 (0.001) loss 0.9164 (1.2280) lr 1.4818e-03 eta 1 day, 6:54:54
epoch [23/50] batch [1160/2000] time 1.973 (2.029) data 0.000 (0.001) loss 1.6553 (1.2290) lr 1.4818e-03 eta 1 day, 6:54:13
epoch [23/50] batch [1180/2000] time 1.973 (2.029) data 0.000 (0.001) loss 0.6349 (1.2268) lr 1.4818e-03 eta 1 day, 6:53:29
epoch [23/50] batch [1200/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.2624 (1.2214) lr 1.4818e-03 eta 1 day, 6:52:51
epoch [23/50] batch [1220/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.3798 (1.2221) lr 1.4818e-03 eta 1 day, 6:52:05
epoch [23/50] batch [1240/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.4183 (1.2192) lr 1.4818e-03 eta 1 day, 6:51:26
epoch [23/50] batch [1260/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.2634 (1.2167) lr 1.4818e-03 eta 1 day, 6:50:46
epoch [23/50] batch [1280/2000] time 1.995 (2.028) data 0.000 (0.001) loss 1.2412 (1.2203) lr 1.4818e-03 eta 1 day, 6:49:55
epoch [23/50] batch [1300/2000] time 2.052 (2.028) data 0.000 (0.001) loss 1.1409 (1.2189) lr 1.4818e-03 eta 1 day, 6:49:15
epoch [23/50] batch [1320/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.3587 (1.2187) lr 1.4818e-03 eta 1 day, 6:48:34
epoch [23/50] batch [1340/2000] time 2.028 (2.028) data 0.000 (0.001) loss 2.8403 (1.2192) lr 1.4818e-03 eta 1 day, 6:47:52
epoch [23/50] batch [1360/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.8119 (1.2151) lr 1.4818e-03 eta 1 day, 6:47:18
epoch [23/50] batch [1380/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.7616 (1.2225) lr 1.4818e-03 eta 1 day, 6:46:40
epoch [23/50] batch [1400/2000] time 2.052 (2.029) data 0.000 (0.001) loss 2.8495 (1.2197) lr 1.4818e-03 eta 1 day, 6:45:59
epoch [23/50] batch [1420/2000] time 1.972 (2.029) data 0.000 (0.001) loss 0.4146 (1.2167) lr 1.4818e-03 eta 1 day, 6:45:22
epoch [23/50] batch [1440/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.9774 (1.2163) lr 1.4818e-03 eta 1 day, 6:44:41
epoch [23/50] batch [1460/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.4326 (1.2161) lr 1.4818e-03 eta 1 day, 6:44:02
epoch [23/50] batch [1480/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.9346 (1.2161) lr 1.4818e-03 eta 1 day, 6:43:22
epoch [23/50] batch [1500/2000] time 1.975 (2.029) data 0.000 (0.001) loss 0.9543 (1.2157) lr 1.4818e-03 eta 1 day, 6:42:41
epoch [23/50] batch [1520/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.3486 (1.2185) lr 1.4818e-03 eta 1 day, 6:42:06
epoch [23/50] batch [1540/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.5998 (1.2181) lr 1.4818e-03 eta 1 day, 6:41:23
epoch [23/50] batch [1560/2000] time 2.053 (2.029) data 0.000 (0.001) loss 2.2433 (1.2206) lr 1.4818e-03 eta 1 day, 6:40:42
epoch [23/50] batch [1580/2000] time 2.001 (2.029) data 0.000 (0.001) loss 1.0731 (1.2178) lr 1.4818e-03 eta 1 day, 6:39:56
epoch [23/50] batch [1600/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.7759 (1.2148) lr 1.4818e-03 eta 1 day, 6:39:16
epoch [23/50] batch [1620/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.2320 (1.2187) lr 1.4818e-03 eta 1 day, 6:38:39
epoch [23/50] batch [1640/2000] time 2.001 (2.029) data 0.000 (0.001) loss 0.4312 (1.2208) lr 1.4818e-03 eta 1 day, 6:37:58
epoch [23/50] batch [1660/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.6061 (1.2208) lr 1.4818e-03 eta 1 day, 6:37:19
epoch [23/50] batch [1680/2000] time 2.049 (2.029) data 0.001 (0.001) loss 0.4284 (1.2222) lr 1.4818e-03 eta 1 day, 6:36:42
epoch [23/50] batch [1700/2000] time 2.028 (2.029) data 0.000 (0.001) loss 2.9788 (1.2237) lr 1.4818e-03 eta 1 day, 6:36:05
epoch [23/50] batch [1720/2000] time 2.030 (2.029) data 0.000 (0.001) loss 1.5847 (1.2270) lr 1.4818e-03 eta 1 day, 6:35:26
epoch [23/50] batch [1740/2000] time 2.052 (2.029) data 0.000 (0.001) loss 2.8294 (1.2310) lr 1.4818e-03 eta 1 day, 6:34:48
epoch [23/50] batch [1760/2000] time 2.056 (2.029) data 0.000 (0.001) loss 1.5982 (1.2284) lr 1.4818e-03 eta 1 day, 6:34:12
epoch [23/50] batch [1780/2000] time 2.001 (2.029) data 0.000 (0.000) loss 0.4921 (1.2291) lr 1.4818e-03 eta 1 day, 6:33:37
epoch [23/50] batch [1800/2000] time 1.995 (2.029) data 0.000 (0.000) loss 1.3630 (1.2277) lr 1.4818e-03 eta 1 day, 6:32:56
epoch [23/50] batch [1820/2000] time 1.995 (2.029) data 0.000 (0.000) loss 0.5911 (1.2249) lr 1.4818e-03 eta 1 day, 6:32:14
epoch [23/50] batch [1840/2000] time 2.047 (2.029) data 0.000 (0.000) loss 1.1667 (1.2221) lr 1.4818e-03 eta 1 day, 6:31:30
epoch [23/50] batch [1860/2000] time 2.049 (2.029) data 0.000 (0.000) loss 0.1179 (1.2239) lr 1.4818e-03 eta 1 day, 6:30:49
epoch [23/50] batch [1880/2000] time 2.047 (2.029) data 0.000 (0.000) loss 0.6989 (1.2251) lr 1.4818e-03 eta 1 day, 6:30:07
epoch [23/50] batch [1900/2000] time 2.049 (2.029) data 0.000 (0.000) loss 0.4976 (1.2225) lr 1.4818e-03 eta 1 day, 6:29:25
epoch [23/50] batch [1920/2000] time 2.024 (2.029) data 0.000 (0.000) loss 2.3879 (1.2276) lr 1.4818e-03 eta 1 day, 6:28:42
epoch [23/50] batch [1940/2000] time 2.051 (2.029) data 0.000 (0.000) loss 1.6385 (1.2267) lr 1.4818e-03 eta 1 day, 6:27:58
epoch [23/50] batch [1960/2000] time 2.025 (2.029) data 0.000 (0.000) loss 1.8852 (1.2255) lr 1.4818e-03 eta 1 day, 6:27:15
epoch [23/50] batch [1980/2000] time 1.994 (2.029) data 0.000 (0.000) loss 0.0927 (1.2234) lr 1.4818e-03 eta 1 day, 6:26:33
epoch [23/50] batch [2000/2000] time 1.972 (2.029) data 0.000 (0.000) loss 0.7764 (1.2228) lr 1.4258e-03 eta 1 day, 6:25:51
epoch [24/50] batch [20/2000] time 1.997 (2.057) data 0.000 (0.027) loss 1.2417 (1.3199) lr 1.4258e-03 eta 1 day, 6:50:19
epoch [24/50] batch [40/2000] time 2.029 (2.046) data 0.000 (0.013) loss 1.5355 (1.2472) lr 1.4258e-03 eta 1 day, 6:39:38
epoch [24/50] batch [60/2000] time 1.998 (2.038) data 0.001 (0.009) loss 0.8539 (1.2172) lr 1.4258e-03 eta 1 day, 6:31:57
epoch [24/50] batch [80/2000] time 2.000 (2.037) data 0.000 (0.007) loss 0.7490 (1.2079) lr 1.4258e-03 eta 1 day, 6:30:18
epoch [24/50] batch [100/2000] time 2.052 (2.036) data 0.000 (0.005) loss 1.0766 (1.2340) lr 1.4258e-03 eta 1 day, 6:29:03
epoch [24/50] batch [120/2000] time 1.997 (2.032) data 0.000 (0.005) loss 2.9362 (1.2449) lr 1.4258e-03 eta 1 day, 6:25:09
epoch [24/50] batch [140/2000] time 2.050 (2.032) data 0.000 (0.004) loss 1.5923 (1.2751) lr 1.4258e-03 eta 1 day, 6:24:25
epoch [24/50] batch [160/2000] time 1.975 (2.032) data 0.000 (0.004) loss 0.6591 (1.2682) lr 1.4258e-03 eta 1 day, 6:23:02
epoch [24/50] batch [180/2000] time 2.050 (2.031) data 0.000 (0.003) loss 1.2166 (1.2502) lr 1.4258e-03 eta 1 day, 6:21:30
epoch [24/50] batch [200/2000] time 2.050 (2.031) data 0.000 (0.003) loss 0.9591 (1.2074) lr 1.4258e-03 eta 1 day, 6:21:09
epoch [24/50] batch [220/2000] time 2.050 (2.031) data 0.000 (0.003) loss 1.1905 (1.2400) lr 1.4258e-03 eta 1 day, 6:20:50
epoch [24/50] batch [240/2000] time 2.052 (2.032) data 0.000 (0.002) loss 1.1557 (1.2206) lr 1.4258e-03 eta 1 day, 6:20:15
epoch [24/50] batch [260/2000] time 2.052 (2.032) data 0.000 (0.002) loss 0.7755 (1.2154) lr 1.4258e-03 eta 1 day, 6:20:10
epoch [24/50] batch [280/2000] time 2.052 (2.032) data 0.000 (0.002) loss 1.0705 (1.2042) lr 1.4258e-03 eta 1 day, 6:18:59
epoch [24/50] batch [300/2000] time 2.050 (2.032) data 0.000 (0.002) loss 0.4201 (1.2123) lr 1.4258e-03 eta 1 day, 6:18:14
epoch [24/50] batch [320/2000] time 2.052 (2.031) data 0.000 (0.002) loss 0.3873 (1.1964) lr 1.4258e-03 eta 1 day, 6:17:25
epoch [24/50] batch [340/2000] time 2.050 (2.031) data 0.000 (0.002) loss 2.7758 (1.2098) lr 1.4258e-03 eta 1 day, 6:16:10
epoch [24/50] batch [360/2000] time 2.050 (2.031) data 0.000 (0.002) loss 1.9423 (1.2293) lr 1.4258e-03 eta 1 day, 6:15:34
epoch [24/50] batch [380/2000] time 1.997 (2.031) data 0.000 (0.002) loss 1.7050 (1.2320) lr 1.4258e-03 eta 1 day, 6:14:55
epoch [24/50] batch [400/2000] time 2.032 (2.031) data 0.000 (0.002) loss 0.6598 (1.2112) lr 1.4258e-03 eta 1 day, 6:14:12
epoch [24/50] batch [420/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.5757 (1.2128) lr 1.4258e-03 eta 1 day, 6:13:14
epoch [24/50] batch [440/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.0298 (1.2132) lr 1.4258e-03 eta 1 day, 6:12:30
epoch [24/50] batch [460/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.8582 (1.1978) lr 1.4258e-03 eta 1 day, 6:11:40
epoch [24/50] batch [480/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.2126 (1.2047) lr 1.4258e-03 eta 1 day, 6:11:09
epoch [24/50] batch [500/2000] time 2.027 (2.031) data 0.000 (0.001) loss 1.2937 (1.2207) lr 1.4258e-03 eta 1 day, 6:10:38
epoch [24/50] batch [520/2000] time 1.999 (2.031) data 0.000 (0.001) loss 1.4967 (1.2166) lr 1.4258e-03 eta 1 day, 6:09:57
epoch [24/50] batch [540/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.7751 (1.2242) lr 1.4258e-03 eta 1 day, 6:09:12
epoch [24/50] batch [560/2000] time 2.049 (2.031) data 0.000 (0.001) loss 0.2419 (1.2232) lr 1.4258e-03 eta 1 day, 6:08:30
epoch [24/50] batch [580/2000] time 2.027 (2.030) data 0.000 (0.001) loss 4.0363 (1.2308) lr 1.4258e-03 eta 1 day, 6:07:35
epoch [24/50] batch [600/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.4992 (1.2322) lr 1.4258e-03 eta 1 day, 6:06:43
epoch [24/50] batch [620/2000] time 2.048 (2.030) data 0.000 (0.001) loss 1.6753 (1.2288) lr 1.4258e-03 eta 1 day, 6:06:00
epoch [24/50] batch [640/2000] time 2.025 (2.030) data 0.000 (0.001) loss 3.1072 (1.2417) lr 1.4258e-03 eta 1 day, 6:05:17
epoch [24/50] batch [660/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.2282 (1.2460) lr 1.4258e-03 eta 1 day, 6:04:19
epoch [24/50] batch [680/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.7873 (1.2448) lr 1.4258e-03 eta 1 day, 6:03:43
epoch [24/50] batch [700/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.1464 (1.2373) lr 1.4258e-03 eta 1 day, 6:02:55
epoch [24/50] batch [720/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.4651 (1.2388) lr 1.4258e-03 eta 1 day, 6:02:16
epoch [24/50] batch [740/2000] time 2.059 (2.029) data 0.000 (0.001) loss 2.7964 (1.2376) lr 1.4258e-03 eta 1 day, 6:01:30
epoch [24/50] batch [760/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.1808 (1.2269) lr 1.4258e-03 eta 1 day, 6:00:46
epoch [24/50] batch [780/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.2677 (1.2194) lr 1.4258e-03 eta 1 day, 6:00:07
epoch [24/50] batch [800/2000] time 2.053 (2.029) data 0.000 (0.001) loss 3.2588 (1.2165) lr 1.4258e-03 eta 1 day, 5:59:29
epoch [24/50] batch [820/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.7219 (1.2188) lr 1.4258e-03 eta 1 day, 5:58:47
epoch [24/50] batch [840/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.7366 (1.2239) lr 1.4258e-03 eta 1 day, 5:58:02
epoch [24/50] batch [860/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.8681 (1.2212) lr 1.4258e-03 eta 1 day, 5:57:18
epoch [24/50] batch [880/2000] time 1.974 (2.029) data 0.000 (0.001) loss 0.5436 (1.2184) lr 1.4258e-03 eta 1 day, 5:56:34
epoch [24/50] batch [900/2000] time 2.024 (2.029) data 0.000 (0.001) loss 0.6391 (1.2145) lr 1.4258e-03 eta 1 day, 5:55:52
epoch [24/50] batch [920/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.7082 (1.2129) lr 1.4258e-03 eta 1 day, 5:55:13
epoch [24/50] batch [940/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.9458 (1.2106) lr 1.4258e-03 eta 1 day, 5:54:37
epoch [24/50] batch [960/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.3255 (1.2103) lr 1.4258e-03 eta 1 day, 5:53:46
epoch [24/50] batch [980/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.9712 (1.2117) lr 1.4258e-03 eta 1 day, 5:53:05
epoch [24/50] batch [1000/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.0532 (1.2107) lr 1.4258e-03 eta 1 day, 5:52:25
epoch [24/50] batch [1020/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.4143 (1.2066) lr 1.4258e-03 eta 1 day, 5:51:49
epoch [24/50] batch [1040/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.2837 (1.2134) lr 1.4258e-03 eta 1 day, 5:51:09
epoch [24/50] batch [1060/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.1689 (1.2145) lr 1.4258e-03 eta 1 day, 5:50:21
epoch [24/50] batch [1080/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.8125 (1.2087) lr 1.4258e-03 eta 1 day, 5:49:38
epoch [24/50] batch [1100/2000] time 1.977 (2.029) data 0.000 (0.001) loss 2.9665 (1.2134) lr 1.4258e-03 eta 1 day, 5:48:52
epoch [24/50] batch [1120/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.7655 (1.2132) lr 1.4258e-03 eta 1 day, 5:48:07
epoch [24/50] batch [1140/2000] time 2.030 (2.029) data 0.001 (0.001) loss 0.1338 (1.2098) lr 1.4258e-03 eta 1 day, 5:47:17
epoch [24/50] batch [1160/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.6655 (1.2134) lr 1.4258e-03 eta 1 day, 5:46:42
epoch [24/50] batch [1180/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.2024 (1.2117) lr 1.4258e-03 eta 1 day, 5:46:03
epoch [24/50] batch [1200/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.9986 (1.2137) lr 1.4258e-03 eta 1 day, 5:45:18
epoch [24/50] batch [1220/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.0813 (1.2124) lr 1.4258e-03 eta 1 day, 5:44:41
epoch [24/50] batch [1240/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.5239 (1.2130) lr 1.4258e-03 eta 1 day, 5:44:06
epoch [24/50] batch [1260/2000] time 2.028 (2.029) data 0.000 (0.001) loss 3.2452 (1.2201) lr 1.4258e-03 eta 1 day, 5:43:26
epoch [24/50] batch [1280/2000] time 2.023 (2.029) data 0.000 (0.001) loss 0.1638 (1.2199) lr 1.4258e-03 eta 1 day, 5:42:49
epoch [24/50] batch [1300/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.0812 (1.2216) lr 1.4258e-03 eta 1 day, 5:42:09
epoch [24/50] batch [1320/2000] time 2.025 (2.029) data 0.000 (0.001) loss 0.4638 (1.2199) lr 1.4258e-03 eta 1 day, 5:41:23
epoch [24/50] batch [1340/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.8720 (1.2186) lr 1.4258e-03 eta 1 day, 5:40:47
epoch [24/50] batch [1360/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.2983 (1.2182) lr 1.4258e-03 eta 1 day, 5:40:04
epoch [24/50] batch [1380/2000] time 2.024 (2.029) data 0.000 (0.001) loss 1.2372 (1.2160) lr 1.4258e-03 eta 1 day, 5:39:23
epoch [24/50] batch [1400/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.8210 (1.2160) lr 1.4258e-03 eta 1 day, 5:38:41
epoch [24/50] batch [1420/2000] time 2.025 (2.029) data 0.000 (0.001) loss 0.9433 (1.2153) lr 1.4258e-03 eta 1 day, 5:37:55
epoch [24/50] batch [1440/2000] time 2.052 (2.029) data 0.000 (0.001) loss 3.4828 (1.2152) lr 1.4258e-03 eta 1 day, 5:37:14
epoch [24/50] batch [1460/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.6101 (1.2142) lr 1.4258e-03 eta 1 day, 5:36:30
epoch [24/50] batch [1480/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.7431 (1.2170) lr 1.4258e-03 eta 1 day, 5:35:44
epoch [24/50] batch [1500/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.9329 (1.2215) lr 1.4258e-03 eta 1 day, 5:35:00
epoch [24/50] batch [1520/2000] time 2.000 (2.029) data 0.000 (0.001) loss 1.8239 (1.2179) lr 1.4258e-03 eta 1 day, 5:34:18
epoch [24/50] batch [1540/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.8178 (1.2203) lr 1.4258e-03 eta 1 day, 5:33:36
epoch [24/50] batch [1560/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.1128 (1.2174) lr 1.4258e-03 eta 1 day, 5:32:57
epoch [24/50] batch [1580/2000] time 2.025 (2.029) data 0.000 (0.001) loss 1.0728 (1.2203) lr 1.4258e-03 eta 1 day, 5:32:15
epoch [24/50] batch [1600/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.6285 (1.2210) lr 1.4258e-03 eta 1 day, 5:31:33
epoch [24/50] batch [1620/2000] time 1.974 (2.028) data 0.000 (0.001) loss 0.3609 (1.2256) lr 1.4258e-03 eta 1 day, 5:30:51
epoch [24/50] batch [1640/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.6451 (1.2312) lr 1.4258e-03 eta 1 day, 5:30:17
epoch [24/50] batch [1660/2000] time 1.974 (2.028) data 0.000 (0.001) loss 0.5426 (1.2315) lr 1.4258e-03 eta 1 day, 5:29:30
epoch [24/50] batch [1680/2000] time 2.052 (2.029) data 0.001 (0.001) loss 0.5730 (1.2281) lr 1.4258e-03 eta 1 day, 5:28:52
epoch [24/50] batch [1700/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.1825 (1.2297) lr 1.4258e-03 eta 1 day, 5:28:10
epoch [24/50] batch [1720/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.4496 (1.2342) lr 1.4258e-03 eta 1 day, 5:27:30
epoch [24/50] batch [1740/2000] time 1.998 (2.028) data 0.000 (0.001) loss 1.6417 (1.2345) lr 1.4258e-03 eta 1 day, 5:26:45
epoch [24/50] batch [1760/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.0777 (1.2327) lr 1.4258e-03 eta 1 day, 5:26:03
epoch [24/50] batch [1780/2000] time 1.997 (2.028) data 0.000 (0.000) loss 1.0521 (1.2313) lr 1.4258e-03 eta 1 day, 5:25:21
epoch [24/50] batch [1800/2000] time 1.996 (2.028) data 0.000 (0.000) loss 2.6116 (1.2317) lr 1.4258e-03 eta 1 day, 5:24:39
epoch [24/50] batch [1820/2000] time 2.031 (2.028) data 0.000 (0.000) loss 0.0871 (1.2313) lr 1.4258e-03 eta 1 day, 5:24:02
epoch [24/50] batch [1840/2000] time 1.999 (2.028) data 0.000 (0.000) loss 0.1793 (1.2321) lr 1.4258e-03 eta 1 day, 5:23:23
epoch [24/50] batch [1860/2000] time 1.973 (2.028) data 0.000 (0.000) loss 4.0285 (1.2299) lr 1.4258e-03 eta 1 day, 5:22:42
epoch [24/50] batch [1880/2000] time 2.050 (2.028) data 0.000 (0.000) loss 3.1874 (1.2326) lr 1.4258e-03 eta 1 day, 5:21:58
epoch [24/50] batch [1900/2000] time 1.997 (2.028) data 0.000 (0.000) loss 1.2687 (1.2330) lr 1.4258e-03 eta 1 day, 5:21:17
epoch [24/50] batch [1920/2000] time 2.026 (2.028) data 0.000 (0.000) loss 0.3625 (1.2329) lr 1.4258e-03 eta 1 day, 5:20:37
epoch [24/50] batch [1940/2000] time 2.050 (2.028) data 0.000 (0.000) loss 3.9042 (1.2369) lr 1.4258e-03 eta 1 day, 5:19:55
epoch [24/50] batch [1960/2000] time 1.994 (2.028) data 0.000 (0.000) loss 1.4701 (1.2348) lr 1.4258e-03 eta 1 day, 5:19:14
epoch [24/50] batch [1980/2000] time 2.025 (2.028) data 0.000 (0.000) loss 0.8095 (1.2318) lr 1.4258e-03 eta 1 day, 5:18:34
epoch [24/50] batch [2000/2000] time 1.999 (2.028) data 0.000 (0.000) loss 0.9118 (1.2338) lr 1.3681e-03 eta 1 day, 5:17:53
epoch [25/50] batch [20/2000] time 2.052 (2.060) data 0.000 (0.027) loss 2.3539 (1.1624) lr 1.3681e-03 eta 1 day, 5:44:20
epoch [25/50] batch [40/2000] time 2.030 (2.045) data 0.000 (0.014) loss 1.4935 (1.1560) lr 1.3681e-03 eta 1 day, 5:30:56
epoch [25/50] batch [60/2000] time 1.997 (2.039) data 0.000 (0.009) loss 1.2108 (1.2601) lr 1.3681e-03 eta 1 day, 5:25:05
epoch [25/50] batch [80/2000] time 2.047 (2.037) data 0.000 (0.007) loss 2.0995 (1.2144) lr 1.3681e-03 eta 1 day, 5:22:51
epoch [25/50] batch [100/2000] time 2.049 (2.036) data 0.000 (0.006) loss 2.4382 (1.2586) lr 1.3681e-03 eta 1 day, 5:21:11
epoch [25/50] batch [120/2000] time 2.049 (2.035) data 0.000 (0.005) loss 0.1604 (1.2061) lr 1.3681e-03 eta 1 day, 5:19:39
epoch [25/50] batch [140/2000] time 2.028 (2.034) data 0.000 (0.004) loss 2.5469 (1.1758) lr 1.3681e-03 eta 1 day, 5:17:57
epoch [25/50] batch [160/2000] time 1.994 (2.033) data 0.000 (0.004) loss 1.7195 (1.1533) lr 1.3681e-03 eta 1 day, 5:16:27
epoch [25/50] batch [180/2000] time 2.026 (2.032) data 0.000 (0.003) loss 1.7949 (1.1401) lr 1.3681e-03 eta 1 day, 5:14:43
epoch [25/50] batch [200/2000] time 1.998 (2.031) data 0.000 (0.003) loss 0.5123 (1.1619) lr 1.3681e-03 eta 1 day, 5:13:22
epoch [25/50] batch [220/2000] time 1.996 (2.030) data 0.000 (0.003) loss 2.5890 (1.1619) lr 1.3681e-03 eta 1 day, 5:11:57
epoch [25/50] batch [240/2000] time 2.030 (2.030) data 0.000 (0.002) loss 0.3094 (1.1591) lr 1.3681e-03 eta 1 day, 5:11:01
epoch [25/50] batch [260/2000] time 2.054 (2.029) data 0.000 (0.002) loss 0.5357 (1.1459) lr 1.3681e-03 eta 1 day, 5:09:59
epoch [25/50] batch [280/2000] time 2.029 (2.029) data 0.000 (0.002) loss 0.6680 (1.1842) lr 1.3681e-03 eta 1 day, 5:09:13
epoch [25/50] batch [300/2000] time 1.977 (2.029) data 0.000 (0.002) loss 1.5778 (1.1627) lr 1.3681e-03 eta 1 day, 5:08:10
epoch [25/50] batch [320/2000] time 1.997 (2.029) data 0.000 (0.002) loss 3.7516 (1.1742) lr 1.3681e-03 eta 1 day, 5:07:50
epoch [25/50] batch [340/2000] time 2.050 (2.029) data 0.000 (0.002) loss 1.2306 (1.1564) lr 1.3681e-03 eta 1 day, 5:07:04
epoch [25/50] batch [360/2000] time 1.998 (2.029) data 0.000 (0.002) loss 0.1054 (1.1589) lr 1.3681e-03 eta 1 day, 5:06:22
epoch [25/50] batch [380/2000] time 2.054 (2.029) data 0.000 (0.002) loss 0.2696 (1.1531) lr 1.3681e-03 eta 1 day, 5:05:46
epoch [25/50] batch [400/2000] time 2.052 (2.029) data 0.000 (0.002) loss 1.5447 (1.1633) lr 1.3681e-03 eta 1 day, 5:05:02
epoch [25/50] batch [420/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.2718 (1.1522) lr 1.3681e-03 eta 1 day, 5:04:07
epoch [25/50] batch [440/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.4044 (1.1406) lr 1.3681e-03 eta 1 day, 5:03:15
epoch [25/50] batch [460/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.7540 (1.1555) lr 1.3681e-03 eta 1 day, 5:02:30
epoch [25/50] batch [480/2000] time 1.998 (2.029) data 0.000 (0.001) loss 3.8906 (1.1761) lr 1.3681e-03 eta 1 day, 5:01:49
epoch [25/50] batch [500/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.8535 (1.1703) lr 1.3681e-03 eta 1 day, 5:01:09
epoch [25/50] batch [520/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.1576 (1.1779) lr 1.3681e-03 eta 1 day, 5:00:39
epoch [25/50] batch [540/2000] time 2.002 (2.029) data 0.000 (0.001) loss 0.3718 (1.1719) lr 1.3681e-03 eta 1 day, 4:59:52
epoch [25/50] batch [560/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.4970 (1.1667) lr 1.3681e-03 eta 1 day, 4:59:16
epoch [25/50] batch [580/2000] time 2.000 (2.029) data 0.000 (0.001) loss 2.8644 (1.1626) lr 1.3681e-03 eta 1 day, 4:58:41
epoch [25/50] batch [600/2000] time 1.999 (2.029) data 0.001 (0.001) loss 1.9022 (1.1635) lr 1.3681e-03 eta 1 day, 4:58:02
epoch [25/50] batch [620/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.7957 (1.1609) lr 1.3681e-03 eta 1 day, 4:57:05
epoch [25/50] batch [640/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.8696 (1.1665) lr 1.3681e-03 eta 1 day, 4:56:32
epoch [25/50] batch [660/2000] time 2.030 (2.029) data 0.000 (0.001) loss 1.5891 (1.1725) lr 1.3681e-03 eta 1 day, 4:55:45
epoch [25/50] batch [680/2000] time 2.053 (2.028) data 0.000 (0.001) loss 1.2665 (1.1763) lr 1.3681e-03 eta 1 day, 4:55:02
epoch [25/50] batch [700/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.4566 (1.1765) lr 1.3681e-03 eta 1 day, 4:54:21
epoch [25/50] batch [720/2000] time 1.999 (2.028) data 0.000 (0.001) loss 1.8607 (1.1872) lr 1.3681e-03 eta 1 day, 4:53:41
epoch [25/50] batch [740/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.4305 (1.1978) lr 1.3681e-03 eta 1 day, 4:52:58
epoch [25/50] batch [760/2000] time 2.050 (2.028) data 0.000 (0.001) loss 3.5848 (1.1938) lr 1.3681e-03 eta 1 day, 4:52:10
epoch [25/50] batch [780/2000] time 2.026 (2.028) data 0.000 (0.001) loss 1.5701 (1.1957) lr 1.3681e-03 eta 1 day, 4:51:21
epoch [25/50] batch [800/2000] time 2.051 (2.028) data 0.000 (0.001) loss 3.1760 (1.2006) lr 1.3681e-03 eta 1 day, 4:50:39
epoch [25/50] batch [820/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.1665 (1.1981) lr 1.3681e-03 eta 1 day, 4:50:03
epoch [25/50] batch [840/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.3243 (1.1962) lr 1.3681e-03 eta 1 day, 4:49:26
epoch [25/50] batch [860/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.7025 (1.1890) lr 1.3681e-03 eta 1 day, 4:48:40
epoch [25/50] batch [880/2000] time 2.029 (2.028) data 0.000 (0.001) loss 0.5001 (1.1941) lr 1.3681e-03 eta 1 day, 4:47:57
epoch [25/50] batch [900/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.9486 (1.1909) lr 1.3681e-03 eta 1 day, 4:47:19
epoch [25/50] batch [920/2000] time 2.025 (2.028) data 0.000 (0.001) loss 0.1422 (1.1948) lr 1.3681e-03 eta 1 day, 4:46:34
epoch [25/50] batch [940/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.0015 (1.1984) lr 1.3681e-03 eta 1 day, 4:45:58
epoch [25/50] batch [960/2000] time 1.996 (2.028) data 0.000 (0.001) loss 1.0130 (1.1946) lr 1.3681e-03 eta 1 day, 4:45:17
epoch [25/50] batch [980/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.8836 (1.1991) lr 1.3681e-03 eta 1 day, 4:44:35
epoch [25/50] batch [1000/2000] time 2.030 (2.028) data 0.000 (0.001) loss 1.4865 (1.2034) lr 1.3681e-03 eta 1 day, 4:44:02
epoch [25/50] batch [1020/2000] time 2.032 (2.028) data 0.000 (0.001) loss 0.1376 (1.2008) lr 1.3681e-03 eta 1 day, 4:43:15
epoch [25/50] batch [1040/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.6764 (1.2020) lr 1.3681e-03 eta 1 day, 4:42:33
epoch [25/50] batch [1060/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.8789 (1.1985) lr 1.3681e-03 eta 1 day, 4:41:49
epoch [25/50] batch [1080/2000] time 1.973 (2.028) data 0.000 (0.001) loss 1.3372 (1.1980) lr 1.3681e-03 eta 1 day, 4:41:19
epoch [25/50] batch [1100/2000] time 2.028 (2.028) data 0.000 (0.001) loss 1.1382 (1.1965) lr 1.3681e-03 eta 1 day, 4:40:39
epoch [25/50] batch [1120/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.5884 (1.1940) lr 1.3681e-03 eta 1 day, 4:39:57
epoch [25/50] batch [1140/2000] time 2.052 (2.028) data 0.001 (0.001) loss 0.9584 (1.1973) lr 1.3681e-03 eta 1 day, 4:39:15
epoch [25/50] batch [1160/2000] time 1.996 (2.028) data 0.000 (0.001) loss 1.2053 (1.1938) lr 1.3681e-03 eta 1 day, 4:38:37
epoch [25/50] batch [1180/2000] time 2.027 (2.028) data 0.000 (0.001) loss 2.2888 (1.1991) lr 1.3681e-03 eta 1 day, 4:38:01
epoch [25/50] batch [1200/2000] time 1.999 (2.028) data 0.000 (0.001) loss 0.7816 (1.1946) lr 1.3681e-03 eta 1 day, 4:37:24
epoch [25/50] batch [1220/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.7557 (1.1903) lr 1.3681e-03 eta 1 day, 4:36:38
epoch [25/50] batch [1240/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.3591 (1.1887) lr 1.3681e-03 eta 1 day, 4:35:59
epoch [25/50] batch [1260/2000] time 2.027 (2.028) data 0.000 (0.001) loss 2.9760 (1.1927) lr 1.3681e-03 eta 1 day, 4:35:19
epoch [25/50] batch [1280/2000] time 2.031 (2.028) data 0.000 (0.001) loss 0.6740 (1.1932) lr 1.3681e-03 eta 1 day, 4:34:37
epoch [25/50] batch [1300/2000] time 2.004 (2.028) data 0.000 (0.001) loss 2.0667 (1.1990) lr 1.3681e-03 eta 1 day, 4:33:54
epoch [25/50] batch [1320/2000] time 2.035 (2.028) data 0.000 (0.001) loss 1.0086 (1.1983) lr 1.3681e-03 eta 1 day, 4:33:22
epoch [25/50] batch [1340/2000] time 2.007 (2.029) data 0.000 (0.001) loss 1.4399 (1.2010) lr 1.3681e-03 eta 1 day, 4:32:46
epoch [25/50] batch [1360/2000] time 2.055 (2.029) data 0.000 (0.001) loss 1.2961 (1.2010) lr 1.3681e-03 eta 1 day, 4:32:05
epoch [25/50] batch [1380/2000] time 2.031 (2.029) data 0.000 (0.001) loss 0.8254 (1.2049) lr 1.3681e-03 eta 1 day, 4:31:24
epoch [25/50] batch [1400/2000] time 2.054 (2.028) data 0.000 (0.001) loss 0.4501 (1.2033) lr 1.3681e-03 eta 1 day, 4:30:41
epoch [25/50] batch [1420/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.3691 (1.2085) lr 1.3681e-03 eta 1 day, 4:30:03
epoch [25/50] batch [1440/2000] time 2.052 (2.029) data 0.000 (0.001) loss 2.5499 (1.2105) lr 1.3681e-03 eta 1 day, 4:29:27
epoch [25/50] batch [1460/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.7966 (1.2110) lr 1.3681e-03 eta 1 day, 4:28:48
epoch [25/50] batch [1480/2000] time 2.030 (2.029) data 0.000 (0.001) loss 2.2651 (1.2091) lr 1.3681e-03 eta 1 day, 4:28:13
epoch [25/50] batch [1500/2000] time 1.996 (2.029) data 0.000 (0.001) loss 2.2657 (1.2145) lr 1.3681e-03 eta 1 day, 4:27:31
epoch [25/50] batch [1520/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.1121 (1.2131) lr 1.3681e-03 eta 1 day, 4:26:50
epoch [25/50] batch [1540/2000] time 2.052 (2.029) data 0.000 (0.001) loss 2.1314 (1.2129) lr 1.3681e-03 eta 1 day, 4:26:12
epoch [25/50] batch [1560/2000] time 2.029 (2.029) data 0.000 (0.001) loss 3.7295 (1.2146) lr 1.3681e-03 eta 1 day, 4:25:29
epoch [25/50] batch [1580/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.7217 (1.2133) lr 1.3681e-03 eta 1 day, 4:24:48
epoch [25/50] batch [1600/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.2189 (1.2113) lr 1.3681e-03 eta 1 day, 4:24:04
epoch [25/50] batch [1620/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.6779 (1.2146) lr 1.3681e-03 eta 1 day, 4:23:20
epoch [25/50] batch [1640/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.1834 (1.2144) lr 1.3681e-03 eta 1 day, 4:22:40
epoch [25/50] batch [1660/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.8912 (1.2121) lr 1.3681e-03 eta 1 day, 4:21:55
epoch [25/50] batch [1680/2000] time 2.029 (2.028) data 0.001 (0.001) loss 0.6295 (1.2101) lr 1.3681e-03 eta 1 day, 4:21:12
epoch [25/50] batch [1700/2000] time 2.000 (2.029) data 0.000 (0.001) loss 1.4553 (1.2103) lr 1.3681e-03 eta 1 day, 4:20:34
epoch [25/50] batch [1720/2000] time 2.051 (2.029) data 0.000 (0.001) loss 2.3330 (1.2154) lr 1.3681e-03 eta 1 day, 4:19:55
epoch [25/50] batch [1740/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.6438 (1.2166) lr 1.3681e-03 eta 1 day, 4:19:14
epoch [25/50] batch [1760/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.5780 (1.2150) lr 1.3681e-03 eta 1 day, 4:18:32
epoch [25/50] batch [1780/2000] time 1.996 (2.028) data 0.000 (0.001) loss 0.7683 (1.2142) lr 1.3681e-03 eta 1 day, 4:17:46
epoch [25/50] batch [1800/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.3292 (1.2159) lr 1.3681e-03 eta 1 day, 4:17:03
epoch [25/50] batch [1820/2000] time 2.052 (2.028) data 0.000 (0.001) loss 1.1719 (1.2134) lr 1.3681e-03 eta 1 day, 4:16:25
epoch [25/50] batch [1840/2000] time 1.998 (2.028) data 0.000 (0.000) loss 0.4176 (1.2128) lr 1.3681e-03 eta 1 day, 4:15:40
epoch [25/50] batch [1860/2000] time 2.032 (2.028) data 0.000 (0.000) loss 2.5812 (1.2147) lr 1.3681e-03 eta 1 day, 4:15:02
epoch [25/50] batch [1880/2000] time 2.002 (2.028) data 0.000 (0.000) loss 1.0425 (1.2147) lr 1.3681e-03 eta 1 day, 4:14:22
epoch [25/50] batch [1900/2000] time 2.034 (2.028) data 0.000 (0.000) loss 2.2295 (1.2119) lr 1.3681e-03 eta 1 day, 4:13:43
epoch [25/50] batch [1920/2000] time 2.053 (2.028) data 0.000 (0.000) loss 0.1532 (1.2107) lr 1.3681e-03 eta 1 day, 4:13:03
epoch [25/50] batch [1940/2000] time 2.059 (2.029) data 0.000 (0.000) loss 1.0113 (1.2125) lr 1.3681e-03 eta 1 day, 4:12:29
epoch [25/50] batch [1960/2000] time 2.004 (2.029) data 0.000 (0.000) loss 2.6726 (1.2122) lr 1.3681e-03 eta 1 day, 4:11:53
epoch [25/50] batch [1980/2000] time 2.054 (2.029) data 0.000 (0.000) loss 3.0205 (1.2127) lr 1.3681e-03 eta 1 day, 4:11:18
epoch [25/50] batch [2000/2000] time 2.026 (2.029) data 0.000 (0.000) loss 1.5298 (1.2146) lr 1.3090e-03 eta 1 day, 4:10:37
epoch [26/50] batch [20/2000] time 1.998 (2.062) data 0.000 (0.027) loss 0.5886 (1.0121) lr 1.3090e-03 eta 1 day, 4:37:33
epoch [26/50] batch [40/2000] time 1.977 (2.042) data 0.000 (0.014) loss 1.5649 (1.2029) lr 1.3090e-03 eta 1 day, 4:20:39
epoch [26/50] batch [60/2000] time 1.998 (2.040) data 0.001 (0.009) loss 1.0931 (1.2084) lr 1.3090e-03 eta 1 day, 4:17:39
epoch [26/50] batch [80/2000] time 2.001 (2.038) data 0.000 (0.007) loss 1.0487 (1.2984) lr 1.3090e-03 eta 1 day, 4:15:28
epoch [26/50] batch [100/2000] time 2.028 (2.036) data 0.000 (0.006) loss 3.9071 (1.3089) lr 1.3090e-03 eta 1 day, 4:13:08
epoch [26/50] batch [120/2000] time 2.051 (2.035) data 0.000 (0.005) loss 0.7367 (1.2596) lr 1.3090e-03 eta 1 day, 4:11:23
epoch [26/50] batch [140/2000] time 2.053 (2.035) data 0.000 (0.004) loss 2.1979 (1.2965) lr 1.3090e-03 eta 1 day, 4:10:47
epoch [26/50] batch [160/2000] time 1.998 (2.034) data 0.000 (0.004) loss 0.3573 (1.2404) lr 1.3090e-03 eta 1 day, 4:09:50
epoch [26/50] batch [180/2000] time 2.030 (2.034) data 0.000 (0.003) loss 1.0831 (1.2627) lr 1.3090e-03 eta 1 day, 4:08:30
epoch [26/50] batch [200/2000] time 2.053 (2.033) data 0.000 (0.003) loss 0.2730 (1.2608) lr 1.3090e-03 eta 1 day, 4:07:44
epoch [26/50] batch [220/2000] time 2.052 (2.033) data 0.000 (0.003) loss 1.1036 (1.2529) lr 1.3090e-03 eta 1 day, 4:07:04
epoch [26/50] batch [240/2000] time 1.997 (2.033) data 0.000 (0.002) loss 0.1207 (1.2426) lr 1.3090e-03 eta 1 day, 4:06:11
epoch [26/50] batch [260/2000] time 2.048 (2.033) data 0.000 (0.002) loss 1.1096 (1.2206) lr 1.3090e-03 eta 1 day, 4:05:25
epoch [26/50] batch [280/2000] time 2.027 (2.033) data 0.000 (0.002) loss 0.6370 (1.2191) lr 1.3090e-03 eta 1 day, 4:04:21
epoch [26/50] batch [300/2000] time 2.051 (2.033) data 0.000 (0.002) loss 1.0657 (1.2278) lr 1.3090e-03 eta 1 day, 4:03:41
epoch [26/50] batch [320/2000] time 2.052 (2.032) data 0.000 (0.002) loss 1.4992 (1.2250) lr 1.3090e-03 eta 1 day, 4:02:44
epoch [26/50] batch [340/2000] time 2.026 (2.032) data 0.000 (0.002) loss 0.5997 (1.2341) lr 1.3090e-03 eta 1 day, 4:01:49
epoch [26/50] batch [360/2000] time 2.049 (2.031) data 0.000 (0.002) loss 0.5889 (1.2205) lr 1.3090e-03 eta 1 day, 4:00:37
epoch [26/50] batch [380/2000] time 2.054 (2.031) data 0.000 (0.002) loss 2.5375 (1.2130) lr 1.3090e-03 eta 1 day, 4:00:00
epoch [26/50] batch [400/2000] time 2.052 (2.032) data 0.000 (0.002) loss 2.0691 (1.1993) lr 1.3090e-03 eta 1 day, 3:59:25
epoch [26/50] batch [420/2000] time 2.029 (2.032) data 0.000 (0.001) loss 0.3372 (1.1934) lr 1.3090e-03 eta 1 day, 3:58:44
epoch [26/50] batch [440/2000] time 2.029 (2.032) data 0.000 (0.001) loss 0.8592 (1.1982) lr 1.3090e-03 eta 1 day, 3:58:07
epoch [26/50] batch [460/2000] time 2.055 (2.032) data 0.000 (0.001) loss 0.2153 (1.1924) lr 1.3090e-03 eta 1 day, 3:57:26
epoch [26/50] batch [480/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.4552 (1.1852) lr 1.3090e-03 eta 1 day, 3:56:34
epoch [26/50] batch [500/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.0475 (1.1941) lr 1.3090e-03 eta 1 day, 3:55:34
epoch [26/50] batch [520/2000] time 2.048 (2.031) data 0.000 (0.001) loss 0.3354 (1.1880) lr 1.3090e-03 eta 1 day, 3:55:02
epoch [26/50] batch [540/2000] time 1.996 (2.031) data 0.000 (0.001) loss 0.3969 (1.1785) lr 1.3090e-03 eta 1 day, 3:54:07
epoch [26/50] batch [560/2000] time 2.049 (2.031) data 0.000 (0.001) loss 1.2382 (1.1764) lr 1.3090e-03 eta 1 day, 3:53:18
epoch [26/50] batch [580/2000] time 2.049 (2.031) data 0.000 (0.001) loss 0.3400 (1.1879) lr 1.3090e-03 eta 1 day, 3:52:32
epoch [26/50] batch [600/2000] time 2.029 (2.030) data 0.001 (0.001) loss 0.7160 (1.1831) lr 1.3090e-03 eta 1 day, 3:51:42
epoch [26/50] batch [620/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.0621 (1.1857) lr 1.3090e-03 eta 1 day, 3:51:01
epoch [26/50] batch [640/2000] time 1.974 (2.030) data 0.000 (0.001) loss 0.2615 (1.1976) lr 1.3090e-03 eta 1 day, 3:50:10
epoch [26/50] batch [660/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.5472 (1.1956) lr 1.3090e-03 eta 1 day, 3:49:28
epoch [26/50] batch [680/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.8839 (1.1953) lr 1.3090e-03 eta 1 day, 3:48:44
epoch [26/50] batch [700/2000] time 2.029 (2.030) data 0.000 (0.001) loss 2.4920 (1.1893) lr 1.3090e-03 eta 1 day, 3:48:07
epoch [26/50] batch [720/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.8946 (1.1961) lr 1.3090e-03 eta 1 day, 3:47:26
epoch [26/50] batch [740/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.3906 (1.1998) lr 1.3090e-03 eta 1 day, 3:46:48
epoch [26/50] batch [760/2000] time 2.053 (2.030) data 0.000 (0.001) loss 4.1559 (1.2024) lr 1.3090e-03 eta 1 day, 3:46:13
epoch [26/50] batch [780/2000] time 2.056 (2.030) data 0.000 (0.001) loss 2.1228 (1.2064) lr 1.3090e-03 eta 1 day, 3:45:30
epoch [26/50] batch [800/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.2308 (1.2018) lr 1.3090e-03 eta 1 day, 3:44:52
epoch [26/50] batch [820/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.5976 (1.2051) lr 1.3090e-03 eta 1 day, 3:44:11
epoch [26/50] batch [840/2000] time 1.975 (2.030) data 0.000 (0.001) loss 1.3257 (1.2032) lr 1.3090e-03 eta 1 day, 3:43:32
epoch [26/50] batch [860/2000] time 2.028 (2.030) data 0.000 (0.001) loss 1.8174 (1.2069) lr 1.3090e-03 eta 1 day, 3:42:45
epoch [26/50] batch [880/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.0094 (1.2039) lr 1.3090e-03 eta 1 day, 3:41:59
epoch [26/50] batch [900/2000] time 2.051 (2.030) data 0.000 (0.001) loss 2.4644 (1.2068) lr 1.3090e-03 eta 1 day, 3:41:19
epoch [26/50] batch [920/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.4085 (1.2031) lr 1.3090e-03 eta 1 day, 3:40:42
epoch [26/50] batch [940/2000] time 2.028 (2.030) data 0.000 (0.001) loss 1.5935 (1.2042) lr 1.3090e-03 eta 1 day, 3:40:02
epoch [26/50] batch [960/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.1305 (1.2089) lr 1.3090e-03 eta 1 day, 3:39:21
epoch [26/50] batch [980/2000] time 2.028 (2.030) data 0.000 (0.001) loss 1.8993 (1.2054) lr 1.3090e-03 eta 1 day, 3:38:37
epoch [26/50] batch [1000/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.1677 (1.2073) lr 1.3090e-03 eta 1 day, 3:37:58
epoch [26/50] batch [1020/2000] time 1.975 (2.030) data 0.000 (0.001) loss 1.5622 (1.2092) lr 1.3090e-03 eta 1 day, 3:37:20
epoch [26/50] batch [1040/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.8499 (1.2115) lr 1.3090e-03 eta 1 day, 3:36:39
epoch [26/50] batch [1060/2000] time 2.031 (2.030) data 0.000 (0.001) loss 1.3626 (1.2093) lr 1.3090e-03 eta 1 day, 3:36:00
epoch [26/50] batch [1080/2000] time 2.001 (2.030) data 0.000 (0.001) loss 4.9825 (1.2166) lr 1.3090e-03 eta 1 day, 3:35:17
epoch [26/50] batch [1100/2000] time 2.056 (2.030) data 0.000 (0.001) loss 2.1798 (1.2210) lr 1.3090e-03 eta 1 day, 3:34:36
epoch [26/50] batch [1120/2000] time 1.999 (2.030) data 0.000 (0.001) loss 2.0401 (1.2208) lr 1.3090e-03 eta 1 day, 3:33:54
epoch [26/50] batch [1140/2000] time 2.031 (2.030) data 0.001 (0.001) loss 0.9818 (1.2226) lr 1.3090e-03 eta 1 day, 3:33:19
epoch [26/50] batch [1160/2000] time 2.052 (2.030) data 0.000 (0.001) loss 3.6536 (1.2178) lr 1.3090e-03 eta 1 day, 3:32:47
epoch [26/50] batch [1180/2000] time 2.053 (2.030) data 0.000 (0.001) loss 2.5865 (1.2205) lr 1.3090e-03 eta 1 day, 3:32:02
epoch [26/50] batch [1200/2000] time 2.050 (2.030) data 0.000 (0.001) loss 2.4063 (1.2232) lr 1.3090e-03 eta 1 day, 3:31:25
epoch [26/50] batch [1220/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.3629 (1.2187) lr 1.3090e-03 eta 1 day, 3:30:43
epoch [26/50] batch [1240/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.8200 (1.2242) lr 1.3090e-03 eta 1 day, 3:30:02
epoch [26/50] batch [1260/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.7168 (1.2268) lr 1.3090e-03 eta 1 day, 3:29:18
epoch [26/50] batch [1280/2000] time 2.047 (2.030) data 0.000 (0.001) loss 1.0477 (1.2307) lr 1.3090e-03 eta 1 day, 3:28:39
epoch [26/50] batch [1300/2000] time 1.994 (2.030) data 0.000 (0.001) loss 0.7902 (1.2330) lr 1.3090e-03 eta 1 day, 3:27:54
epoch [26/50] batch [1320/2000] time 2.024 (2.030) data 0.000 (0.001) loss 0.8803 (1.2302) lr 1.3090e-03 eta 1 day, 3:27:10
epoch [26/50] batch [1340/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.9554 (1.2350) lr 1.3090e-03 eta 1 day, 3:26:26
epoch [26/50] batch [1360/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.2309 (1.2311) lr 1.3090e-03 eta 1 day, 3:25:45
epoch [26/50] batch [1380/2000] time 2.025 (2.030) data 0.000 (0.001) loss 2.5940 (1.2320) lr 1.3090e-03 eta 1 day, 3:25:05
epoch [26/50] batch [1400/2000] time 1.973 (2.030) data 0.000 (0.001) loss 1.7201 (1.2323) lr 1.3090e-03 eta 1 day, 3:24:23
epoch [26/50] batch [1420/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.4402 (1.2317) lr 1.3090e-03 eta 1 day, 3:23:45
epoch [26/50] batch [1440/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.3866 (1.2334) lr 1.3090e-03 eta 1 day, 3:23:05
epoch [26/50] batch [1460/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.1324 (1.2344) lr 1.3090e-03 eta 1 day, 3:22:31
epoch [26/50] batch [1480/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.9528 (1.2349) lr 1.3090e-03 eta 1 day, 3:21:48
epoch [26/50] batch [1500/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.0349 (1.2281) lr 1.3090e-03 eta 1 day, 3:21:11
epoch [26/50] batch [1520/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.6200 (1.2265) lr 1.3090e-03 eta 1 day, 3:20:32
epoch [26/50] batch [1540/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.0926 (1.2242) lr 1.3090e-03 eta 1 day, 3:19:52
epoch [26/50] batch [1560/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.5893 (1.2280) lr 1.3090e-03 eta 1 day, 3:19:16
epoch [26/50] batch [1580/2000] time 2.048 (2.030) data 0.000 (0.001) loss 0.5680 (1.2213) lr 1.3090e-03 eta 1 day, 3:18:31
epoch [26/50] batch [1600/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.9752 (1.2188) lr 1.3090e-03 eta 1 day, 3:17:51
epoch [26/50] batch [1620/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.0162 (1.2163) lr 1.3090e-03 eta 1 day, 3:17:08
epoch [26/50] batch [1640/2000] time 1.971 (2.030) data 0.000 (0.001) loss 1.5194 (1.2190) lr 1.3090e-03 eta 1 day, 3:16:21
epoch [26/50] batch [1660/2000] time 2.025 (2.030) data 0.000 (0.001) loss 0.8789 (1.2175) lr 1.3090e-03 eta 1 day, 3:15:42
epoch [26/50] batch [1680/2000] time 2.027 (2.030) data 0.001 (0.001) loss 1.4566 (1.2160) lr 1.3090e-03 eta 1 day, 3:14:57
epoch [26/50] batch [1700/2000] time 1.995 (2.030) data 0.000 (0.001) loss 1.3922 (1.2181) lr 1.3090e-03 eta 1 day, 3:14:14
epoch [26/50] batch [1720/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.5463 (1.2168) lr 1.3090e-03 eta 1 day, 3:13:32
epoch [26/50] batch [1740/2000] time 2.023 (2.030) data 0.000 (0.001) loss 0.1845 (1.2130) lr 1.3090e-03 eta 1 day, 3:12:50
epoch [26/50] batch [1760/2000] time 2.000 (2.030) data 0.000 (0.000) loss 1.2787 (1.2155) lr 1.3090e-03 eta 1 day, 3:12:07
epoch [26/50] batch [1780/2000] time 2.051 (2.030) data 0.000 (0.000) loss 0.9422 (1.2160) lr 1.3090e-03 eta 1 day, 3:11:29
epoch [26/50] batch [1800/2000] time 2.049 (2.030) data 0.000 (0.000) loss 1.5278 (1.2185) lr 1.3090e-03 eta 1 day, 3:10:47
epoch [26/50] batch [1820/2000] time 2.051 (2.030) data 0.000 (0.000) loss 0.2236 (1.2228) lr 1.3090e-03 eta 1 day, 3:10:08
epoch [26/50] batch [1840/2000] time 1.995 (2.030) data 0.000 (0.000) loss 0.2691 (1.2221) lr 1.3090e-03 eta 1 day, 3:09:26
epoch [26/50] batch [1860/2000] time 1.996 (2.030) data 0.000 (0.000) loss 0.8665 (1.2241) lr 1.3090e-03 eta 1 day, 3:08:42
epoch [26/50] batch [1880/2000] time 1.994 (2.030) data 0.000 (0.000) loss 0.2107 (1.2201) lr 1.3090e-03 eta 1 day, 3:07:57
epoch [26/50] batch [1900/2000] time 2.051 (2.030) data 0.000 (0.000) loss 2.8811 (1.2187) lr 1.3090e-03 eta 1 day, 3:07:16
epoch [26/50] batch [1920/2000] time 2.047 (2.030) data 0.000 (0.000) loss 0.0264 (1.2200) lr 1.3090e-03 eta 1 day, 3:06:33
epoch [26/50] batch [1940/2000] time 2.025 (2.030) data 0.000 (0.000) loss 1.1565 (1.2208) lr 1.3090e-03 eta 1 day, 3:05:47
epoch [26/50] batch [1960/2000] time 2.053 (2.030) data 0.000 (0.000) loss 1.0455 (1.2204) lr 1.3090e-03 eta 1 day, 3:05:08
epoch [26/50] batch [1980/2000] time 2.047 (2.030) data 0.000 (0.000) loss 0.9159 (1.2184) lr 1.3090e-03 eta 1 day, 3:04:29
epoch [26/50] batch [2000/2000] time 2.046 (2.030) data 0.000 (0.000) loss 1.2515 (1.2172) lr 1.2487e-03 eta 1 day, 3:03:47
epoch [27/50] batch [20/2000] time 2.048 (2.055) data 0.000 (0.027) loss 0.2685 (1.3183) lr 1.2487e-03 eta 1 day, 3:23:09
epoch [27/50] batch [40/2000] time 2.025 (2.040) data 0.000 (0.013) loss 0.5037 (1.4590) lr 1.2487e-03 eta 1 day, 3:10:25
epoch [27/50] batch [60/2000] time 1.995 (2.036) data 0.001 (0.009) loss 0.7751 (1.3448) lr 1.2487e-03 eta 1 day, 3:06:54
epoch [27/50] batch [80/2000] time 1.971 (2.037) data 0.000 (0.007) loss 1.4677 (1.3297) lr 1.2487e-03 eta 1 day, 3:06:38
epoch [27/50] batch [100/2000] time 2.048 (2.034) data 0.000 (0.005) loss 1.7228 (1.3182) lr 1.2487e-03 eta 1 day, 3:04:11
epoch [27/50] batch [120/2000] time 2.028 (2.033) data 0.000 (0.005) loss 0.4248 (1.2908) lr 1.2487e-03 eta 1 day, 3:02:37
epoch [27/50] batch [140/2000] time 2.050 (2.032) data 0.000 (0.004) loss 0.1055 (1.2925) lr 1.2487e-03 eta 1 day, 3:00:52
epoch [27/50] batch [160/2000] time 2.054 (2.032) data 0.000 (0.004) loss 0.1889 (1.3017) lr 1.2487e-03 eta 1 day, 2:59:57
epoch [27/50] batch [180/2000] time 2.049 (2.032) data 0.000 (0.003) loss 1.4953 (1.2910) lr 1.2487e-03 eta 1 day, 2:59:07
epoch [27/50] batch [200/2000] time 2.029 (2.032) data 0.000 (0.003) loss 0.3364 (1.2797) lr 1.2487e-03 eta 1 day, 2:58:40
epoch [27/50] batch [220/2000] time 2.053 (2.031) data 0.000 (0.003) loss 1.6330 (1.2596) lr 1.2487e-03 eta 1 day, 2:57:17
epoch [27/50] batch [240/2000] time 1.998 (2.031) data 0.000 (0.002) loss 0.3220 (1.2393) lr 1.2487e-03 eta 1 day, 2:56:34
epoch [27/50] batch [260/2000] time 2.053 (2.031) data 0.000 (0.002) loss 0.6798 (1.2373) lr 1.2487e-03 eta 1 day, 2:56:12
epoch [27/50] batch [280/2000] time 2.028 (2.031) data 0.000 (0.002) loss 2.4867 (1.2123) lr 1.2487e-03 eta 1 day, 2:55:20
epoch [27/50] batch [300/2000] time 2.049 (2.031) data 0.000 (0.002) loss 0.4819 (1.2115) lr 1.2487e-03 eta 1 day, 2:54:48
epoch [27/50] batch [320/2000] time 2.049 (2.031) data 0.000 (0.002) loss 0.5186 (1.1998) lr 1.2487e-03 eta 1 day, 2:54:10
epoch [27/50] batch [340/2000] time 1.997 (2.031) data 0.000 (0.002) loss 1.2550 (1.2094) lr 1.2487e-03 eta 1 day, 2:53:29
epoch [27/50] batch [360/2000] time 2.049 (2.031) data 0.000 (0.002) loss 0.6274 (1.2102) lr 1.2487e-03 eta 1 day, 2:52:39
epoch [27/50] batch [380/2000] time 1.992 (2.031) data 0.000 (0.002) loss 0.2887 (1.2046) lr 1.2487e-03 eta 1 day, 2:51:42
epoch [27/50] batch [400/2000] time 1.996 (2.031) data 0.000 (0.002) loss 1.6892 (1.1957) lr 1.2487e-03 eta 1 day, 2:50:53
epoch [27/50] batch [420/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.1394 (1.2018) lr 1.2487e-03 eta 1 day, 2:50:00
epoch [27/50] batch [440/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.7527 (1.2062) lr 1.2487e-03 eta 1 day, 2:49:19
epoch [27/50] batch [460/2000] time 1.971 (2.030) data 0.000 (0.001) loss 1.9459 (1.1994) lr 1.2487e-03 eta 1 day, 2:48:26
epoch [27/50] batch [480/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.4843 (1.1887) lr 1.2487e-03 eta 1 day, 2:47:47
epoch [27/50] batch [500/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.6619 (1.1864) lr 1.2487e-03 eta 1 day, 2:47:06
epoch [27/50] batch [520/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.3772 (1.1906) lr 1.2487e-03 eta 1 day, 2:46:07
epoch [27/50] batch [540/2000] time 1.999 (2.030) data 0.000 (0.001) loss 2.3112 (1.1983) lr 1.2487e-03 eta 1 day, 2:45:29
epoch [27/50] batch [560/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.9862 (1.1993) lr 1.2487e-03 eta 1 day, 2:44:43
epoch [27/50] batch [580/2000] time 1.973 (2.030) data 0.000 (0.001) loss 1.5236 (1.1972) lr 1.2487e-03 eta 1 day, 2:44:00
epoch [27/50] batch [600/2000] time 2.000 (2.029) data 0.001 (0.001) loss 1.0343 (1.2132) lr 1.2487e-03 eta 1 day, 2:43:14
epoch [27/50] batch [620/2000] time 1.972 (2.029) data 0.000 (0.001) loss 3.3430 (1.2124) lr 1.2487e-03 eta 1 day, 2:42:28
epoch [27/50] batch [640/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.1367 (1.2066) lr 1.2487e-03 eta 1 day, 2:41:46
epoch [27/50] batch [660/2000] time 2.026 (2.029) data 0.000 (0.001) loss 2.5683 (1.2168) lr 1.2487e-03 eta 1 day, 2:41:15
epoch [27/50] batch [680/2000] time 1.972 (2.029) data 0.000 (0.001) loss 2.0683 (1.2330) lr 1.2487e-03 eta 1 day, 2:40:26
epoch [27/50] batch [700/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.1695 (1.2366) lr 1.2487e-03 eta 1 day, 2:39:40
epoch [27/50] batch [720/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.1941 (1.2424) lr 1.2487e-03 eta 1 day, 2:38:57
epoch [27/50] batch [740/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.2966 (1.2403) lr 1.2487e-03 eta 1 day, 2:38:20
epoch [27/50] batch [760/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.7681 (1.2356) lr 1.2487e-03 eta 1 day, 2:37:34
epoch [27/50] batch [780/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.9509 (1.2380) lr 1.2487e-03 eta 1 day, 2:36:50
epoch [27/50] batch [800/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.1483 (1.2291) lr 1.2487e-03 eta 1 day, 2:36:11
epoch [27/50] batch [820/2000] time 1.972 (2.029) data 0.000 (0.001) loss 0.2250 (1.2294) lr 1.2487e-03 eta 1 day, 2:35:28
epoch [27/50] batch [840/2000] time 2.025 (2.029) data 0.000 (0.001) loss 2.0453 (1.2283) lr 1.2487e-03 eta 1 day, 2:34:47
epoch [27/50] batch [860/2000] time 2.024 (2.029) data 0.000 (0.001) loss 0.7541 (1.2324) lr 1.2487e-03 eta 1 day, 2:34:04
epoch [27/50] batch [880/2000] time 2.023 (2.029) data 0.000 (0.001) loss 0.1209 (1.2342) lr 1.2487e-03 eta 1 day, 2:33:17
epoch [27/50] batch [900/2000] time 2.047 (2.029) data 0.000 (0.001) loss 0.0479 (1.2275) lr 1.2487e-03 eta 1 day, 2:32:35
epoch [27/50] batch [920/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.0618 (1.2266) lr 1.2487e-03 eta 1 day, 2:31:51
epoch [27/50] batch [940/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.8280 (1.2285) lr 1.2487e-03 eta 1 day, 2:31:17
epoch [27/50] batch [960/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.4761 (1.2235) lr 1.2487e-03 eta 1 day, 2:30:33
epoch [27/50] batch [980/2000] time 1.998 (2.029) data 0.000 (0.001) loss 3.1569 (1.2205) lr 1.2487e-03 eta 1 day, 2:29:47
epoch [27/50] batch [1000/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.4102 (1.2217) lr 1.2487e-03 eta 1 day, 2:29:04
epoch [27/50] batch [1020/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.1764 (1.2187) lr 1.2487e-03 eta 1 day, 2:28:27
epoch [27/50] batch [1040/2000] time 2.029 (2.029) data 0.000 (0.001) loss 2.3685 (1.2147) lr 1.2487e-03 eta 1 day, 2:27:50
epoch [27/50] batch [1060/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.7533 (1.2187) lr 1.2487e-03 eta 1 day, 2:27:08
epoch [27/50] batch [1080/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.2796 (1.2209) lr 1.2487e-03 eta 1 day, 2:26:26
epoch [27/50] batch [1100/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.8932 (1.2208) lr 1.2487e-03 eta 1 day, 2:25:46
epoch [27/50] batch [1120/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.5418 (1.2181) lr 1.2487e-03 eta 1 day, 2:25:00
epoch [27/50] batch [1140/2000] time 1.997 (2.029) data 0.001 (0.001) loss 1.9377 (1.2211) lr 1.2487e-03 eta 1 day, 2:24:20
epoch [27/50] batch [1160/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.2939 (1.2246) lr 1.2487e-03 eta 1 day, 2:23:39
epoch [27/50] batch [1180/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.1804 (1.2318) lr 1.2487e-03 eta 1 day, 2:22:59
epoch [27/50] batch [1200/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.3047 (1.2305) lr 1.2487e-03 eta 1 day, 2:22:22
epoch [27/50] batch [1220/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.9833 (1.2291) lr 1.2487e-03 eta 1 day, 2:21:41
epoch [27/50] batch [1240/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.8499 (1.2305) lr 1.2487e-03 eta 1 day, 2:21:02
epoch [27/50] batch [1260/2000] time 1.998 (2.029) data 0.000 (0.001) loss 2.0863 (1.2337) lr 1.2487e-03 eta 1 day, 2:20:21
epoch [27/50] batch [1280/2000] time 2.031 (2.029) data 0.000 (0.001) loss 0.4084 (1.2304) lr 1.2487e-03 eta 1 day, 2:19:39
epoch [27/50] batch [1300/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.2968 (1.2297) lr 1.2487e-03 eta 1 day, 2:19:00
epoch [27/50] batch [1320/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.3151 (1.2290) lr 1.2487e-03 eta 1 day, 2:18:20
epoch [27/50] batch [1340/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.5991 (1.2308) lr 1.2487e-03 eta 1 day, 2:17:32
epoch [27/50] batch [1360/2000] time 2.031 (2.029) data 0.000 (0.001) loss 1.0508 (1.2289) lr 1.2487e-03 eta 1 day, 2:16:53
epoch [27/50] batch [1380/2000] time 1.975 (2.029) data 0.000 (0.001) loss 0.4518 (1.2288) lr 1.2487e-03 eta 1 day, 2:16:15
epoch [27/50] batch [1400/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.3640 (1.2236) lr 1.2487e-03 eta 1 day, 2:15:36
epoch [27/50] batch [1420/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.3324 (1.2264) lr 1.2487e-03 eta 1 day, 2:15:02
epoch [27/50] batch [1440/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.5828 (1.2335) lr 1.2487e-03 eta 1 day, 2:14:21
epoch [27/50] batch [1460/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.5877 (1.2318) lr 1.2487e-03 eta 1 day, 2:13:44
epoch [27/50] batch [1480/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.4772 (1.2302) lr 1.2487e-03 eta 1 day, 2:13:05
epoch [27/50] batch [1500/2000] time 2.028 (2.029) data 0.000 (0.001) loss 3.1343 (1.2310) lr 1.2487e-03 eta 1 day, 2:12:20
epoch [27/50] batch [1520/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.9041 (1.2345) lr 1.2487e-03 eta 1 day, 2:11:44
epoch [27/50] batch [1540/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.5349 (1.2317) lr 1.2487e-03 eta 1 day, 2:11:03
epoch [27/50] batch [1560/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.0720 (1.2341) lr 1.2487e-03 eta 1 day, 2:10:26
epoch [27/50] batch [1580/2000] time 2.000 (2.029) data 0.000 (0.001) loss 1.5590 (1.2344) lr 1.2487e-03 eta 1 day, 2:09:48
epoch [27/50] batch [1600/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.8505 (1.2343) lr 1.2487e-03 eta 1 day, 2:09:03
epoch [27/50] batch [1620/2000] time 2.055 (2.029) data 0.000 (0.001) loss 2.3444 (1.2409) lr 1.2487e-03 eta 1 day, 2:08:22
epoch [27/50] batch [1640/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.8069 (1.2383) lr 1.2487e-03 eta 1 day, 2:07:41
epoch [27/50] batch [1660/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.1848 (1.2404) lr 1.2487e-03 eta 1 day, 2:07:01
epoch [27/50] batch [1680/2000] time 2.000 (2.029) data 0.001 (0.001) loss 0.4007 (1.2355) lr 1.2487e-03 eta 1 day, 2:06:18
epoch [27/50] batch [1700/2000] time 2.003 (2.029) data 0.000 (0.001) loss 0.3026 (1.2401) lr 1.2487e-03 eta 1 day, 2:05:38
epoch [27/50] batch [1720/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.2320 (1.2399) lr 1.2487e-03 eta 1 day, 2:05:02
epoch [27/50] batch [1740/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.4608 (1.2407) lr 1.2487e-03 eta 1 day, 2:04:20
epoch [27/50] batch [1760/2000] time 2.049 (2.029) data 0.000 (0.001) loss 2.1450 (1.2392) lr 1.2487e-03 eta 1 day, 2:03:37
epoch [27/50] batch [1780/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.3914 (1.2385) lr 1.2487e-03 eta 1 day, 2:02:56
epoch [27/50] batch [1800/2000] time 2.051 (2.029) data 0.000 (0.000) loss 0.1973 (1.2378) lr 1.2487e-03 eta 1 day, 2:02:15
epoch [27/50] batch [1820/2000] time 2.050 (2.029) data 0.000 (0.000) loss 0.0429 (1.2366) lr 1.2487e-03 eta 1 day, 2:01:38
epoch [27/50] batch [1840/2000] time 2.000 (2.029) data 0.000 (0.000) loss 0.8694 (1.2370) lr 1.2487e-03 eta 1 day, 2:00:54
epoch [27/50] batch [1860/2000] time 2.025 (2.029) data 0.000 (0.000) loss 0.8382 (1.2383) lr 1.2487e-03 eta 1 day, 2:00:14
epoch [27/50] batch [1880/2000] time 2.048 (2.029) data 0.000 (0.000) loss 0.3255 (1.2328) lr 1.2487e-03 eta 1 day, 1:59:33
epoch [27/50] batch [1900/2000] time 1.999 (2.029) data 0.000 (0.000) loss 1.8227 (1.2306) lr 1.2487e-03 eta 1 day, 1:58:54
epoch [27/50] batch [1920/2000] time 2.000 (2.029) data 0.000 (0.000) loss 0.9247 (1.2295) lr 1.2487e-03 eta 1 day, 1:58:14
epoch [27/50] batch [1940/2000] time 1.998 (2.029) data 0.000 (0.000) loss 0.9813 (1.2257) lr 1.2487e-03 eta 1 day, 1:57:33
epoch [27/50] batch [1960/2000] time 2.029 (2.029) data 0.000 (0.000) loss 0.8314 (1.2252) lr 1.2487e-03 eta 1 day, 1:56:52
epoch [27/50] batch [1980/2000] time 1.998 (2.029) data 0.000 (0.000) loss 4.1175 (1.2249) lr 1.2487e-03 eta 1 day, 1:56:12
epoch [27/50] batch [2000/2000] time 1.996 (2.029) data 0.000 (0.000) loss 0.2138 (1.2193) lr 1.1874e-03 eta 1 day, 1:55:29
epoch [28/50] batch [20/2000] time 1.997 (2.051) data 0.000 (0.027) loss 1.2583 (1.5438) lr 1.1874e-03 eta 1 day, 2:12:03
epoch [28/50] batch [40/2000] time 2.051 (2.041) data 0.000 (0.014) loss 1.3511 (1.4961) lr 1.1874e-03 eta 1 day, 2:03:15
epoch [28/50] batch [60/2000] time 2.051 (2.036) data 0.001 (0.009) loss 0.1323 (1.3356) lr 1.1874e-03 eta 1 day, 1:58:40
epoch [28/50] batch [80/2000] time 1.975 (2.034) data 0.000 (0.007) loss 1.2885 (1.2393) lr 1.1874e-03 eta 1 day, 1:56:45
epoch [28/50] batch [100/2000] time 1.999 (2.033) data 0.000 (0.006) loss 0.9901 (1.2034) lr 1.1874e-03 eta 1 day, 1:55:07
epoch [28/50] batch [120/2000] time 2.028 (2.032) data 0.000 (0.005) loss 1.0354 (1.1783) lr 1.1874e-03 eta 1 day, 1:53:39
epoch [28/50] batch [140/2000] time 2.027 (2.032) data 0.000 (0.004) loss 1.0863 (1.1480) lr 1.1874e-03 eta 1 day, 1:52:52
epoch [28/50] batch [160/2000] time 2.027 (2.031) data 0.000 (0.004) loss 1.4190 (1.1670) lr 1.1874e-03 eta 1 day, 1:51:33
epoch [28/50] batch [180/2000] time 1.996 (2.030) data 0.000 (0.003) loss 0.4215 (1.1942) lr 1.1874e-03 eta 1 day, 1:50:01
epoch [28/50] batch [200/2000] time 2.028 (2.030) data 0.000 (0.003) loss 0.5081 (1.1775) lr 1.1874e-03 eta 1 day, 1:49:25
epoch [28/50] batch [220/2000] time 2.029 (2.029) data 0.000 (0.003) loss 3.2903 (1.1768) lr 1.1874e-03 eta 1 day, 1:48:27
epoch [28/50] batch [240/2000] time 2.030 (2.029) data 0.000 (0.002) loss 0.5274 (1.1669) lr 1.1874e-03 eta 1 day, 1:47:23
epoch [28/50] batch [260/2000] time 2.030 (2.029) data 0.000 (0.002) loss 0.5353 (1.1706) lr 1.1874e-03 eta 1 day, 1:47:01
epoch [28/50] batch [280/2000] time 2.049 (2.030) data 0.000 (0.002) loss 1.0092 (1.1511) lr 1.1874e-03 eta 1 day, 1:46:46
epoch [28/50] batch [300/2000] time 1.996 (2.030) data 0.000 (0.002) loss 2.5695 (1.1661) lr 1.1874e-03 eta 1 day, 1:45:59
epoch [28/50] batch [320/2000] time 1.998 (2.030) data 0.000 (0.002) loss 1.6443 (1.1692) lr 1.1874e-03 eta 1 day, 1:45:13
epoch [28/50] batch [340/2000] time 2.052 (2.030) data 0.000 (0.002) loss 0.2874 (1.1598) lr 1.1874e-03 eta 1 day, 1:44:38
epoch [28/50] batch [360/2000] time 2.028 (2.029) data 0.000 (0.002) loss 1.2471 (1.1515) lr 1.1874e-03 eta 1 day, 1:43:42
epoch [28/50] batch [380/2000] time 2.054 (2.029) data 0.000 (0.002) loss 1.4137 (1.1580) lr 1.1874e-03 eta 1 day, 1:42:53
epoch [28/50] batch [400/2000] time 1.996 (2.029) data 0.000 (0.002) loss 0.5368 (1.1509) lr 1.1874e-03 eta 1 day, 1:42:15
epoch [28/50] batch [420/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.6483 (1.1527) lr 1.1874e-03 eta 1 day, 1:41:26
epoch [28/50] batch [440/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.8023 (1.1498) lr 1.1874e-03 eta 1 day, 1:40:53
epoch [28/50] batch [460/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.1226 (1.1330) lr 1.1874e-03 eta 1 day, 1:40:06
epoch [28/50] batch [480/2000] time 2.002 (2.029) data 0.000 (0.001) loss 0.2298 (1.1235) lr 1.1874e-03 eta 1 day, 1:39:14
epoch [28/50] batch [500/2000] time 1.976 (2.029) data 0.000 (0.001) loss 0.4934 (1.1350) lr 1.1874e-03 eta 1 day, 1:38:35
epoch [28/50] batch [520/2000] time 2.055 (2.029) data 0.000 (0.001) loss 1.3187 (1.1396) lr 1.1874e-03 eta 1 day, 1:37:52
epoch [28/50] batch [540/2000] time 1.998 (2.029) data 0.000 (0.001) loss 4.0515 (1.1343) lr 1.1874e-03 eta 1 day, 1:37:04
epoch [28/50] batch [560/2000] time 2.029 (2.029) data 0.000 (0.001) loss 2.3564 (1.1305) lr 1.1874e-03 eta 1 day, 1:36:19
epoch [28/50] batch [580/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.2267 (1.1223) lr 1.1874e-03 eta 1 day, 1:35:33
epoch [28/50] batch [600/2000] time 2.028 (2.028) data 0.001 (0.001) loss 1.0536 (1.1296) lr 1.1874e-03 eta 1 day, 1:34:48
epoch [28/50] batch [620/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.8657 (1.1384) lr 1.1874e-03 eta 1 day, 1:34:06
epoch [28/50] batch [640/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.1242 (1.1389) lr 1.1874e-03 eta 1 day, 1:33:18
epoch [28/50] batch [660/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.1530 (1.1503) lr 1.1874e-03 eta 1 day, 1:32:43
epoch [28/50] batch [680/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.2412 (1.1555) lr 1.1874e-03 eta 1 day, 1:31:59
epoch [28/50] batch [700/2000] time 1.978 (2.028) data 0.000 (0.001) loss 2.4934 (1.1623) lr 1.1874e-03 eta 1 day, 1:31:17
epoch [28/50] batch [720/2000] time 2.025 (2.028) data 0.000 (0.001) loss 2.2418 (1.1669) lr 1.1874e-03 eta 1 day, 1:30:35
epoch [28/50] batch [740/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.0815 (1.1623) lr 1.1874e-03 eta 1 day, 1:29:53
epoch [28/50] batch [760/2000] time 2.000 (2.028) data 0.000 (0.001) loss 1.6839 (1.1699) lr 1.1874e-03 eta 1 day, 1:29:24
epoch [28/50] batch [780/2000] time 2.054 (2.028) data 0.000 (0.001) loss 2.9670 (1.1777) lr 1.1874e-03 eta 1 day, 1:28:47
epoch [28/50] batch [800/2000] time 2.027 (2.028) data 0.000 (0.001) loss 1.5685 (1.1764) lr 1.1874e-03 eta 1 day, 1:28:03
epoch [28/50] batch [820/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.4508 (1.1809) lr 1.1874e-03 eta 1 day, 1:27:24
epoch [28/50] batch [840/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.7559 (1.1811) lr 1.1874e-03 eta 1 day, 1:26:48
epoch [28/50] batch [860/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.3685 (1.1776) lr 1.1874e-03 eta 1 day, 1:26:12
epoch [28/50] batch [880/2000] time 2.024 (2.029) data 0.000 (0.001) loss 0.2879 (1.1695) lr 1.1874e-03 eta 1 day, 1:25:28
epoch [28/50] batch [900/2000] time 1.997 (2.029) data 0.000 (0.001) loss 2.6533 (1.1685) lr 1.1874e-03 eta 1 day, 1:24:49
epoch [28/50] batch [920/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.5345 (1.1675) lr 1.1874e-03 eta 1 day, 1:24:13
epoch [28/50] batch [940/2000] time 2.026 (2.029) data 0.000 (0.001) loss 2.0863 (1.1672) lr 1.1874e-03 eta 1 day, 1:23:30
epoch [28/50] batch [960/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.2586 (1.1727) lr 1.1874e-03 eta 1 day, 1:22:47
epoch [28/50] batch [980/2000] time 2.002 (2.029) data 0.000 (0.001) loss 0.4827 (1.1715) lr 1.1874e-03 eta 1 day, 1:22:05
epoch [28/50] batch [1000/2000] time 1.977 (2.029) data 0.000 (0.001) loss 1.0360 (1.1772) lr 1.1874e-03 eta 1 day, 1:21:25
epoch [28/50] batch [1020/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.3671 (1.1784) lr 1.1874e-03 eta 1 day, 1:20:48
epoch [28/50] batch [1040/2000] time 1.974 (2.029) data 0.000 (0.001) loss 0.7001 (1.1747) lr 1.1874e-03 eta 1 day, 1:20:01
epoch [28/50] batch [1060/2000] time 2.054 (2.028) data 0.000 (0.001) loss 3.3986 (1.1809) lr 1.1874e-03 eta 1 day, 1:19:19
epoch [28/50] batch [1080/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.7399 (1.1832) lr 1.1874e-03 eta 1 day, 1:18:34
epoch [28/50] batch [1100/2000] time 2.028 (2.028) data 0.000 (0.001) loss 1.1706 (1.1926) lr 1.1874e-03 eta 1 day, 1:17:55
epoch [28/50] batch [1120/2000] time 2.050 (2.029) data 0.000 (0.001) loss 3.1657 (1.1980) lr 1.1874e-03 eta 1 day, 1:17:21
epoch [28/50] batch [1140/2000] time 2.050 (2.029) data 0.001 (0.001) loss 2.4492 (1.1977) lr 1.1874e-03 eta 1 day, 1:16:39
epoch [28/50] batch [1160/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.4695 (1.2015) lr 1.1874e-03 eta 1 day, 1:15:57
epoch [28/50] batch [1180/2000] time 2.055 (2.029) data 0.000 (0.001) loss 0.0968 (1.1969) lr 1.1874e-03 eta 1 day, 1:15:22
epoch [28/50] batch [1200/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.7560 (1.1994) lr 1.1874e-03 eta 1 day, 1:14:40
epoch [28/50] batch [1220/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.9934 (1.1961) lr 1.1874e-03 eta 1 day, 1:14:02
epoch [28/50] batch [1240/2000] time 1.996 (2.028) data 0.000 (0.001) loss 0.2376 (1.1979) lr 1.1874e-03 eta 1 day, 1:13:15
epoch [28/50] batch [1260/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.5655 (1.2008) lr 1.1874e-03 eta 1 day, 1:12:35
epoch [28/50] batch [1280/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.8959 (1.2064) lr 1.1874e-03 eta 1 day, 1:11:55
epoch [28/50] batch [1300/2000] time 2.001 (2.029) data 0.000 (0.001) loss 0.8646 (1.2058) lr 1.1874e-03 eta 1 day, 1:11:13
epoch [28/50] batch [1320/2000] time 2.054 (2.029) data 0.000 (0.001) loss 2.9935 (1.2104) lr 1.1874e-03 eta 1 day, 1:10:38
epoch [28/50] batch [1340/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.0138 (1.2043) lr 1.1874e-03 eta 1 day, 1:10:00
epoch [28/50] batch [1360/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.7358 (1.2023) lr 1.1874e-03 eta 1 day, 1:09:23
epoch [28/50] batch [1380/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.0463 (1.2044) lr 1.1874e-03 eta 1 day, 1:08:44
epoch [28/50] batch [1400/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.8218 (1.2076) lr 1.1874e-03 eta 1 day, 1:08:05
epoch [28/50] batch [1420/2000] time 2.035 (2.029) data 0.000 (0.001) loss 2.5295 (1.2059) lr 1.1874e-03 eta 1 day, 1:07:27
epoch [28/50] batch [1440/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.4271 (1.2075) lr 1.1874e-03 eta 1 day, 1:06:47
epoch [28/50] batch [1460/2000] time 2.051 (2.029) data 0.000 (0.001) loss 3.1398 (1.2102) lr 1.1874e-03 eta 1 day, 1:06:07
epoch [28/50] batch [1480/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.0150 (1.2076) lr 1.1874e-03 eta 1 day, 1:05:28
epoch [28/50] batch [1500/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.5228 (1.2044) lr 1.1874e-03 eta 1 day, 1:04:45
epoch [28/50] batch [1520/2000] time 2.052 (2.029) data 0.000 (0.001) loss 2.0886 (1.2074) lr 1.1874e-03 eta 1 day, 1:04:00
epoch [28/50] batch [1540/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.0425 (1.2102) lr 1.1874e-03 eta 1 day, 1:03:20
epoch [28/50] batch [1560/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.1075 (1.2069) lr 1.1874e-03 eta 1 day, 1:02:36
epoch [28/50] batch [1580/2000] time 2.001 (2.029) data 0.000 (0.001) loss 0.9491 (1.2080) lr 1.1874e-03 eta 1 day, 1:02:00
epoch [28/50] batch [1600/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.3084 (1.2101) lr 1.1874e-03 eta 1 day, 1:01:23
epoch [28/50] batch [1620/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.3592 (1.2096) lr 1.1874e-03 eta 1 day, 1:00:39
epoch [28/50] batch [1640/2000] time 1.995 (2.029) data 0.000 (0.001) loss 0.2700 (1.2109) lr 1.1874e-03 eta 1 day, 0:59:58
epoch [28/50] batch [1660/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.6794 (1.2121) lr 1.1874e-03 eta 1 day, 0:59:19
epoch [28/50] batch [1680/2000] time 2.049 (2.029) data 0.001 (0.001) loss 0.4682 (1.2178) lr 1.1874e-03 eta 1 day, 0:58:39
epoch [28/50] batch [1700/2000] time 1.974 (2.029) data 0.000 (0.001) loss 1.2264 (1.2165) lr 1.1874e-03 eta 1 day, 0:57:53
epoch [28/50] batch [1720/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.4443 (1.2185) lr 1.1874e-03 eta 1 day, 0:57:12
epoch [28/50] batch [1740/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.7353 (1.2208) lr 1.1874e-03 eta 1 day, 0:56:28
epoch [28/50] batch [1760/2000] time 1.999 (2.029) data 0.000 (0.001) loss 1.2916 (1.2201) lr 1.1874e-03 eta 1 day, 0:55:48
epoch [28/50] batch [1780/2000] time 2.056 (2.029) data 0.000 (0.000) loss 0.7319 (1.2221) lr 1.1874e-03 eta 1 day, 0:55:09
epoch [28/50] batch [1800/2000] time 2.052 (2.029) data 0.000 (0.000) loss 1.5923 (1.2207) lr 1.1874e-03 eta 1 day, 0:54:30
epoch [28/50] batch [1820/2000] time 2.026 (2.029) data 0.000 (0.000) loss 0.4021 (1.2212) lr 1.1874e-03 eta 1 day, 0:53:51
epoch [28/50] batch [1840/2000] time 2.051 (2.029) data 0.000 (0.000) loss 1.0424 (1.2218) lr 1.1874e-03 eta 1 day, 0:53:12
epoch [28/50] batch [1860/2000] time 1.996 (2.029) data 0.000 (0.000) loss 0.3002 (1.2228) lr 1.1874e-03 eta 1 day, 0:52:30
epoch [28/50] batch [1880/2000] time 2.049 (2.029) data 0.000 (0.000) loss 0.4176 (1.2237) lr 1.1874e-03 eta 1 day, 0:51:46
epoch [28/50] batch [1900/2000] time 2.028 (2.029) data 0.000 (0.000) loss 0.3074 (1.2237) lr 1.1874e-03 eta 1 day, 0:51:02
epoch [28/50] batch [1920/2000] time 2.050 (2.029) data 0.000 (0.000) loss 2.0267 (1.2265) lr 1.1874e-03 eta 1 day, 0:50:19
epoch [28/50] batch [1940/2000] time 2.051 (2.029) data 0.000 (0.000) loss 2.8781 (1.2285) lr 1.1874e-03 eta 1 day, 0:49:36
epoch [28/50] batch [1960/2000] time 2.050 (2.029) data 0.000 (0.000) loss 0.4016 (1.2300) lr 1.1874e-03 eta 1 day, 0:48:55
epoch [28/50] batch [1980/2000] time 2.048 (2.028) data 0.000 (0.000) loss 0.2070 (1.2279) lr 1.1874e-03 eta 1 day, 0:48:12
epoch [28/50] batch [2000/2000] time 1.993 (2.028) data 0.000 (0.000) loss 0.8082 (1.2261) lr 1.1253e-03 eta 1 day, 0:47:32
epoch [29/50] batch [20/2000] time 1.997 (2.055) data 0.000 (0.027) loss 1.0405 (1.2140) lr 1.1253e-03 eta 1 day, 1:06:01
epoch [29/50] batch [40/2000] time 2.049 (2.043) data 0.000 (0.014) loss 2.4875 (1.0746) lr 1.1253e-03 eta 1 day, 0:56:47
epoch [29/50] batch [60/2000] time 2.027 (2.039) data 0.001 (0.009) loss 0.0917 (1.1832) lr 1.1253e-03 eta 1 day, 0:53:16
epoch [29/50] batch [80/2000] time 1.996 (2.037) data 0.000 (0.007) loss 3.6178 (1.2406) lr 1.1253e-03 eta 1 day, 0:51:16
epoch [29/50] batch [100/2000] time 2.029 (2.035) data 0.000 (0.006) loss 0.5216 (1.1978) lr 1.1253e-03 eta 1 day, 0:48:45
epoch [29/50] batch [120/2000] time 2.051 (2.035) data 0.000 (0.005) loss 2.2871 (1.2350) lr 1.1253e-03 eta 1 day, 0:47:57
epoch [29/50] batch [140/2000] time 2.052 (2.034) data 0.000 (0.004) loss 0.0816 (1.1904) lr 1.1253e-03 eta 1 day, 0:46:31
epoch [29/50] batch [160/2000] time 1.998 (2.033) data 0.000 (0.004) loss 1.4098 (1.1816) lr 1.1253e-03 eta 1 day, 0:45:26
epoch [29/50] batch [180/2000] time 2.027 (2.033) data 0.000 (0.003) loss 2.0231 (1.1979) lr 1.1253e-03 eta 1 day, 0:44:50
epoch [29/50] batch [200/2000] time 2.058 (2.033) data 0.000 (0.003) loss 2.0234 (1.2157) lr 1.1253e-03 eta 1 day, 0:43:52
epoch [29/50] batch [220/2000] time 2.056 (2.033) data 0.000 (0.003) loss 1.3341 (1.2189) lr 1.1253e-03 eta 1 day, 0:43:22
epoch [29/50] batch [240/2000] time 1.997 (2.033) data 0.000 (0.002) loss 1.7194 (1.2490) lr 1.1253e-03 eta 1 day, 0:42:24
epoch [29/50] batch [260/2000] time 2.050 (2.032) data 0.000 (0.002) loss 1.0519 (1.2477) lr 1.1253e-03 eta 1 day, 0:41:19
epoch [29/50] batch [280/2000] time 2.051 (2.031) data 0.000 (0.002) loss 0.2592 (1.2314) lr 1.1253e-03 eta 1 day, 0:40:16
epoch [29/50] batch [300/2000] time 2.048 (2.031) data 0.000 (0.002) loss 0.2010 (1.2290) lr 1.1253e-03 eta 1 day, 0:39:13
epoch [29/50] batch [320/2000] time 2.051 (2.031) data 0.000 (0.002) loss 1.5073 (1.2445) lr 1.1253e-03 eta 1 day, 0:38:23
epoch [29/50] batch [340/2000] time 2.026 (2.031) data 0.000 (0.002) loss 0.7064 (1.2439) lr 1.1253e-03 eta 1 day, 0:37:39
epoch [29/50] batch [360/2000] time 2.057 (2.031) data 0.000 (0.002) loss 2.6281 (1.2395) lr 1.1253e-03 eta 1 day, 0:36:56
epoch [29/50] batch [380/2000] time 2.048 (2.031) data 0.000 (0.002) loss 0.2754 (1.2322) lr 1.1253e-03 eta 1 day, 0:36:23
epoch [29/50] batch [400/2000] time 2.022 (2.031) data 0.000 (0.002) loss 1.0555 (1.2301) lr 1.1253e-03 eta 1 day, 0:35:38
epoch [29/50] batch [420/2000] time 2.024 (2.031) data 0.000 (0.001) loss 1.3832 (1.2343) lr 1.1253e-03 eta 1 day, 0:34:49
epoch [29/50] batch [440/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.8941 (1.2283) lr 1.1253e-03 eta 1 day, 0:33:57
epoch [29/50] batch [460/2000] time 2.001 (2.030) data 0.000 (0.001) loss 0.9306 (1.2381) lr 1.1253e-03 eta 1 day, 0:33:07
epoch [29/50] batch [480/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.2382 (1.2490) lr 1.1253e-03 eta 1 day, 0:32:24
epoch [29/50] batch [500/2000] time 2.048 (2.030) data 0.000 (0.001) loss 0.1868 (1.2394) lr 1.1253e-03 eta 1 day, 0:31:45
epoch [29/50] batch [520/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.7658 (1.2310) lr 1.1253e-03 eta 1 day, 0:31:05
epoch [29/50] batch [540/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.6042 (1.2455) lr 1.1253e-03 eta 1 day, 0:30:22
epoch [29/50] batch [560/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.2563 (1.2609) lr 1.1253e-03 eta 1 day, 0:29:39
epoch [29/50] batch [580/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.1344 (1.2585) lr 1.1253e-03 eta 1 day, 0:28:54
epoch [29/50] batch [600/2000] time 2.049 (2.030) data 0.001 (0.001) loss 0.4206 (1.2428) lr 1.1253e-03 eta 1 day, 0:28:14
epoch [29/50] batch [620/2000] time 1.973 (2.030) data 0.000 (0.001) loss 3.3524 (1.2428) lr 1.1253e-03 eta 1 day, 0:27:24
epoch [29/50] batch [640/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.2609 (1.2411) lr 1.1253e-03 eta 1 day, 0:26:48
epoch [29/50] batch [660/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.0470 (1.2450) lr 1.1253e-03 eta 1 day, 0:26:14
epoch [29/50] batch [680/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.7537 (1.2519) lr 1.1253e-03 eta 1 day, 0:25:35
epoch [29/50] batch [700/2000] time 2.029 (2.030) data 0.000 (0.001) loss 2.8433 (1.2567) lr 1.1253e-03 eta 1 day, 0:24:53
epoch [29/50] batch [720/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.6075 (1.2600) lr 1.1253e-03 eta 1 day, 0:24:17
epoch [29/50] batch [740/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.2948 (1.2525) lr 1.1253e-03 eta 1 day, 0:23:35
epoch [29/50] batch [760/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.2008 (1.2531) lr 1.1253e-03 eta 1 day, 0:22:53
epoch [29/50] batch [780/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.9325 (1.2564) lr 1.1253e-03 eta 1 day, 0:22:13
epoch [29/50] batch [800/2000] time 2.026 (2.030) data 0.000 (0.001) loss 3.8422 (1.2600) lr 1.1253e-03 eta 1 day, 0:21:37
epoch [29/50] batch [820/2000] time 2.055 (2.030) data 0.000 (0.001) loss 3.5534 (1.2762) lr 1.1253e-03 eta 1 day, 0:20:50
epoch [29/50] batch [840/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.0605 (1.2705) lr 1.1253e-03 eta 1 day, 0:20:13
epoch [29/50] batch [860/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.7151 (1.2696) lr 1.1253e-03 eta 1 day, 0:19:41
epoch [29/50] batch [880/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.3150 (1.2678) lr 1.1253e-03 eta 1 day, 0:19:02
epoch [29/50] batch [900/2000] time 2.033 (2.030) data 0.000 (0.001) loss 0.4387 (1.2618) lr 1.1253e-03 eta 1 day, 0:18:26
epoch [29/50] batch [920/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.1301 (1.2703) lr 1.1253e-03 eta 1 day, 0:17:48
epoch [29/50] batch [940/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.0783 (1.2647) lr 1.1253e-03 eta 1 day, 0:17:03
epoch [29/50] batch [960/2000] time 1.973 (2.030) data 0.000 (0.001) loss 1.3963 (1.2678) lr 1.1253e-03 eta 1 day, 0:16:23
epoch [29/50] batch [980/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.5081 (1.2706) lr 1.1253e-03 eta 1 day, 0:15:43
epoch [29/50] batch [1000/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.5949 (1.2710) lr 1.1253e-03 eta 1 day, 0:14:57
epoch [29/50] batch [1020/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.1116 (1.2673) lr 1.1253e-03 eta 1 day, 0:14:11
epoch [29/50] batch [1040/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.8968 (1.2675) lr 1.1253e-03 eta 1 day, 0:13:35
epoch [29/50] batch [1060/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.9494 (1.2697) lr 1.1253e-03 eta 1 day, 0:12:56
epoch [29/50] batch [1080/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.1728 (1.2697) lr 1.1253e-03 eta 1 day, 0:12:17
epoch [29/50] batch [1100/2000] time 2.048 (2.030) data 0.000 (0.001) loss 1.4740 (1.2706) lr 1.1253e-03 eta 1 day, 0:11:34
epoch [29/50] batch [1120/2000] time 1.995 (2.030) data 0.000 (0.001) loss 0.5051 (1.2703) lr 1.1253e-03 eta 1 day, 0:10:54
epoch [29/50] batch [1140/2000] time 2.047 (2.030) data 0.001 (0.001) loss 0.4167 (1.2656) lr 1.1253e-03 eta 1 day, 0:10:05
epoch [29/50] batch [1160/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.1177 (1.2684) lr 1.1253e-03 eta 1 day, 0:09:21
epoch [29/50] batch [1180/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.8286 (1.2673) lr 1.1253e-03 eta 1 day, 0:08:40
epoch [29/50] batch [1200/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.0238 (1.2610) lr 1.1253e-03 eta 1 day, 0:07:56
epoch [29/50] batch [1220/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.4070 (1.2600) lr 1.1253e-03 eta 1 day, 0:07:19
epoch [29/50] batch [1240/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.4506 (1.2567) lr 1.1253e-03 eta 1 day, 0:06:42
epoch [29/50] batch [1260/2000] time 2.052 (2.030) data 0.000 (0.001) loss 2.8308 (1.2556) lr 1.1253e-03 eta 1 day, 0:05:58
epoch [29/50] batch [1280/2000] time 2.062 (2.030) data 0.000 (0.001) loss 0.0181 (1.2496) lr 1.1253e-03 eta 1 day, 0:05:19
epoch [29/50] batch [1300/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.9103 (1.2481) lr 1.1253e-03 eta 1 day, 0:04:43
epoch [29/50] batch [1320/2000] time 2.052 (2.030) data 0.000 (0.001) loss 3.8644 (1.2483) lr 1.1253e-03 eta 1 day, 0:03:59
epoch [29/50] batch [1340/2000] time 2.001 (2.030) data 0.000 (0.001) loss 0.1407 (1.2439) lr 1.1253e-03 eta 1 day, 0:03:17
epoch [29/50] batch [1360/2000] time 1.976 (2.030) data 0.000 (0.001) loss 0.3690 (1.2400) lr 1.1253e-03 eta 1 day, 0:02:35
epoch [29/50] batch [1380/2000] time 2.002 (2.030) data 0.000 (0.001) loss 1.0758 (1.2367) lr 1.1253e-03 eta 1 day, 0:01:57
epoch [29/50] batch [1400/2000] time 2.035 (2.030) data 0.000 (0.001) loss 2.4230 (1.2335) lr 1.1253e-03 eta 1 day, 0:01:16
epoch [29/50] batch [1420/2000] time 2.005 (2.030) data 0.000 (0.001) loss 0.7920 (1.2298) lr 1.1253e-03 eta 1 day, 0:00:35
epoch [29/50] batch [1440/2000] time 2.003 (2.030) data 0.000 (0.001) loss 0.1143 (1.2260) lr 1.1253e-03 eta 1 day, 0:00:00
epoch [29/50] batch [1460/2000] time 1.997 (2.030) data 0.000 (0.001) loss 2.3689 (1.2277) lr 1.1253e-03 eta 23:59:21
epoch [29/50] batch [1480/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.8741 (1.2250) lr 1.1253e-03 eta 23:58:35
epoch [29/50] batch [1500/2000] time 1.974 (2.030) data 0.000 (0.001) loss 0.3062 (1.2245) lr 1.1253e-03 eta 23:57:50
epoch [29/50] batch [1520/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.2530 (1.2276) lr 1.1253e-03 eta 23:57:14
epoch [29/50] batch [1540/2000] time 2.000 (2.030) data 0.000 (0.001) loss 1.0578 (1.2240) lr 1.1253e-03 eta 23:56:28
epoch [29/50] batch [1560/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.1722 (1.2261) lr 1.1253e-03 eta 23:55:49
epoch [29/50] batch [1580/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.8564 (1.2295) lr 1.1253e-03 eta 23:55:05
epoch [29/50] batch [1600/2000] time 1.976 (2.030) data 0.000 (0.001) loss 1.9569 (1.2286) lr 1.1253e-03 eta 23:54:26
epoch [29/50] batch [1620/2000] time 1.977 (2.030) data 0.000 (0.001) loss 1.0756 (1.2278) lr 1.1253e-03 eta 23:53:44
epoch [29/50] batch [1640/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.6342 (1.2278) lr 1.1253e-03 eta 23:53:06
epoch [29/50] batch [1660/2000] time 2.030 (2.030) data 0.000 (0.001) loss 1.1272 (1.2290) lr 1.1253e-03 eta 23:52:24
epoch [29/50] batch [1680/2000] time 2.050 (2.030) data 0.001 (0.001) loss 2.9461 (1.2309) lr 1.1253e-03 eta 23:51:43
epoch [29/50] batch [1700/2000] time 2.023 (2.030) data 0.000 (0.001) loss 0.7664 (1.2272) lr 1.1253e-03 eta 23:51:05
epoch [29/50] batch [1720/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.2004 (1.2311) lr 1.1253e-03 eta 23:50:23
epoch [29/50] batch [1740/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.3906 (1.2296) lr 1.1253e-03 eta 23:49:41
epoch [29/50] batch [1760/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.4963 (1.2267) lr 1.1253e-03 eta 23:49:01
epoch [29/50] batch [1780/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.6059 (1.2244) lr 1.1253e-03 eta 23:48:21
epoch [29/50] batch [1800/2000] time 2.047 (2.030) data 0.000 (0.001) loss 2.9718 (1.2266) lr 1.1253e-03 eta 23:47:38
epoch [29/50] batch [1820/2000] time 2.047 (2.030) data 0.000 (0.001) loss 0.1045 (1.2249) lr 1.1253e-03 eta 23:46:55
epoch [29/50] batch [1840/2000] time 2.027 (2.030) data 0.000 (0.000) loss 2.3068 (1.2214) lr 1.1253e-03 eta 23:46:15
epoch [29/50] batch [1860/2000] time 1.996 (2.030) data 0.000 (0.000) loss 1.1593 (1.2215) lr 1.1253e-03 eta 23:45:35
epoch [29/50] batch [1880/2000] time 1.996 (2.030) data 0.000 (0.000) loss 0.6193 (1.2223) lr 1.1253e-03 eta 23:44:56
epoch [29/50] batch [1900/2000] time 1.999 (2.030) data 0.000 (0.000) loss 1.0295 (1.2240) lr 1.1253e-03 eta 23:44:15
epoch [29/50] batch [1920/2000] time 2.028 (2.030) data 0.000 (0.000) loss 1.6512 (1.2240) lr 1.1253e-03 eta 23:43:35
epoch [29/50] batch [1940/2000] time 2.050 (2.030) data 0.000 (0.000) loss 0.4493 (1.2221) lr 1.1253e-03 eta 23:42:53
epoch [29/50] batch [1960/2000] time 2.052 (2.030) data 0.000 (0.000) loss 2.2518 (1.2229) lr 1.1253e-03 eta 23:42:12
epoch [29/50] batch [1980/2000] time 1.973 (2.030) data 0.000 (0.000) loss 1.4027 (1.2236) lr 1.1253e-03 eta 23:41:28
epoch [29/50] batch [2000/2000] time 2.000 (2.030) data 0.000 (0.000) loss 1.3139 (1.2251) lr 1.0628e-03 eta 23:40:47
epoch [30/50] batch [20/2000] time 2.029 (2.056) data 0.000 (0.027) loss 0.1335 (1.1030) lr 1.0628e-03 eta 23:58:38
epoch [30/50] batch [40/2000] time 2.000 (2.044) data 0.000 (0.014) loss 0.1521 (0.9931) lr 1.0628e-03 eta 23:49:46
epoch [30/50] batch [60/2000] time 2.057 (2.041) data 0.001 (0.009) loss 1.8827 (1.1416) lr 1.0628e-03 eta 23:46:32
epoch [30/50] batch [80/2000] time 2.028 (2.037) data 0.000 (0.007) loss 2.3058 (1.1363) lr 1.0628e-03 eta 23:43:01
epoch [30/50] batch [100/2000] time 2.025 (2.035) data 0.000 (0.006) loss 1.4623 (1.1424) lr 1.0628e-03 eta 23:41:11
epoch [30/50] batch [120/2000] time 1.997 (2.035) data 0.000 (0.005) loss 0.3207 (1.1393) lr 1.0628e-03 eta 23:40:25
epoch [30/50] batch [140/2000] time 2.051 (2.034) data 0.000 (0.004) loss 1.7427 (1.1777) lr 1.0628e-03 eta 23:39:10
epoch [30/50] batch [160/2000] time 2.052 (2.034) data 0.000 (0.004) loss 2.0075 (1.1940) lr 1.0628e-03 eta 23:38:35
epoch [30/50] batch [180/2000] time 2.053 (2.034) data 0.000 (0.003) loss 0.4925 (1.2221) lr 1.0628e-03 eta 23:37:30
epoch [30/50] batch [200/2000] time 2.052 (2.033) data 0.000 (0.003) loss 1.3134 (1.2513) lr 1.0628e-03 eta 23:36:12
epoch [30/50] batch [220/2000] time 1.998 (2.032) data 0.000 (0.003) loss 3.3947 (1.2624) lr 1.0628e-03 eta 23:34:53
epoch [30/50] batch [240/2000] time 2.054 (2.032) data 0.000 (0.002) loss 1.9090 (1.2732) lr 1.0628e-03 eta 23:34:15
epoch [30/50] batch [260/2000] time 2.027 (2.032) data 0.000 (0.002) loss 0.8396 (1.2494) lr 1.0628e-03 eta 23:33:24
epoch [30/50] batch [280/2000] time 1.996 (2.031) data 0.000 (0.002) loss 0.0436 (1.2375) lr 1.0628e-03 eta 23:32:16
epoch [30/50] batch [300/2000] time 2.049 (2.031) data 0.000 (0.002) loss 1.5692 (1.2215) lr 1.0628e-03 eta 23:31:20
epoch [30/50] batch [320/2000] time 2.048 (2.031) data 0.000 (0.002) loss 0.9988 (1.2057) lr 1.0628e-03 eta 23:30:32
epoch [30/50] batch [340/2000] time 2.050 (2.031) data 0.000 (0.002) loss 1.1186 (1.1963) lr 1.0628e-03 eta 23:29:53
epoch [30/50] batch [360/2000] time 1.997 (2.030) data 0.000 (0.002) loss 2.6836 (1.1937) lr 1.0628e-03 eta 23:29:01
epoch [30/50] batch [380/2000] time 2.027 (2.030) data 0.000 (0.002) loss 0.6746 (1.1803) lr 1.0628e-03 eta 23:28:21
epoch [30/50] batch [400/2000] time 2.029 (2.030) data 0.000 (0.002) loss 2.9049 (1.1774) lr 1.0628e-03 eta 23:27:30
epoch [30/50] batch [420/2000] time 1.973 (2.030) data 0.000 (0.001) loss 0.7652 (1.1907) lr 1.0628e-03 eta 23:26:26
epoch [30/50] batch [440/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.3642 (1.1942) lr 1.0628e-03 eta 23:25:49
epoch [30/50] batch [460/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.7882 (1.1947) lr 1.0628e-03 eta 23:24:55
epoch [30/50] batch [480/2000] time 1.974 (2.029) data 0.000 (0.001) loss 0.4366 (1.1944) lr 1.0628e-03 eta 23:24:13
epoch [30/50] batch [500/2000] time 2.049 (2.029) data 0.000 (0.001) loss 2.2641 (1.1838) lr 1.0628e-03 eta 23:23:28
epoch [30/50] batch [520/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.4428 (1.1688) lr 1.0628e-03 eta 23:22:47
epoch [30/50] batch [540/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.3717 (1.1859) lr 1.0628e-03 eta 23:21:58
epoch [30/50] batch [560/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.2715 (1.1981) lr 1.0628e-03 eta 23:21:25
epoch [30/50] batch [580/2000] time 1.995 (2.029) data 0.000 (0.001) loss 0.3066 (1.1941) lr 1.0628e-03 eta 23:20:42
epoch [30/50] batch [600/2000] time 2.028 (2.029) data 0.001 (0.001) loss 0.5813 (1.1968) lr 1.0628e-03 eta 23:19:54
epoch [30/50] batch [620/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.5680 (1.1910) lr 1.0628e-03 eta 23:19:13
epoch [30/50] batch [640/2000] time 2.027 (2.029) data 0.000 (0.001) loss 2.0174 (1.1936) lr 1.0628e-03 eta 23:18:27
epoch [30/50] batch [660/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.0788 (1.1924) lr 1.0628e-03 eta 23:17:38
epoch [30/50] batch [680/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.2382 (1.1958) lr 1.0628e-03 eta 23:16:56
epoch [30/50] batch [700/2000] time 2.029 (2.028) data 0.000 (0.001) loss 0.1792 (1.1830) lr 1.0628e-03 eta 23:16:13
epoch [30/50] batch [720/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.6587 (1.1813) lr 1.0628e-03 eta 23:15:32
epoch [30/50] batch [740/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.3507 (1.1867) lr 1.0628e-03 eta 23:14:47
epoch [30/50] batch [760/2000] time 2.029 (2.028) data 0.000 (0.001) loss 2.7795 (1.2002) lr 1.0628e-03 eta 23:14:08
epoch [30/50] batch [780/2000] time 2.053 (2.028) data 0.000 (0.001) loss 2.4486 (1.2130) lr 1.0628e-03 eta 23:13:28
epoch [30/50] batch [800/2000] time 2.000 (2.028) data 0.000 (0.001) loss 0.7657 (1.2171) lr 1.0628e-03 eta 23:12:44
epoch [30/50] batch [820/2000] time 2.053 (2.028) data 0.000 (0.001) loss 0.5085 (1.2170) lr 1.0628e-03 eta 23:12:06
epoch [30/50] batch [840/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.6975 (1.2141) lr 1.0628e-03 eta 23:11:28
epoch [30/50] batch [860/2000] time 2.028 (2.028) data 0.000 (0.001) loss 0.8244 (1.2095) lr 1.0628e-03 eta 23:10:47
epoch [30/50] batch [880/2000] time 2.053 (2.028) data 0.000 (0.001) loss 0.6126 (1.2066) lr 1.0628e-03 eta 23:10:03
epoch [30/50] batch [900/2000] time 2.003 (2.028) data 0.000 (0.001) loss 0.3595 (1.2106) lr 1.0628e-03 eta 23:09:27
epoch [30/50] batch [920/2000] time 2.003 (2.028) data 0.000 (0.001) loss 0.4687 (1.2139) lr 1.0628e-03 eta 23:08:50
epoch [30/50] batch [940/2000] time 2.029 (2.028) data 0.000 (0.001) loss 0.8390 (1.2107) lr 1.0628e-03 eta 23:07:58
epoch [30/50] batch [960/2000] time 1.976 (2.028) data 0.000 (0.001) loss 1.0793 (1.2016) lr 1.0628e-03 eta 23:07:24
epoch [30/50] batch [980/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.3577 (1.2047) lr 1.0628e-03 eta 23:06:50
epoch [30/50] batch [1000/2000] time 2.030 (2.029) data 0.000 (0.001) loss 2.6044 (1.2052) lr 1.0628e-03 eta 23:06:09
epoch [30/50] batch [1020/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.6288 (1.2028) lr 1.0628e-03 eta 23:05:28
epoch [30/50] batch [1040/2000] time 2.026 (2.028) data 0.000 (0.001) loss 3.5167 (1.2043) lr 1.0628e-03 eta 23:04:42
epoch [30/50] batch [1060/2000] time 1.972 (2.028) data 0.000 (0.001) loss 0.6177 (1.2007) lr 1.0628e-03 eta 23:03:54
epoch [30/50] batch [1080/2000] time 1.996 (2.028) data 0.000 (0.001) loss 0.2723 (1.2103) lr 1.0628e-03 eta 23:03:13
epoch [30/50] batch [1100/2000] time 2.034 (2.028) data 0.000 (0.001) loss 0.4965 (1.2066) lr 1.0628e-03 eta 23:02:34
epoch [30/50] batch [1120/2000] time 2.006 (2.028) data 0.000 (0.001) loss 1.3367 (1.2078) lr 1.0628e-03 eta 23:01:54
epoch [30/50] batch [1140/2000] time 2.030 (2.028) data 0.001 (0.001) loss 1.3235 (1.2100) lr 1.0628e-03 eta 23:01:18
epoch [30/50] batch [1160/2000] time 1.997 (2.028) data 0.000 (0.001) loss 1.9136 (1.2112) lr 1.0628e-03 eta 23:00:39
epoch [30/50] batch [1180/2000] time 2.051 (2.028) data 0.000 (0.001) loss 2.2402 (1.2084) lr 1.0628e-03 eta 22:59:57
epoch [30/50] batch [1200/2000] time 1.999 (2.028) data 0.000 (0.001) loss 0.1243 (1.2035) lr 1.0628e-03 eta 22:59:20
epoch [30/50] batch [1220/2000] time 2.028 (2.028) data 0.000 (0.001) loss 1.1743 (1.1989) lr 1.0628e-03 eta 22:58:41
epoch [30/50] batch [1240/2000] time 2.031 (2.029) data 0.000 (0.001) loss 1.5463 (1.1967) lr 1.0628e-03 eta 22:58:02
epoch [30/50] batch [1260/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.5152 (1.1927) lr 1.0628e-03 eta 22:57:23
epoch [30/50] batch [1280/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.9553 (1.1935) lr 1.0628e-03 eta 22:56:39
epoch [30/50] batch [1300/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.3755 (1.1882) lr 1.0628e-03 eta 22:56:02
epoch [30/50] batch [1320/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.4091 (1.1881) lr 1.0628e-03 eta 22:55:23
epoch [30/50] batch [1340/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.1877 (1.1862) lr 1.0628e-03 eta 22:54:44
epoch [30/50] batch [1360/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.2038 (1.1875) lr 1.0628e-03 eta 22:54:07
epoch [30/50] batch [1380/2000] time 2.052 (2.029) data 0.000 (0.001) loss 4.8775 (1.1887) lr 1.0628e-03 eta 22:53:26
epoch [30/50] batch [1400/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.6541 (1.1922) lr 1.0628e-03 eta 22:52:46
epoch [30/50] batch [1420/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.7970 (1.1929) lr 1.0628e-03 eta 22:52:07
epoch [30/50] batch [1440/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.8257 (1.1901) lr 1.0628e-03 eta 22:51:25
epoch [30/50] batch [1460/2000] time 2.051 (2.029) data 0.000 (0.001) loss 4.6139 (1.1899) lr 1.0628e-03 eta 22:50:49
epoch [30/50] batch [1480/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.8285 (1.1915) lr 1.0628e-03 eta 22:50:03
epoch [30/50] batch [1500/2000] time 2.027 (2.029) data 0.000 (0.001) loss 2.3401 (1.1955) lr 1.0628e-03 eta 22:49:21
epoch [30/50] batch [1520/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.6948 (1.1913) lr 1.0628e-03 eta 22:48:39
epoch [30/50] batch [1540/2000] time 2.052 (2.029) data 0.000 (0.001) loss 3.4065 (1.1912) lr 1.0628e-03 eta 22:47:59
epoch [30/50] batch [1560/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.9616 (1.1930) lr 1.0628e-03 eta 22:47:17
epoch [30/50] batch [1580/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.0878 (1.1954) lr 1.0628e-03 eta 22:46:35
epoch [30/50] batch [1600/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.0766 (1.1973) lr 1.0628e-03 eta 22:45:54
epoch [30/50] batch [1620/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.7289 (1.1984) lr 1.0628e-03 eta 22:45:13
epoch [30/50] batch [1640/2000] time 2.026 (2.029) data 0.000 (0.001) loss 2.1275 (1.1995) lr 1.0628e-03 eta 22:44:34
epoch [30/50] batch [1660/2000] time 2.027 (2.029) data 0.000 (0.001) loss 2.2213 (1.1988) lr 1.0628e-03 eta 22:43:56
epoch [30/50] batch [1680/2000] time 2.050 (2.029) data 0.001 (0.001) loss 1.8055 (1.2032) lr 1.0628e-03 eta 22:43:17
epoch [30/50] batch [1700/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.5444 (1.2052) lr 1.0628e-03 eta 22:42:37
epoch [30/50] batch [1720/2000] time 1.999 (2.029) data 0.000 (0.001) loss 1.5739 (1.2071) lr 1.0628e-03 eta 22:41:57
epoch [30/50] batch [1740/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.5786 (1.2060) lr 1.0628e-03 eta 22:41:18
epoch [30/50] batch [1760/2000] time 1.999 (2.029) data 0.000 (0.001) loss 1.2159 (1.2077) lr 1.0628e-03 eta 22:40:39
epoch [30/50] batch [1780/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.3971 (1.2097) lr 1.0628e-03 eta 22:39:56
epoch [30/50] batch [1800/2000] time 2.001 (2.029) data 0.000 (0.001) loss 0.5335 (1.2110) lr 1.0628e-03 eta 22:39:16
epoch [30/50] batch [1820/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.4148 (1.2162) lr 1.0628e-03 eta 22:38:36
epoch [30/50] batch [1840/2000] time 1.999 (2.029) data 0.000 (0.000) loss 2.2476 (1.2193) lr 1.0628e-03 eta 22:37:56
epoch [30/50] batch [1860/2000] time 1.998 (2.029) data 0.000 (0.000) loss 1.2520 (1.2174) lr 1.0628e-03 eta 22:37:12
epoch [30/50] batch [1880/2000] time 2.028 (2.029) data 0.000 (0.000) loss 2.6007 (1.2204) lr 1.0628e-03 eta 22:36:34
epoch [30/50] batch [1900/2000] time 2.052 (2.029) data 0.000 (0.000) loss 0.1621 (1.2209) lr 1.0628e-03 eta 22:35:54
epoch [30/50] batch [1920/2000] time 2.052 (2.029) data 0.000 (0.000) loss 1.6115 (1.2179) lr 1.0628e-03 eta 22:35:14
epoch [30/50] batch [1940/2000] time 1.998 (2.029) data 0.000 (0.000) loss 3.2139 (1.2189) lr 1.0628e-03 eta 22:34:37
epoch [30/50] batch [1960/2000] time 2.054 (2.029) data 0.000 (0.000) loss 0.3484 (1.2211) lr 1.0628e-03 eta 22:33:59
epoch [30/50] batch [1980/2000] time 2.028 (2.029) data 0.000 (0.000) loss 2.4803 (1.2234) lr 1.0628e-03 eta 22:33:19
epoch [30/50] batch [2000/2000] time 2.028 (2.029) data 0.000 (0.000) loss 0.0512 (1.2210) lr 1.0000e-03 eta 22:32:39
epoch [31/50] batch [20/2000] time 1.996 (2.054) data 0.000 (0.027) loss 3.2724 (1.3202) lr 1.0000e-03 eta 22:48:49
epoch [31/50] batch [40/2000] time 2.049 (2.042) data 0.000 (0.013) loss 0.9952 (1.3585) lr 1.0000e-03 eta 22:40:04
epoch [31/50] batch [60/2000] time 2.028 (2.037) data 0.001 (0.009) loss 1.9959 (1.2823) lr 1.0000e-03 eta 22:35:45
epoch [31/50] batch [80/2000] time 2.049 (2.034) data 0.000 (0.007) loss 0.7276 (1.2168) lr 1.0000e-03 eta 22:33:34
epoch [31/50] batch [100/2000] time 2.048 (2.033) data 0.000 (0.005) loss 3.9791 (1.2197) lr 1.0000e-03 eta 22:31:47
epoch [31/50] batch [120/2000] time 1.995 (2.031) data 0.000 (0.005) loss 0.7630 (1.1887) lr 1.0000e-03 eta 22:29:52
epoch [31/50] batch [140/2000] time 2.050 (2.030) data 0.000 (0.004) loss 0.3957 (1.2034) lr 1.0000e-03 eta 22:28:16
epoch [31/50] batch [160/2000] time 2.030 (2.029) data 0.000 (0.004) loss 0.7095 (1.1833) lr 1.0000e-03 eta 22:27:16
epoch [31/50] batch [180/2000] time 1.999 (2.028) data 0.000 (0.003) loss 0.1908 (1.1471) lr 1.0000e-03 eta 22:26:05
epoch [31/50] batch [200/2000] time 2.029 (2.028) data 0.000 (0.003) loss 2.5496 (1.1707) lr 1.0000e-03 eta 22:25:20
epoch [31/50] batch [220/2000] time 1.998 (2.029) data 0.000 (0.003) loss 1.5538 (1.1615) lr 1.0000e-03 eta 22:24:56
epoch [31/50] batch [240/2000] time 1.997 (2.028) data 0.000 (0.002) loss 1.7867 (1.1583) lr 1.0000e-03 eta 22:24:12
epoch [31/50] batch [260/2000] time 2.049 (2.028) data 0.000 (0.002) loss 2.5792 (1.1765) lr 1.0000e-03 eta 22:23:24
epoch [31/50] batch [280/2000] time 2.000 (2.028) data 0.000 (0.002) loss 0.3881 (1.1812) lr 1.0000e-03 eta 22:22:31
epoch [31/50] batch [300/2000] time 1.997 (2.028) data 0.000 (0.002) loss 1.0506 (1.1707) lr 1.0000e-03 eta 22:21:54
epoch [31/50] batch [320/2000] time 2.001 (2.028) data 0.000 (0.002) loss 0.3611 (1.1661) lr 1.0000e-03 eta 22:21:19
epoch [31/50] batch [340/2000] time 1.998 (2.028) data 0.000 (0.002) loss 1.1141 (1.1722) lr 1.0000e-03 eta 22:20:33
epoch [31/50] batch [360/2000] time 1.996 (2.028) data 0.000 (0.002) loss 0.6773 (1.1617) lr 1.0000e-03 eta 22:19:50
epoch [31/50] batch [380/2000] time 2.027 (2.028) data 0.000 (0.002) loss 0.3497 (1.1565) lr 1.0000e-03 eta 22:19:23
epoch [31/50] batch [400/2000] time 1.996 (2.028) data 0.000 (0.002) loss 0.6214 (1.1530) lr 1.0000e-03 eta 22:18:34
epoch [31/50] batch [420/2000] time 1.996 (2.028) data 0.000 (0.001) loss 0.5695 (1.1501) lr 1.0000e-03 eta 22:18:04
epoch [31/50] batch [440/2000] time 2.028 (2.028) data 0.000 (0.001) loss 2.0139 (1.1624) lr 1.0000e-03 eta 22:17:06
epoch [31/50] batch [460/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.1609 (1.1479) lr 1.0000e-03 eta 22:16:37
epoch [31/50] batch [480/2000] time 2.028 (2.028) data 0.000 (0.001) loss 2.8030 (1.1538) lr 1.0000e-03 eta 22:15:44
epoch [31/50] batch [500/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.7985 (1.1527) lr 1.0000e-03 eta 22:14:59
epoch [31/50] batch [520/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.9942 (1.1499) lr 1.0000e-03 eta 22:14:14
epoch [31/50] batch [540/2000] time 2.027 (2.028) data 0.000 (0.001) loss 1.8668 (1.1545) lr 1.0000e-03 eta 22:13:35
epoch [31/50] batch [560/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.6303 (1.1486) lr 1.0000e-03 eta 22:12:49
epoch [31/50] batch [580/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.5701 (1.1464) lr 1.0000e-03 eta 22:12:08
epoch [31/50] batch [600/2000] time 2.032 (2.028) data 0.001 (0.001) loss 1.4305 (1.1525) lr 1.0000e-03 eta 22:11:37
epoch [31/50] batch [620/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.3440 (1.1550) lr 1.0000e-03 eta 22:11:07
epoch [31/50] batch [640/2000] time 2.000 (2.028) data 0.000 (0.001) loss 1.0512 (1.1469) lr 1.0000e-03 eta 22:10:17
epoch [31/50] batch [660/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.2910 (1.1415) lr 1.0000e-03 eta 22:09:35
epoch [31/50] batch [680/2000] time 1.996 (2.028) data 0.000 (0.001) loss 1.0735 (1.1365) lr 1.0000e-03 eta 22:09:03
epoch [31/50] batch [700/2000] time 1.997 (2.028) data 0.000 (0.001) loss 0.0831 (1.1303) lr 1.0000e-03 eta 22:08:29
epoch [31/50] batch [720/2000] time 2.028 (2.028) data 0.000 (0.001) loss 1.6015 (1.1295) lr 1.0000e-03 eta 22:07:43
epoch [31/50] batch [740/2000] time 1.998 (2.028) data 0.000 (0.001) loss 1.1547 (1.1284) lr 1.0000e-03 eta 22:07:01
epoch [31/50] batch [760/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.7817 (1.1219) lr 1.0000e-03 eta 22:06:20
epoch [31/50] batch [780/2000] time 1.998 (2.028) data 0.000 (0.001) loss 1.6798 (1.1210) lr 1.0000e-03 eta 22:05:45
epoch [31/50] batch [800/2000] time 2.028 (2.028) data 0.000 (0.001) loss 1.2306 (1.1247) lr 1.0000e-03 eta 22:05:12
epoch [31/50] batch [820/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.6416 (1.1258) lr 1.0000e-03 eta 22:04:27
epoch [31/50] batch [840/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.5049 (1.1290) lr 1.0000e-03 eta 22:03:44
epoch [31/50] batch [860/2000] time 2.027 (2.028) data 0.000 (0.001) loss 1.6994 (1.1340) lr 1.0000e-03 eta 22:02:59
epoch [31/50] batch [880/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.9263 (1.1313) lr 1.0000e-03 eta 22:02:23
epoch [31/50] batch [900/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.3642 (1.1361) lr 1.0000e-03 eta 22:01:37
epoch [31/50] batch [920/2000] time 1.997 (2.028) data 0.000 (0.001) loss 3.7346 (1.1446) lr 1.0000e-03 eta 22:00:57
epoch [31/50] batch [940/2000] time 2.029 (2.028) data 0.000 (0.001) loss 0.1063 (1.1552) lr 1.0000e-03 eta 22:00:13
epoch [31/50] batch [960/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.9307 (1.1640) lr 1.0000e-03 eta 21:59:31
epoch [31/50] batch [980/2000] time 2.056 (2.028) data 0.000 (0.001) loss 1.6705 (1.1715) lr 1.0000e-03 eta 21:58:55
epoch [31/50] batch [1000/2000] time 2.051 (2.028) data 0.000 (0.001) loss 2.0600 (1.1740) lr 1.0000e-03 eta 21:58:17
epoch [31/50] batch [1020/2000] time 1.999 (2.028) data 0.000 (0.001) loss 0.6572 (1.1788) lr 1.0000e-03 eta 21:57:38
epoch [31/50] batch [1040/2000] time 1.974 (2.028) data 0.000 (0.001) loss 1.7311 (1.1805) lr 1.0000e-03 eta 21:57:00
epoch [31/50] batch [1060/2000] time 2.002 (2.028) data 0.000 (0.001) loss 2.9351 (1.1886) lr 1.0000e-03 eta 21:56:17
epoch [31/50] batch [1080/2000] time 2.054 (2.028) data 0.000 (0.001) loss 2.0880 (1.1928) lr 1.0000e-03 eta 21:55:38
epoch [31/50] batch [1100/2000] time 2.053 (2.028) data 0.000 (0.001) loss 0.6574 (1.1939) lr 1.0000e-03 eta 21:55:01
epoch [31/50] batch [1120/2000] time 2.053 (2.028) data 0.000 (0.001) loss 1.6629 (1.1927) lr 1.0000e-03 eta 21:54:19
epoch [31/50] batch [1140/2000] time 1.999 (2.028) data 0.001 (0.001) loss 2.0920 (1.1915) lr 1.0000e-03 eta 21:53:37
epoch [31/50] batch [1160/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.4605 (1.1899) lr 1.0000e-03 eta 21:53:01
epoch [31/50] batch [1180/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.9025 (1.1916) lr 1.0000e-03 eta 21:52:19
epoch [31/50] batch [1200/2000] time 2.052 (2.028) data 0.000 (0.001) loss 2.7601 (1.2005) lr 1.0000e-03 eta 21:51:37
epoch [31/50] batch [1220/2000] time 2.030 (2.028) data 0.000 (0.001) loss 1.5222 (1.1955) lr 1.0000e-03 eta 21:51:00
epoch [31/50] batch [1240/2000] time 2.055 (2.028) data 0.000 (0.001) loss 3.4078 (1.1917) lr 1.0000e-03 eta 21:50:18
epoch [31/50] batch [1260/2000] time 2.029 (2.028) data 0.000 (0.001) loss 2.6179 (1.1971) lr 1.0000e-03 eta 21:49:41
epoch [31/50] batch [1280/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.8830 (1.1951) lr 1.0000e-03 eta 21:48:59
epoch [31/50] batch [1300/2000] time 2.035 (2.028) data 0.000 (0.001) loss 2.0593 (1.1980) lr 1.0000e-03 eta 21:48:15
epoch [31/50] batch [1320/2000] time 2.001 (2.028) data 0.000 (0.001) loss 0.3428 (1.1982) lr 1.0000e-03 eta 21:47:37
epoch [31/50] batch [1340/2000] time 2.053 (2.028) data 0.000 (0.001) loss 2.1107 (1.2019) lr 1.0000e-03 eta 21:46:59
epoch [31/50] batch [1360/2000] time 1.998 (2.028) data 0.000 (0.001) loss 1.8427 (1.2070) lr 1.0000e-03 eta 21:46:18
epoch [31/50] batch [1380/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.6454 (1.2059) lr 1.0000e-03 eta 21:45:41
epoch [31/50] batch [1400/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.1125 (1.2087) lr 1.0000e-03 eta 21:45:03
epoch [31/50] batch [1420/2000] time 2.032 (2.029) data 0.000 (0.001) loss 0.4404 (1.2095) lr 1.0000e-03 eta 21:44:22
epoch [31/50] batch [1440/2000] time 2.000 (2.029) data 0.000 (0.001) loss 1.2728 (1.2072) lr 1.0000e-03 eta 21:43:47
epoch [31/50] batch [1460/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.9504 (1.2051) lr 1.0000e-03 eta 21:43:05
epoch [31/50] batch [1480/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.4607 (1.2093) lr 1.0000e-03 eta 21:42:24
epoch [31/50] batch [1500/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.9548 (1.2083) lr 1.0000e-03 eta 21:41:43
epoch [31/50] batch [1520/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.0316 (1.2088) lr 1.0000e-03 eta 21:41:04
epoch [31/50] batch [1540/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.0441 (1.2077) lr 1.0000e-03 eta 21:40:23
epoch [31/50] batch [1560/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.0371 (1.2061) lr 1.0000e-03 eta 21:39:43
epoch [31/50] batch [1580/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.7802 (1.2043) lr 1.0000e-03 eta 21:39:05
epoch [31/50] batch [1600/2000] time 2.029 (2.029) data 0.000 (0.001) loss 3.2663 (1.2064) lr 1.0000e-03 eta 21:38:23
epoch [31/50] batch [1620/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.3537 (1.2098) lr 1.0000e-03 eta 21:37:41
epoch [31/50] batch [1640/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.0470 (1.2092) lr 1.0000e-03 eta 21:37:04
epoch [31/50] batch [1660/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.3850 (1.2110) lr 1.0000e-03 eta 21:36:22
epoch [31/50] batch [1680/2000] time 2.029 (2.029) data 0.001 (0.001) loss 0.7576 (1.2088) lr 1.0000e-03 eta 21:35:39
epoch [31/50] batch [1700/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.8061 (1.2087) lr 1.0000e-03 eta 21:35:03
epoch [31/50] batch [1720/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.3352 (1.2099) lr 1.0000e-03 eta 21:34:24
epoch [31/50] batch [1740/2000] time 2.055 (2.029) data 0.000 (0.001) loss 0.2779 (1.2071) lr 1.0000e-03 eta 21:33:46
epoch [31/50] batch [1760/2000] time 2.030 (2.029) data 0.000 (0.000) loss 2.2944 (1.2111) lr 1.0000e-03 eta 21:33:03
epoch [31/50] batch [1780/2000] time 2.052 (2.029) data 0.000 (0.000) loss 0.4980 (1.2104) lr 1.0000e-03 eta 21:32:23
epoch [31/50] batch [1800/2000] time 1.998 (2.029) data 0.000 (0.000) loss 1.3451 (1.2142) lr 1.0000e-03 eta 21:31:41
epoch [31/50] batch [1820/2000] time 1.997 (2.029) data 0.000 (0.000) loss 1.6989 (1.2135) lr 1.0000e-03 eta 21:31:01
epoch [31/50] batch [1840/2000] time 2.053 (2.029) data 0.000 (0.000) loss 1.7890 (1.2136) lr 1.0000e-03 eta 21:30:20
epoch [31/50] batch [1860/2000] time 2.029 (2.029) data 0.000 (0.000) loss 0.0187 (1.2129) lr 1.0000e-03 eta 21:29:42
epoch [31/50] batch [1880/2000] time 2.001 (2.029) data 0.000 (0.000) loss 0.4733 (1.2117) lr 1.0000e-03 eta 21:29:03
epoch [31/50] batch [1900/2000] time 2.054 (2.029) data 0.000 (0.000) loss 1.5202 (1.2128) lr 1.0000e-03 eta 21:28:23
epoch [31/50] batch [1920/2000] time 2.056 (2.029) data 0.000 (0.000) loss 3.8264 (1.2174) lr 1.0000e-03 eta 21:27:44
epoch [31/50] batch [1940/2000] time 2.054 (2.029) data 0.000 (0.000) loss 4.8949 (1.2174) lr 1.0000e-03 eta 21:27:04
epoch [31/50] batch [1960/2000] time 2.055 (2.029) data 0.000 (0.000) loss 0.5625 (1.2151) lr 1.0000e-03 eta 21:26:23
epoch [31/50] batch [1980/2000] time 2.000 (2.029) data 0.000 (0.000) loss 0.8170 (1.2184) lr 1.0000e-03 eta 21:25:44
epoch [31/50] batch [2000/2000] time 2.028 (2.029) data 0.000 (0.000) loss 2.9815 (1.2155) lr 9.3721e-04 eta 21:25:05
epoch [32/50] batch [20/2000] time 2.001 (2.060) data 0.000 (0.027) loss 0.9489 (0.9524) lr 9.3721e-04 eta 21:43:40
epoch [32/50] batch [40/2000] time 2.030 (2.048) data 0.000 (0.013) loss 0.0574 (0.9915) lr 9.3721e-04 eta 21:35:48
epoch [32/50] batch [60/2000] time 2.060 (2.045) data 0.001 (0.009) loss 1.6854 (1.0760) lr 9.3721e-04 eta 21:32:53
epoch [32/50] batch [80/2000] time 2.034 (2.040) data 0.000 (0.007) loss 0.8876 (1.1102) lr 9.3721e-04 eta 21:29:26
epoch [32/50] batch [100/2000] time 2.001 (2.039) data 0.000 (0.006) loss 0.4893 (1.1298) lr 9.3721e-04 eta 21:28:05
epoch [32/50] batch [120/2000] time 2.058 (2.038) data 0.000 (0.005) loss 1.1535 (1.1054) lr 9.3721e-04 eta 21:26:31
epoch [32/50] batch [140/2000] time 2.002 (2.036) data 0.000 (0.004) loss 1.4007 (1.1419) lr 9.3721e-04 eta 21:24:52
epoch [32/50] batch [160/2000] time 2.058 (2.036) data 0.000 (0.004) loss 0.8151 (1.1591) lr 9.3721e-04 eta 21:24:03
epoch [32/50] batch [180/2000] time 1.982 (2.036) data 0.000 (0.003) loss 0.4143 (1.1615) lr 9.3721e-04 eta 21:23:15
epoch [32/50] batch [200/2000] time 2.057 (2.036) data 0.000 (0.003) loss 0.8804 (1.1593) lr 9.3721e-04 eta 21:22:49
epoch [32/50] batch [220/2000] time 2.009 (2.037) data 0.000 (0.003) loss 0.7239 (1.1457) lr 9.3721e-04 eta 21:22:21
epoch [32/50] batch [240/2000] time 2.060 (2.037) data 0.000 (0.002) loss 0.6861 (1.1386) lr 9.3721e-04 eta 21:22:00
epoch [32/50] batch [260/2000] time 2.035 (2.037) data 0.000 (0.002) loss 1.5302 (1.1403) lr 9.3721e-04 eta 21:21:30
epoch [32/50] batch [280/2000] time 2.033 (2.038) data 0.000 (0.002) loss 1.1762 (1.1349) lr 9.3721e-04 eta 21:21:08
epoch [32/50] batch [300/2000] time 2.001 (2.037) data 0.000 (0.002) loss 0.2478 (1.1354) lr 9.3721e-04 eta 21:19:59
epoch [32/50] batch [320/2000] time 2.032 (2.037) data 0.000 (0.002) loss 0.7350 (1.1419) lr 9.3721e-04 eta 21:19:01
epoch [32/50] batch [340/2000] time 2.028 (2.037) data 0.000 (0.002) loss 2.7990 (1.1379) lr 9.3721e-04 eta 21:18:14
epoch [32/50] batch [360/2000] time 2.055 (2.036) data 0.000 (0.002) loss 1.5237 (1.1416) lr 9.3721e-04 eta 21:17:25
epoch [32/50] batch [380/2000] time 2.052 (2.036) data 0.000 (0.002) loss 0.9310 (1.1443) lr 9.3721e-04 eta 21:16:42
epoch [32/50] batch [400/2000] time 2.052 (2.036) data 0.000 (0.002) loss 2.5390 (1.1380) lr 9.3721e-04 eta 21:15:58
epoch [32/50] batch [420/2000] time 2.052 (2.036) data 0.000 (0.001) loss 0.7678 (1.1445) lr 9.3721e-04 eta 21:14:58
epoch [32/50] batch [440/2000] time 2.051 (2.035) data 0.000 (0.001) loss 0.5102 (1.1491) lr 9.3721e-04 eta 21:14:07
epoch [32/50] batch [460/2000] time 1.999 (2.035) data 0.000 (0.001) loss 0.1126 (1.1549) lr 9.3721e-04 eta 21:13:10
epoch [32/50] batch [480/2000] time 2.052 (2.035) data 0.000 (0.001) loss 0.6897 (1.1569) lr 9.3721e-04 eta 21:12:24
epoch [32/50] batch [500/2000] time 1.977 (2.034) data 0.000 (0.001) loss 2.4192 (1.1629) lr 9.3721e-04 eta 21:11:31
epoch [32/50] batch [520/2000] time 2.030 (2.034) data 0.000 (0.001) loss 1.0562 (1.1543) lr 9.3721e-04 eta 21:10:42
epoch [32/50] batch [540/2000] time 1.976 (2.034) data 0.000 (0.001) loss 0.0737 (1.1554) lr 9.3721e-04 eta 21:09:52
epoch [32/50] batch [560/2000] time 2.053 (2.034) data 0.000 (0.001) loss 0.8722 (1.1549) lr 9.3721e-04 eta 21:09:10
epoch [32/50] batch [580/2000] time 2.000 (2.034) data 0.000 (0.001) loss 0.4575 (1.1579) lr 9.3721e-04 eta 21:08:29
epoch [32/50] batch [600/2000] time 1.999 (2.034) data 0.001 (0.001) loss 2.7072 (1.1639) lr 9.3721e-04 eta 21:07:46
epoch [32/50] batch [620/2000] time 2.001 (2.034) data 0.000 (0.001) loss 0.2376 (1.1674) lr 9.3721e-04 eta 21:06:54
epoch [32/50] batch [640/2000] time 2.036 (2.033) data 0.000 (0.001) loss 0.1033 (1.1581) lr 9.3721e-04 eta 21:06:10
epoch [32/50] batch [660/2000] time 2.060 (2.034) data 0.000 (0.001) loss 2.4607 (1.1629) lr 9.3721e-04 eta 21:05:30
epoch [32/50] batch [680/2000] time 2.036 (2.034) data 0.000 (0.001) loss 1.9366 (1.1640) lr 9.3721e-04 eta 21:04:51
epoch [32/50] batch [700/2000] time 2.060 (2.034) data 0.000 (0.001) loss 1.1921 (1.1638) lr 9.3721e-04 eta 21:04:11
epoch [32/50] batch [720/2000] time 2.037 (2.034) data 0.000 (0.001) loss 2.7608 (1.1586) lr 9.3721e-04 eta 21:03:33
epoch [32/50] batch [740/2000] time 2.058 (2.034) data 0.000 (0.001) loss 0.9569 (1.1567) lr 9.3721e-04 eta 21:03:02
epoch [32/50] batch [760/2000] time 2.030 (2.034) data 0.000 (0.001) loss 2.3266 (1.1575) lr 9.3721e-04 eta 21:02:18
epoch [32/50] batch [780/2000] time 2.028 (2.034) data 0.000 (0.001) loss 1.1543 (1.1595) lr 9.3721e-04 eta 21:01:33
epoch [32/50] batch [800/2000] time 1.995 (2.033) data 0.000 (0.001) loss 2.7181 (1.1650) lr 9.3721e-04 eta 21:00:44
epoch [32/50] batch [820/2000] time 2.047 (2.033) data 0.000 (0.001) loss 1.2773 (1.1576) lr 9.3721e-04 eta 20:59:58
epoch [32/50] batch [840/2000] time 2.052 (2.033) data 0.000 (0.001) loss 1.0629 (1.1560) lr 9.3721e-04 eta 20:59:15
epoch [32/50] batch [860/2000] time 2.032 (2.033) data 0.000 (0.001) loss 0.7848 (1.1554) lr 9.3721e-04 eta 20:58:33
epoch [32/50] batch [880/2000] time 2.031 (2.033) data 0.000 (0.001) loss 1.0404 (1.1545) lr 9.3721e-04 eta 20:57:54
epoch [32/50] batch [900/2000] time 2.031 (2.033) data 0.000 (0.001) loss 2.0528 (1.1553) lr 9.3721e-04 eta 20:57:12
epoch [32/50] batch [920/2000] time 2.032 (2.033) data 0.000 (0.001) loss 1.9199 (1.1583) lr 9.3721e-04 eta 20:56:23
epoch [32/50] batch [940/2000] time 2.033 (2.033) data 0.000 (0.001) loss 2.8023 (1.1618) lr 9.3721e-04 eta 20:55:39
epoch [32/50] batch [960/2000] time 2.031 (2.033) data 0.000 (0.001) loss 1.1398 (1.1594) lr 9.3721e-04 eta 20:54:57
epoch [32/50] batch [980/2000] time 2.032 (2.033) data 0.000 (0.001) loss 1.7002 (1.1566) lr 9.3721e-04 eta 20:54:09
epoch [32/50] batch [1000/2000] time 2.053 (2.033) data 0.000 (0.001) loss 0.4980 (1.1599) lr 9.3721e-04 eta 20:53:28
epoch [32/50] batch [1020/2000] time 2.057 (2.033) data 0.000 (0.001) loss 1.8721 (1.1596) lr 9.3721e-04 eta 20:52:51
epoch [32/50] batch [1040/2000] time 2.051 (2.033) data 0.000 (0.001) loss 0.8497 (1.1598) lr 9.3721e-04 eta 20:52:08
epoch [32/50] batch [1060/2000] time 2.028 (2.033) data 0.000 (0.001) loss 0.9711 (1.1646) lr 9.3721e-04 eta 20:51:24
epoch [32/50] batch [1080/2000] time 1.975 (2.033) data 0.000 (0.001) loss 0.4294 (1.1651) lr 9.3721e-04 eta 20:50:46
epoch [32/50] batch [1100/2000] time 2.030 (2.033) data 0.000 (0.001) loss 1.2085 (1.1680) lr 9.3721e-04 eta 20:50:00
epoch [32/50] batch [1120/2000] time 2.051 (2.033) data 0.000 (0.001) loss 1.6134 (1.1696) lr 9.3721e-04 eta 20:49:19
epoch [32/50] batch [1140/2000] time 2.053 (2.033) data 0.000 (0.001) loss 1.7894 (1.1714) lr 9.3721e-04 eta 20:48:37
epoch [32/50] batch [1160/2000] time 2.052 (2.032) data 0.000 (0.001) loss 2.8638 (1.1733) lr 9.3721e-04 eta 20:47:51
epoch [32/50] batch [1180/2000] time 2.025 (2.032) data 0.000 (0.001) loss 1.1066 (1.1792) lr 9.3721e-04 eta 20:47:06
epoch [32/50] batch [1200/2000] time 2.026 (2.032) data 0.000 (0.001) loss 2.7729 (1.1821) lr 9.3721e-04 eta 20:46:21
epoch [32/50] batch [1220/2000] time 2.001 (2.032) data 0.000 (0.001) loss 1.7626 (1.1807) lr 9.3721e-04 eta 20:45:34
epoch [32/50] batch [1240/2000] time 2.059 (2.032) data 0.000 (0.001) loss 1.3846 (1.1846) lr 9.3721e-04 eta 20:44:53
epoch [32/50] batch [1260/2000] time 2.055 (2.032) data 0.000 (0.001) loss 0.2492 (1.1843) lr 9.3721e-04 eta 20:44:08
epoch [32/50] batch [1280/2000] time 2.054 (2.032) data 0.000 (0.001) loss 0.9892 (1.1811) lr 9.3721e-04 eta 20:43:30
epoch [32/50] batch [1300/2000] time 2.031 (2.032) data 0.000 (0.001) loss 0.0858 (1.1813) lr 9.3721e-04 eta 20:42:47
epoch [32/50] batch [1320/2000] time 2.002 (2.032) data 0.000 (0.001) loss 0.5697 (1.1904) lr 9.3721e-04 eta 20:42:03
epoch [32/50] batch [1340/2000] time 2.030 (2.032) data 0.000 (0.001) loss 0.9440 (1.1924) lr 9.3721e-04 eta 20:41:22
epoch [32/50] batch [1360/2000] time 2.052 (2.032) data 0.000 (0.001) loss 2.2120 (1.1904) lr 9.3721e-04 eta 20:40:43
epoch [32/50] batch [1380/2000] time 2.050 (2.032) data 0.000 (0.001) loss 1.2905 (1.1884) lr 9.3721e-04 eta 20:40:03
epoch [32/50] batch [1400/2000] time 1.999 (2.032) data 0.000 (0.001) loss 1.1089 (1.1909) lr 9.3721e-04 eta 20:39:24
epoch [32/50] batch [1420/2000] time 2.050 (2.032) data 0.000 (0.001) loss 0.0224 (1.1930) lr 9.3721e-04 eta 20:38:47
epoch [32/50] batch [1440/2000] time 2.048 (2.032) data 0.000 (0.001) loss 0.5042 (1.1945) lr 9.3721e-04 eta 20:38:08
epoch [32/50] batch [1460/2000] time 2.051 (2.032) data 0.000 (0.001) loss 1.7934 (1.2008) lr 9.3721e-04 eta 20:37:26
epoch [32/50] batch [1480/2000] time 2.051 (2.032) data 0.000 (0.001) loss 0.7825 (1.2012) lr 9.3721e-04 eta 20:36:41
epoch [32/50] batch [1500/2000] time 2.051 (2.032) data 0.000 (0.001) loss 0.6161 (1.2011) lr 9.3721e-04 eta 20:35:56
epoch [32/50] batch [1520/2000] time 1.995 (2.032) data 0.000 (0.001) loss 0.2936 (1.1999) lr 9.3721e-04 eta 20:35:15
epoch [32/50] batch [1540/2000] time 2.053 (2.032) data 0.000 (0.001) loss 0.7390 (1.2009) lr 9.3721e-04 eta 20:34:35
epoch [32/50] batch [1560/2000] time 2.056 (2.032) data 0.000 (0.001) loss 1.1303 (1.1991) lr 9.3721e-04 eta 20:33:54
epoch [32/50] batch [1580/2000] time 2.051 (2.032) data 0.000 (0.001) loss 2.3355 (1.2013) lr 9.3721e-04 eta 20:33:12
epoch [32/50] batch [1600/2000] time 1.999 (2.032) data 0.000 (0.001) loss 2.6438 (1.2015) lr 9.3721e-04 eta 20:32:31
epoch [32/50] batch [1620/2000] time 2.057 (2.032) data 0.000 (0.001) loss 0.7164 (1.1989) lr 9.3721e-04 eta 20:31:50
epoch [32/50] batch [1640/2000] time 2.055 (2.032) data 0.000 (0.001) loss 0.4186 (1.1964) lr 9.3721e-04 eta 20:31:10
epoch [32/50] batch [1660/2000] time 2.055 (2.032) data 0.000 (0.001) loss 0.9944 (1.1930) lr 9.3721e-04 eta 20:30:30
epoch [32/50] batch [1680/2000] time 2.052 (2.032) data 0.001 (0.001) loss 1.4192 (1.1934) lr 9.3721e-04 eta 20:29:49
epoch [32/50] batch [1700/2000] time 1.998 (2.032) data 0.000 (0.001) loss 0.5535 (1.1929) lr 9.3721e-04 eta 20:29:08
epoch [32/50] batch [1720/2000] time 2.031 (2.031) data 0.000 (0.001) loss 0.2418 (1.1925) lr 9.3721e-04 eta 20:28:21
epoch [32/50] batch [1740/2000] time 2.054 (2.032) data 0.000 (0.001) loss 1.8677 (1.1930) lr 9.3721e-04 eta 20:27:42
epoch [32/50] batch [1760/2000] time 2.052 (2.032) data 0.000 (0.001) loss 0.2381 (1.1915) lr 9.3721e-04 eta 20:27:04
epoch [32/50] batch [1780/2000] time 1.996 (2.032) data 0.000 (0.001) loss 2.1341 (1.1903) lr 9.3721e-04 eta 20:26:20
epoch [32/50] batch [1800/2000] time 2.026 (2.031) data 0.000 (0.001) loss 0.7424 (1.1936) lr 9.3721e-04 eta 20:25:37
epoch [32/50] batch [1820/2000] time 2.027 (2.031) data 0.000 (0.001) loss 2.0179 (1.1944) lr 9.3721e-04 eta 20:24:54
epoch [32/50] batch [1840/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.4162 (1.1926) lr 9.3721e-04 eta 20:24:11
epoch [32/50] batch [1860/2000] time 1.976 (2.031) data 0.000 (0.001) loss 0.4587 (1.1932) lr 9.3721e-04 eta 20:23:28
epoch [32/50] batch [1880/2000] time 2.052 (2.031) data 0.000 (0.000) loss 1.1679 (1.1934) lr 9.3721e-04 eta 20:22:49
epoch [32/50] batch [1900/2000] time 2.029 (2.031) data 0.000 (0.000) loss 0.8295 (1.1973) lr 9.3721e-04 eta 20:22:08
epoch [32/50] batch [1920/2000] time 2.026 (2.031) data 0.000 (0.000) loss 1.1969 (1.1960) lr 9.3721e-04 eta 20:21:26
epoch [32/50] batch [1940/2000] time 2.000 (2.031) data 0.000 (0.000) loss 1.6110 (1.1967) lr 9.3721e-04 eta 20:20:39
epoch [32/50] batch [1960/2000] time 2.002 (2.031) data 0.000 (0.000) loss 2.8133 (1.2003) lr 9.3721e-04 eta 20:19:58
epoch [32/50] batch [1980/2000] time 2.057 (2.031) data 0.000 (0.000) loss 1.6914 (1.1980) lr 9.3721e-04 eta 20:19:20
epoch [32/50] batch [2000/2000] time 2.030 (2.031) data 0.000 (0.000) loss 2.3563 (1.2035) lr 8.7467e-04 eta 20:18:39
epoch [33/50] batch [20/2000] time 2.054 (2.064) data 0.000 (0.027) loss 0.7929 (1.5770) lr 8.7467e-04 eta 20:37:41
epoch [33/50] batch [40/2000] time 1.999 (2.048) data 0.000 (0.013) loss 2.2316 (1.4495) lr 8.7467e-04 eta 20:27:42
epoch [33/50] batch [60/2000] time 2.001 (2.041) data 0.001 (0.009) loss 2.4199 (1.3179) lr 8.7467e-04 eta 20:22:43
epoch [33/50] batch [80/2000] time 2.036 (2.038) data 0.000 (0.007) loss 0.6058 (1.2083) lr 8.7467e-04 eta 20:19:50
epoch [33/50] batch [100/2000] time 2.028 (2.036) data 0.000 (0.006) loss 3.3649 (1.1877) lr 8.7467e-04 eta 20:18:15
epoch [33/50] batch [120/2000] time 2.028 (2.034) data 0.000 (0.005) loss 0.2468 (1.1413) lr 8.7467e-04 eta 20:16:06
epoch [33/50] batch [140/2000] time 1.998 (2.033) data 0.000 (0.004) loss 0.6973 (1.1488) lr 8.7467e-04 eta 20:15:12
epoch [33/50] batch [160/2000] time 1.997 (2.033) data 0.000 (0.004) loss 3.5423 (1.1910) lr 8.7467e-04 eta 20:14:37
epoch [33/50] batch [180/2000] time 2.029 (2.033) data 0.000 (0.003) loss 0.5488 (1.1918) lr 8.7467e-04 eta 20:13:46
epoch [33/50] batch [200/2000] time 2.028 (2.033) data 0.000 (0.003) loss 0.3233 (1.1967) lr 8.7467e-04 eta 20:13:03
epoch [33/50] batch [220/2000] time 2.055 (2.033) data 0.000 (0.003) loss 3.3069 (1.2077) lr 8.7467e-04 eta 20:12:21
epoch [33/50] batch [240/2000] time 2.031 (2.033) data 0.000 (0.002) loss 0.3573 (1.2404) lr 8.7467e-04 eta 20:11:44
epoch [33/50] batch [260/2000] time 2.053 (2.033) data 0.000 (0.002) loss 1.1774 (1.2447) lr 8.7467e-04 eta 20:11:07
epoch [33/50] batch [280/2000] time 1.973 (2.033) data 0.000 (0.002) loss 0.2807 (1.2424) lr 8.7467e-04 eta 20:10:05
epoch [33/50] batch [300/2000] time 1.976 (2.033) data 0.000 (0.002) loss 0.9797 (1.2462) lr 8.7467e-04 eta 20:09:26
epoch [33/50] batch [320/2000] time 1.974 (2.033) data 0.000 (0.002) loss 1.5360 (1.2397) lr 8.7467e-04 eta 20:08:50
epoch [33/50] batch [340/2000] time 2.026 (2.032) data 0.000 (0.002) loss 0.5514 (1.2296) lr 8.7467e-04 eta 20:07:54
epoch [33/50] batch [360/2000] time 2.052 (2.032) data 0.000 (0.002) loss 1.6170 (1.2408) lr 8.7467e-04 eta 20:07:12
epoch [33/50] batch [380/2000] time 2.050 (2.032) data 0.000 (0.002) loss 0.7318 (1.2341) lr 8.7467e-04 eta 20:06:19
epoch [33/50] batch [400/2000] time 2.052 (2.032) data 0.000 (0.002) loss 0.7757 (1.2170) lr 8.7467e-04 eta 20:05:35
epoch [33/50] batch [420/2000] time 1.995 (2.031) data 0.000 (0.001) loss 0.8824 (1.2042) lr 8.7467e-04 eta 20:04:39
epoch [33/50] batch [440/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.2892 (1.1988) lr 8.7467e-04 eta 20:03:55
epoch [33/50] batch [460/2000] time 1.995 (2.031) data 0.000 (0.001) loss 0.5236 (1.2074) lr 8.7467e-04 eta 20:03:19
epoch [33/50] batch [480/2000] time 2.056 (2.031) data 0.000 (0.001) loss 0.7525 (1.2154) lr 8.7467e-04 eta 20:02:34
epoch [33/50] batch [500/2000] time 2.028 (2.031) data 0.000 (0.001) loss 1.2508 (1.2087) lr 8.7467e-04 eta 20:01:50
epoch [33/50] batch [520/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.7370 (1.2206) lr 8.7467e-04 eta 20:01:09
epoch [33/50] batch [540/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.7423 (1.2237) lr 8.7467e-04 eta 20:00:35
epoch [33/50] batch [560/2000] time 2.054 (2.031) data 0.000 (0.001) loss 2.3936 (1.2414) lr 8.7467e-04 eta 19:59:52
epoch [33/50] batch [580/2000] time 2.028 (2.031) data 0.000 (0.001) loss 1.4218 (1.2381) lr 8.7467e-04 eta 19:59:08
epoch [33/50] batch [600/2000] time 2.052 (2.031) data 0.001 (0.001) loss 0.8491 (1.2357) lr 8.7467e-04 eta 19:58:30
epoch [33/50] batch [620/2000] time 2.028 (2.031) data 0.000 (0.001) loss 1.8154 (1.2269) lr 8.7467e-04 eta 19:57:45
epoch [33/50] batch [640/2000] time 2.050 (2.031) data 0.000 (0.001) loss 1.3278 (1.2128) lr 8.7467e-04 eta 19:56:58
epoch [33/50] batch [660/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.3746 (1.2114) lr 8.7467e-04 eta 19:56:11
epoch [33/50] batch [680/2000] time 2.030 (2.031) data 0.000 (0.001) loss 2.0331 (1.2048) lr 8.7467e-04 eta 19:55:34
epoch [33/50] batch [700/2000] time 2.000 (2.031) data 0.000 (0.001) loss 2.1484 (1.2005) lr 8.7467e-04 eta 19:54:42
epoch [33/50] batch [720/2000] time 2.054 (2.031) data 0.000 (0.001) loss 0.3442 (1.1950) lr 8.7467e-04 eta 19:53:58
epoch [33/50] batch [740/2000] time 2.027 (2.031) data 0.000 (0.001) loss 1.9429 (1.2088) lr 8.7467e-04 eta 19:53:15
epoch [33/50] batch [760/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.0382 (1.2175) lr 8.7467e-04 eta 19:52:30
epoch [33/50] batch [780/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.2015 (1.2143) lr 8.7467e-04 eta 19:51:46
epoch [33/50] batch [800/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.8691 (1.2080) lr 8.7467e-04 eta 19:50:55
epoch [33/50] batch [820/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.8632 (1.2179) lr 8.7467e-04 eta 19:50:06
epoch [33/50] batch [840/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.2132 (1.2230) lr 8.7467e-04 eta 19:49:21
epoch [33/50] batch [860/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.7605 (1.2190) lr 8.7467e-04 eta 19:48:34
epoch [33/50] batch [880/2000] time 1.973 (2.029) data 0.000 (0.001) loss 0.3642 (1.2167) lr 8.7467e-04 eta 19:47:52
epoch [33/50] batch [900/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.8019 (1.2155) lr 8.7467e-04 eta 19:47:13
epoch [33/50] batch [920/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.3264 (1.2063) lr 8.7467e-04 eta 19:46:31
epoch [33/50] batch [940/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.0621 (1.1994) lr 8.7467e-04 eta 19:45:51
epoch [33/50] batch [960/2000] time 2.049 (2.029) data 0.000 (0.001) loss 2.5739 (1.1986) lr 8.7467e-04 eta 19:45:10
epoch [33/50] batch [980/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.1703 (1.1967) lr 8.7467e-04 eta 19:44:31
epoch [33/50] batch [1000/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.8759 (1.1951) lr 8.7467e-04 eta 19:43:53
epoch [33/50] batch [1020/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.9542 (1.1977) lr 8.7467e-04 eta 19:43:13
epoch [33/50] batch [1040/2000] time 2.028 (2.029) data 0.000 (0.001) loss 2.7415 (1.2004) lr 8.7467e-04 eta 19:42:26
epoch [33/50] batch [1060/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.4139 (1.1998) lr 8.7467e-04 eta 19:41:44
epoch [33/50] batch [1080/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.1121 (1.2020) lr 8.7467e-04 eta 19:41:02
epoch [33/50] batch [1100/2000] time 2.047 (2.029) data 0.000 (0.001) loss 0.8102 (1.1995) lr 8.7467e-04 eta 19:40:19
epoch [33/50] batch [1120/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.9373 (1.2005) lr 8.7467e-04 eta 19:39:39
epoch [33/50] batch [1140/2000] time 1.999 (2.029) data 0.001 (0.001) loss 1.5022 (1.2033) lr 8.7467e-04 eta 19:38:55
epoch [33/50] batch [1160/2000] time 2.004 (2.029) data 0.000 (0.001) loss 0.7867 (1.2032) lr 8.7467e-04 eta 19:38:14
epoch [33/50] batch [1180/2000] time 2.001 (2.029) data 0.000 (0.001) loss 1.0340 (1.2057) lr 8.7467e-04 eta 19:37:35
epoch [33/50] batch [1200/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.6334 (1.2009) lr 8.7467e-04 eta 19:36:53
epoch [33/50] batch [1220/2000] time 2.030 (2.029) data 0.000 (0.001) loss 1.9822 (1.1967) lr 8.7467e-04 eta 19:36:12
epoch [33/50] batch [1240/2000] time 2.003 (2.029) data 0.000 (0.001) loss 0.3413 (1.2037) lr 8.7467e-04 eta 19:35:35
epoch [33/50] batch [1260/2000] time 2.035 (2.029) data 0.000 (0.001) loss 1.2492 (1.2025) lr 8.7467e-04 eta 19:34:54
epoch [33/50] batch [1280/2000] time 2.032 (2.029) data 0.000 (0.001) loss 0.5243 (1.1998) lr 8.7467e-04 eta 19:34:12
epoch [33/50] batch [1300/2000] time 2.049 (2.029) data 0.000 (0.001) loss 2.3896 (1.2019) lr 8.7467e-04 eta 19:33:31
epoch [33/50] batch [1320/2000] time 1.974 (2.029) data 0.000 (0.001) loss 4.0857 (1.2085) lr 8.7467e-04 eta 19:32:51
epoch [33/50] batch [1340/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.2513 (1.2044) lr 8.7467e-04 eta 19:32:15
epoch [33/50] batch [1360/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.2865 (1.2069) lr 8.7467e-04 eta 19:31:34
epoch [33/50] batch [1380/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.5198 (1.2053) lr 8.7467e-04 eta 19:30:55
epoch [33/50] batch [1400/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.6026 (1.2058) lr 8.7467e-04 eta 19:30:14
epoch [33/50] batch [1420/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.2782 (1.2088) lr 8.7467e-04 eta 19:29:32
epoch [33/50] batch [1440/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.9682 (1.2121) lr 8.7467e-04 eta 19:28:50
epoch [33/50] batch [1460/2000] time 2.026 (2.029) data 0.000 (0.001) loss 2.0117 (1.2152) lr 8.7467e-04 eta 19:28:13
epoch [33/50] batch [1480/2000] time 2.051 (2.029) data 0.000 (0.001) loss 2.3005 (1.2167) lr 8.7467e-04 eta 19:27:33
epoch [33/50] batch [1500/2000] time 1.999 (2.029) data 0.000 (0.001) loss 1.5229 (1.2203) lr 8.7467e-04 eta 19:26:48
epoch [33/50] batch [1520/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.1287 (1.2198) lr 8.7467e-04 eta 19:26:09
epoch [33/50] batch [1540/2000] time 2.029 (2.029) data 0.000 (0.001) loss 2.1084 (1.2222) lr 8.7467e-04 eta 19:25:29
epoch [33/50] batch [1560/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.4147 (1.2226) lr 8.7467e-04 eta 19:24:45
epoch [33/50] batch [1580/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.1579 (1.2212) lr 8.7467e-04 eta 19:24:06
epoch [33/50] batch [1600/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.5511 (1.2230) lr 8.7467e-04 eta 19:23:25
epoch [33/50] batch [1620/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.7648 (1.2231) lr 8.7467e-04 eta 19:22:47
epoch [33/50] batch [1640/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.8308 (1.2286) lr 8.7467e-04 eta 19:22:04
epoch [33/50] batch [1660/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.8095 (1.2313) lr 8.7467e-04 eta 19:21:22
epoch [33/50] batch [1680/2000] time 2.048 (2.029) data 0.001 (0.001) loss 0.3203 (1.2329) lr 8.7467e-04 eta 19:20:43
epoch [33/50] batch [1700/2000] time 2.051 (2.029) data 0.000 (0.001) loss 2.3462 (1.2319) lr 8.7467e-04 eta 19:20:02
epoch [33/50] batch [1720/2000] time 2.058 (2.029) data 0.000 (0.001) loss 1.6621 (1.2371) lr 8.7467e-04 eta 19:19:23
epoch [33/50] batch [1740/2000] time 2.034 (2.029) data 0.000 (0.001) loss 0.1154 (1.2389) lr 8.7467e-04 eta 19:18:43
epoch [33/50] batch [1760/2000] time 1.980 (2.029) data 0.000 (0.001) loss 2.2923 (1.2386) lr 8.7467e-04 eta 19:18:04
epoch [33/50] batch [1780/2000] time 2.056 (2.029) data 0.000 (0.000) loss 0.7167 (1.2401) lr 8.7467e-04 eta 19:17:26
epoch [33/50] batch [1800/2000] time 2.060 (2.030) data 0.000 (0.000) loss 1.2020 (1.2378) lr 8.7467e-04 eta 19:16:50
epoch [33/50] batch [1820/2000] time 2.058 (2.030) data 0.000 (0.000) loss 1.9114 (1.2371) lr 8.7467e-04 eta 19:16:11
epoch [33/50] batch [1840/2000] time 2.053 (2.030) data 0.000 (0.000) loss 1.9136 (1.2361) lr 8.7467e-04 eta 19:15:34
epoch [33/50] batch [1860/2000] time 2.026 (2.030) data 0.000 (0.000) loss 1.2444 (1.2355) lr 8.7467e-04 eta 19:14:52
epoch [33/50] batch [1880/2000] time 2.051 (2.030) data 0.000 (0.000) loss 1.7379 (1.2376) lr 8.7467e-04 eta 19:14:11
epoch [33/50] batch [1900/2000] time 2.049 (2.030) data 0.000 (0.000) loss 1.1816 (1.2409) lr 8.7467e-04 eta 19:13:31
epoch [33/50] batch [1920/2000] time 1.998 (2.030) data 0.000 (0.000) loss 1.2943 (1.2379) lr 8.7467e-04 eta 19:12:50
epoch [33/50] batch [1940/2000] time 2.050 (2.030) data 0.000 (0.000) loss 0.8912 (1.2376) lr 8.7467e-04 eta 19:12:10
epoch [33/50] batch [1960/2000] time 2.026 (2.030) data 0.000 (0.000) loss 0.9168 (1.2339) lr 8.7467e-04 eta 19:11:28
epoch [33/50] batch [1980/2000] time 1.996 (2.030) data 0.000 (0.000) loss 1.8717 (1.2368) lr 8.7467e-04 eta 19:10:48
epoch [33/50] batch [2000/2000] time 2.024 (2.030) data 0.000 (0.000) loss 0.3871 (1.2349) lr 8.1262e-04 eta 19:10:05
epoch [34/50] batch [20/2000] time 2.049 (2.063) data 0.000 (0.027) loss 0.2602 (1.5658) lr 8.1262e-04 eta 19:28:08
epoch [34/50] batch [40/2000] time 2.049 (2.043) data 0.000 (0.014) loss 3.1356 (1.5961) lr 8.1262e-04 eta 19:16:07
epoch [34/50] batch [60/2000] time 1.997 (2.037) data 0.001 (0.009) loss 1.3696 (1.5994) lr 8.1262e-04 eta 19:12:24
epoch [34/50] batch [80/2000] time 2.030 (2.035) data 0.000 (0.007) loss 0.2017 (1.4465) lr 8.1262e-04 eta 19:10:28
epoch [34/50] batch [100/2000] time 2.026 (2.033) data 0.000 (0.006) loss 2.2750 (1.4213) lr 8.1262e-04 eta 19:08:36
epoch [34/50] batch [120/2000] time 1.998 (2.032) data 0.000 (0.005) loss 0.8361 (1.3908) lr 8.1262e-04 eta 19:07:22
epoch [34/50] batch [140/2000] time 2.031 (2.032) data 0.000 (0.004) loss 2.7078 (1.3427) lr 8.1262e-04 eta 19:06:35
epoch [34/50] batch [160/2000] time 2.029 (2.032) data 0.000 (0.004) loss 0.8147 (1.3611) lr 8.1262e-04 eta 19:06:01
epoch [34/50] batch [180/2000] time 2.000 (2.032) data 0.000 (0.003) loss 0.2838 (1.3539) lr 8.1262e-04 eta 19:05:19
epoch [34/50] batch [200/2000] time 2.027 (2.032) data 0.000 (0.003) loss 0.8492 (1.3600) lr 8.1262e-04 eta 19:04:46
epoch [34/50] batch [220/2000] time 2.052 (2.032) data 0.000 (0.003) loss 0.8912 (1.3642) lr 8.1262e-04 eta 19:03:52
epoch [34/50] batch [240/2000] time 2.051 (2.031) data 0.000 (0.002) loss 2.4475 (1.3315) lr 8.1262e-04 eta 19:02:42
epoch [34/50] batch [260/2000] time 2.054 (2.031) data 0.000 (0.002) loss 0.6757 (1.3199) lr 8.1262e-04 eta 19:01:55
epoch [34/50] batch [280/2000] time 1.999 (2.031) data 0.000 (0.002) loss 3.1106 (1.3380) lr 8.1262e-04 eta 19:01:27
epoch [34/50] batch [300/2000] time 1.998 (2.031) data 0.000 (0.002) loss 1.7125 (1.3190) lr 8.1262e-04 eta 19:00:56
epoch [34/50] batch [320/2000] time 1.999 (2.031) data 0.000 (0.002) loss 1.2882 (1.3119) lr 8.1262e-04 eta 19:00:00
epoch [34/50] batch [340/2000] time 2.029 (2.031) data 0.000 (0.002) loss 0.3520 (1.3050) lr 8.1262e-04 eta 18:59:28
epoch [34/50] batch [360/2000] time 2.051 (2.031) data 0.000 (0.002) loss 0.4871 (1.2896) lr 8.1262e-04 eta 18:58:51
epoch [34/50] batch [380/2000] time 2.024 (2.031) data 0.000 (0.002) loss 0.7720 (1.2819) lr 8.1262e-04 eta 18:58:03
epoch [34/50] batch [400/2000] time 2.048 (2.031) data 0.000 (0.002) loss 0.5879 (1.2605) lr 8.1262e-04 eta 18:57:33
epoch [34/50] batch [420/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.9321 (1.2469) lr 8.1262e-04 eta 18:56:54
epoch [34/50] batch [440/2000] time 2.028 (2.031) data 0.000 (0.001) loss 1.0794 (1.2630) lr 8.1262e-04 eta 18:56:06
epoch [34/50] batch [460/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.4702 (1.2582) lr 8.1262e-04 eta 18:55:24
epoch [34/50] batch [480/2000] time 2.052 (2.031) data 0.000 (0.001) loss 2.2587 (1.2612) lr 8.1262e-04 eta 18:54:42
epoch [34/50] batch [500/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.6997 (1.2610) lr 8.1262e-04 eta 18:54:08
epoch [34/50] batch [520/2000] time 1.972 (2.031) data 0.000 (0.001) loss 0.1734 (1.2453) lr 8.1262e-04 eta 18:53:09
epoch [34/50] batch [540/2000] time 1.974 (2.031) data 0.000 (0.001) loss 1.8008 (1.2475) lr 8.1262e-04 eta 18:52:23
epoch [34/50] batch [560/2000] time 2.026 (2.031) data 0.000 (0.001) loss 1.1629 (1.2491) lr 8.1262e-04 eta 18:51:45
epoch [34/50] batch [580/2000] time 1.974 (2.031) data 0.000 (0.001) loss 0.2443 (1.2493) lr 8.1262e-04 eta 18:50:59
epoch [34/50] batch [600/2000] time 1.998 (2.030) data 0.001 (0.001) loss 0.2753 (1.2383) lr 8.1262e-04 eta 18:50:17
epoch [34/50] batch [620/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.8621 (1.2313) lr 8.1262e-04 eta 18:49:33
epoch [34/50] batch [640/2000] time 1.993 (2.030) data 0.000 (0.001) loss 0.3312 (1.2341) lr 8.1262e-04 eta 18:48:42
epoch [34/50] batch [660/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.4037 (1.2319) lr 8.1262e-04 eta 18:47:56
epoch [34/50] batch [680/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.5132 (1.2262) lr 8.1262e-04 eta 18:47:10
epoch [34/50] batch [700/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.4194 (1.2268) lr 8.1262e-04 eta 18:46:32
epoch [34/50] batch [720/2000] time 1.995 (2.030) data 0.000 (0.001) loss 0.5749 (1.2169) lr 8.1262e-04 eta 18:45:50
epoch [34/50] batch [740/2000] time 2.048 (2.030) data 0.000 (0.001) loss 1.2827 (1.2179) lr 8.1262e-04 eta 18:45:13
epoch [34/50] batch [760/2000] time 2.047 (2.030) data 0.000 (0.001) loss 0.3357 (1.2207) lr 8.1262e-04 eta 18:44:37
epoch [34/50] batch [780/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.3031 (1.2253) lr 8.1262e-04 eta 18:43:51
epoch [34/50] batch [800/2000] time 2.001 (2.030) data 0.000 (0.001) loss 0.9411 (1.2177) lr 8.1262e-04 eta 18:43:08
epoch [34/50] batch [820/2000] time 2.032 (2.030) data 0.000 (0.001) loss 1.8014 (1.2129) lr 8.1262e-04 eta 18:42:38
epoch [34/50] batch [840/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.2197 (1.2164) lr 8.1262e-04 eta 18:41:55
epoch [34/50] batch [860/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.8772 (1.2195) lr 8.1262e-04 eta 18:41:21
epoch [34/50] batch [880/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.6718 (1.2223) lr 8.1262e-04 eta 18:40:37
epoch [34/50] batch [900/2000] time 2.000 (2.030) data 0.000 (0.001) loss 1.0996 (1.2238) lr 8.1262e-04 eta 18:39:51
epoch [34/50] batch [920/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.7360 (1.2180) lr 8.1262e-04 eta 18:39:08
epoch [34/50] batch [940/2000] time 1.998 (2.030) data 0.000 (0.001) loss 2.0758 (1.2224) lr 8.1262e-04 eta 18:38:28
epoch [34/50] batch [960/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.5421 (1.2175) lr 8.1262e-04 eta 18:37:49
epoch [34/50] batch [980/2000] time 2.053 (2.030) data 0.000 (0.001) loss 3.2783 (1.2260) lr 8.1262e-04 eta 18:37:08
epoch [34/50] batch [1000/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.3912 (1.2202) lr 8.1262e-04 eta 18:36:24
epoch [34/50] batch [1020/2000] time 2.030 (2.030) data 0.000 (0.001) loss 1.3153 (1.2218) lr 8.1262e-04 eta 18:35:47
epoch [34/50] batch [1040/2000] time 1.975 (2.030) data 0.000 (0.001) loss 1.5362 (1.2178) lr 8.1262e-04 eta 18:35:06
epoch [34/50] batch [1060/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.4957 (1.2187) lr 8.1262e-04 eta 18:34:25
epoch [34/50] batch [1080/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.3424 (1.2173) lr 8.1262e-04 eta 18:33:48
epoch [34/50] batch [1100/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.8823 (1.2183) lr 8.1262e-04 eta 18:33:09
epoch [34/50] batch [1120/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.2610 (1.2189) lr 8.1262e-04 eta 18:32:27
epoch [34/50] batch [1140/2000] time 2.054 (2.030) data 0.001 (0.001) loss 0.3715 (1.2197) lr 8.1262e-04 eta 18:31:49
epoch [34/50] batch [1160/2000] time 2.000 (2.030) data 0.000 (0.001) loss 1.7189 (1.2198) lr 8.1262e-04 eta 18:31:06
epoch [34/50] batch [1180/2000] time 2.032 (2.030) data 0.000 (0.001) loss 4.6702 (1.2299) lr 8.1262e-04 eta 18:30:25
epoch [34/50] batch [1200/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.8386 (1.2328) lr 8.1262e-04 eta 18:29:48
epoch [34/50] batch [1220/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.4849 (1.2286) lr 8.1262e-04 eta 18:29:10
epoch [34/50] batch [1240/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.2523 (1.2292) lr 8.1262e-04 eta 18:28:30
epoch [34/50] batch [1260/2000] time 2.001 (2.030) data 0.000 (0.001) loss 1.7369 (1.2321) lr 8.1262e-04 eta 18:27:51
epoch [34/50] batch [1280/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.5913 (1.2322) lr 8.1262e-04 eta 18:27:07
epoch [34/50] batch [1300/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.9915 (1.2358) lr 8.1262e-04 eta 18:26:27
epoch [34/50] batch [1320/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.8099 (1.2312) lr 8.1262e-04 eta 18:25:41
epoch [34/50] batch [1340/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.2604 (1.2324) lr 8.1262e-04 eta 18:25:02
epoch [34/50] batch [1360/2000] time 2.002 (2.030) data 0.000 (0.001) loss 0.8117 (1.2351) lr 8.1262e-04 eta 18:24:21
epoch [34/50] batch [1380/2000] time 2.029 (2.030) data 0.000 (0.001) loss 2.5794 (1.2353) lr 8.1262e-04 eta 18:23:42
epoch [34/50] batch [1400/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.7131 (1.2368) lr 8.1262e-04 eta 18:23:02
epoch [34/50] batch [1420/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.3148 (1.2346) lr 8.1262e-04 eta 18:22:19
epoch [34/50] batch [1440/2000] time 2.006 (2.030) data 0.000 (0.001) loss 1.3646 (1.2299) lr 8.1262e-04 eta 18:21:41
epoch [34/50] batch [1460/2000] time 2.008 (2.030) data 0.000 (0.001) loss 2.4818 (1.2285) lr 8.1262e-04 eta 18:21:01
epoch [34/50] batch [1480/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.3288 (1.2349) lr 8.1262e-04 eta 18:20:21
epoch [34/50] batch [1500/2000] time 2.058 (2.030) data 0.000 (0.001) loss 0.8730 (1.2327) lr 8.1262e-04 eta 18:19:41
epoch [34/50] batch [1520/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.2950 (1.2292) lr 8.1262e-04 eta 18:19:02
epoch [34/50] batch [1540/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.1663 (1.2247) lr 8.1262e-04 eta 18:18:23
epoch [34/50] batch [1560/2000] time 1.999 (2.030) data 0.000 (0.001) loss 2.4147 (1.2224) lr 8.1262e-04 eta 18:17:45
epoch [34/50] batch [1580/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.4166 (1.2259) lr 8.1262e-04 eta 18:17:02
epoch [34/50] batch [1600/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.2509 (1.2273) lr 8.1262e-04 eta 18:16:24
epoch [34/50] batch [1620/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.5292 (1.2283) lr 8.1262e-04 eta 18:15:40
epoch [34/50] batch [1640/2000] time 2.003 (2.030) data 0.000 (0.001) loss 0.5965 (1.2303) lr 8.1262e-04 eta 18:15:01
epoch [34/50] batch [1660/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.8101 (1.2272) lr 8.1262e-04 eta 18:14:22
epoch [34/50] batch [1680/2000] time 2.031 (2.030) data 0.001 (0.001) loss 0.8358 (1.2347) lr 8.1262e-04 eta 18:13:42
epoch [34/50] batch [1700/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.1973 (1.2324) lr 8.1262e-04 eta 18:12:59
epoch [34/50] batch [1720/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.8955 (1.2344) lr 8.1262e-04 eta 18:12:20
epoch [34/50] batch [1740/2000] time 2.030 (2.030) data 0.000 (0.001) loss 1.4883 (1.2375) lr 8.1262e-04 eta 18:11:37
epoch [34/50] batch [1760/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.8761 (1.2345) lr 8.1262e-04 eta 18:10:54
epoch [34/50] batch [1780/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.6643 (1.2340) lr 8.1262e-04 eta 18:10:14
epoch [34/50] batch [1800/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.9123 (1.2359) lr 8.1262e-04 eta 18:09:37
epoch [34/50] batch [1820/2000] time 2.003 (2.030) data 0.000 (0.001) loss 1.1839 (1.2351) lr 8.1262e-04 eta 18:08:56
epoch [34/50] batch [1840/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.9853 (1.2327) lr 8.1262e-04 eta 18:08:16
epoch [34/50] batch [1860/2000] time 2.001 (2.030) data 0.000 (0.000) loss 0.7776 (1.2280) lr 8.1262e-04 eta 18:07:36
epoch [34/50] batch [1880/2000] time 2.029 (2.030) data 0.000 (0.000) loss 0.6824 (1.2285) lr 8.1262e-04 eta 18:06:56
epoch [34/50] batch [1900/2000] time 1.997 (2.030) data 0.000 (0.000) loss 0.3644 (1.2291) lr 8.1262e-04 eta 18:06:14
epoch [34/50] batch [1920/2000] time 2.026 (2.030) data 0.000 (0.000) loss 2.3808 (1.2303) lr 8.1262e-04 eta 18:05:34
epoch [34/50] batch [1940/2000] time 2.049 (2.030) data 0.000 (0.000) loss 1.8411 (1.2354) lr 8.1262e-04 eta 18:04:52
epoch [34/50] batch [1960/2000] time 1.996 (2.030) data 0.000 (0.000) loss 1.0392 (1.2334) lr 8.1262e-04 eta 18:04:12
epoch [34/50] batch [1980/2000] time 2.051 (2.030) data 0.000 (0.000) loss 0.1206 (1.2334) lr 8.1262e-04 eta 18:03:30
epoch [34/50] batch [2000/2000] time 2.025 (2.030) data 0.000 (0.000) loss 0.3455 (1.2317) lr 7.5131e-04 eta 18:02:50
epoch [35/50] batch [20/2000] time 2.050 (2.060) data 0.000 (0.027) loss 0.0435 (1.4777) lr 7.5131e-04 eta 18:17:49
epoch [35/50] batch [40/2000] time 2.050 (2.042) data 0.000 (0.014) loss 0.4030 (1.2660) lr 7.5131e-04 eta 18:07:56
epoch [35/50] batch [60/2000] time 2.047 (2.038) data 0.001 (0.009) loss 0.6932 (1.1588) lr 7.5131e-04 eta 18:05:00
epoch [35/50] batch [80/2000] time 2.025 (2.036) data 0.000 (0.007) loss 3.0973 (1.1490) lr 7.5131e-04 eta 18:02:58
epoch [35/50] batch [100/2000] time 1.970 (2.032) data 0.000 (0.006) loss 1.3531 (1.0886) lr 7.5131e-04 eta 18:00:27
epoch [35/50] batch [120/2000] time 1.995 (2.033) data 0.000 (0.005) loss 0.5472 (1.0735) lr 7.5131e-04 eta 18:00:08
epoch [35/50] batch [140/2000] time 2.002 (2.033) data 0.000 (0.004) loss 2.3504 (1.0450) lr 7.5131e-04 eta 17:59:18
epoch [35/50] batch [160/2000] time 2.028 (2.032) data 0.000 (0.004) loss 2.3437 (1.0768) lr 7.5131e-04 eta 17:58:09
epoch [35/50] batch [180/2000] time 2.029 (2.031) data 0.000 (0.003) loss 0.6671 (1.0983) lr 7.5131e-04 eta 17:57:02
epoch [35/50] batch [200/2000] time 2.000 (2.030) data 0.000 (0.003) loss 1.8192 (1.1241) lr 7.5131e-04 eta 17:56:01
epoch [35/50] batch [220/2000] time 2.029 (2.030) data 0.000 (0.003) loss 0.8595 (1.1201) lr 7.5131e-04 eta 17:55:17
epoch [35/50] batch [240/2000] time 2.030 (2.030) data 0.000 (0.002) loss 1.7623 (1.1170) lr 7.5131e-04 eta 17:54:25
epoch [35/50] batch [260/2000] time 2.055 (2.029) data 0.000 (0.002) loss 1.3553 (1.1388) lr 7.5131e-04 eta 17:53:29
epoch [35/50] batch [280/2000] time 2.034 (2.030) data 0.000 (0.002) loss 0.0768 (1.1505) lr 7.5131e-04 eta 17:53:02
epoch [35/50] batch [300/2000] time 2.052 (2.030) data 0.000 (0.002) loss 0.9446 (1.1359) lr 7.5131e-04 eta 17:52:28
epoch [35/50] batch [320/2000] time 2.054 (2.030) data 0.000 (0.002) loss 0.7435 (1.1279) lr 7.5131e-04 eta 17:51:39
epoch [35/50] batch [340/2000] time 1.999 (2.030) data 0.000 (0.002) loss 1.6723 (1.1290) lr 7.5131e-04 eta 17:51:06
epoch [35/50] batch [360/2000] time 2.053 (2.030) data 0.000 (0.002) loss 2.1081 (1.1439) lr 7.5131e-04 eta 17:50:22
epoch [35/50] batch [380/2000] time 2.053 (2.030) data 0.000 (0.002) loss 2.0099 (1.1470) lr 7.5131e-04 eta 17:49:50
epoch [35/50] batch [400/2000] time 1.974 (2.030) data 0.000 (0.002) loss 0.5448 (1.1473) lr 7.5131e-04 eta 17:48:58
epoch [35/50] batch [420/2000] time 2.052 (2.030) data 0.000 (0.001) loss 3.5507 (1.1679) lr 7.5131e-04 eta 17:48:21
epoch [35/50] batch [440/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.0861 (1.1763) lr 7.5131e-04 eta 17:47:41
epoch [35/50] batch [460/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.8916 (1.1759) lr 7.5131e-04 eta 17:47:00
epoch [35/50] batch [480/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.5337 (1.1756) lr 7.5131e-04 eta 17:46:16
epoch [35/50] batch [500/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.8278 (1.1833) lr 7.5131e-04 eta 17:45:37
epoch [35/50] batch [520/2000] time 2.030 (2.030) data 0.000 (0.001) loss 1.9131 (1.1936) lr 7.5131e-04 eta 17:44:54
epoch [35/50] batch [540/2000] time 2.031 (2.030) data 0.000 (0.001) loss 1.0915 (1.1936) lr 7.5131e-04 eta 17:44:10
epoch [35/50] batch [560/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.5202 (1.1913) lr 7.5131e-04 eta 17:43:29
epoch [35/50] batch [580/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.9770 (1.1927) lr 7.5131e-04 eta 17:42:50
epoch [35/50] batch [600/2000] time 2.051 (2.030) data 0.001 (0.001) loss 0.5659 (1.1978) lr 7.5131e-04 eta 17:42:09
epoch [35/50] batch [620/2000] time 2.053 (2.029) data 0.000 (0.001) loss 2.5197 (1.1987) lr 7.5131e-04 eta 17:41:23
epoch [35/50] batch [640/2000] time 1.997 (2.029) data 0.000 (0.001) loss 2.0540 (1.2037) lr 7.5131e-04 eta 17:40:43
epoch [35/50] batch [660/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.8555 (1.2063) lr 7.5131e-04 eta 17:40:04
epoch [35/50] batch [680/2000] time 2.025 (2.029) data 0.000 (0.001) loss 1.0666 (1.1996) lr 7.5131e-04 eta 17:39:14
epoch [35/50] batch [700/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.8158 (1.2037) lr 7.5131e-04 eta 17:38:36
epoch [35/50] batch [720/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.4591 (1.1955) lr 7.5131e-04 eta 17:37:58
epoch [35/50] batch [740/2000] time 2.025 (2.029) data 0.000 (0.001) loss 0.4049 (1.1861) lr 7.5131e-04 eta 17:37:13
epoch [35/50] batch [760/2000] time 2.025 (2.029) data 0.000 (0.001) loss 1.0538 (1.1788) lr 7.5131e-04 eta 17:36:29
epoch [35/50] batch [780/2000] time 1.994 (2.029) data 0.000 (0.001) loss 1.2310 (1.1799) lr 7.5131e-04 eta 17:35:44
epoch [35/50] batch [800/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.1720 (1.1811) lr 7.5131e-04 eta 17:35:04
epoch [35/50] batch [820/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.5898 (1.1756) lr 7.5131e-04 eta 17:34:23
epoch [35/50] batch [840/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.3366 (1.1769) lr 7.5131e-04 eta 17:33:41
epoch [35/50] batch [860/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.5355 (1.1775) lr 7.5131e-04 eta 17:32:57
epoch [35/50] batch [880/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.7727 (1.1798) lr 7.5131e-04 eta 17:32:15
epoch [35/50] batch [900/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.9037 (1.1807) lr 7.5131e-04 eta 17:31:32
epoch [35/50] batch [920/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.7202 (1.1828) lr 7.5131e-04 eta 17:30:53
epoch [35/50] batch [940/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.0896 (1.1826) lr 7.5131e-04 eta 17:30:10
epoch [35/50] batch [960/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.5967 (1.1874) lr 7.5131e-04 eta 17:29:30
epoch [35/50] batch [980/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.7816 (1.1852) lr 7.5131e-04 eta 17:28:55
epoch [35/50] batch [1000/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.4172 (1.1860) lr 7.5131e-04 eta 17:28:15
epoch [35/50] batch [1020/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.2492 (1.1822) lr 7.5131e-04 eta 17:27:40
epoch [35/50] batch [1040/2000] time 2.001 (2.029) data 0.000 (0.001) loss 0.3209 (1.1750) lr 7.5131e-04 eta 17:26:56
epoch [35/50] batch [1060/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.6814 (1.1763) lr 7.5131e-04 eta 17:26:17
epoch [35/50] batch [1080/2000] time 2.055 (2.029) data 0.000 (0.001) loss 1.0281 (1.1854) lr 7.5131e-04 eta 17:25:39
epoch [35/50] batch [1100/2000] time 2.000 (2.029) data 0.000 (0.001) loss 1.4297 (1.1904) lr 7.5131e-04 eta 17:24:58
epoch [35/50] batch [1120/2000] time 1.977 (2.029) data 0.000 (0.001) loss 0.4348 (1.1909) lr 7.5131e-04 eta 17:24:19
epoch [35/50] batch [1140/2000] time 2.054 (2.029) data 0.001 (0.001) loss 1.1657 (1.1962) lr 7.5131e-04 eta 17:23:40
epoch [35/50] batch [1160/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.3539 (1.1946) lr 7.5131e-04 eta 17:22:59
epoch [35/50] batch [1180/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.0779 (1.1905) lr 7.5131e-04 eta 17:22:18
epoch [35/50] batch [1200/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.8936 (1.1911) lr 7.5131e-04 eta 17:21:39
epoch [35/50] batch [1220/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.3883 (1.1922) lr 7.5131e-04 eta 17:20:59
epoch [35/50] batch [1240/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.7489 (1.1963) lr 7.5131e-04 eta 17:20:16
epoch [35/50] batch [1260/2000] time 2.052 (2.029) data 0.000 (0.001) loss 2.6369 (1.2023) lr 7.5131e-04 eta 17:19:36
epoch [35/50] batch [1280/2000] time 2.005 (2.029) data 0.000 (0.001) loss 0.2864 (1.1961) lr 7.5131e-04 eta 17:18:57
epoch [35/50] batch [1300/2000] time 1.983 (2.029) data 0.000 (0.001) loss 0.6477 (1.2017) lr 7.5131e-04 eta 17:18:19
epoch [35/50] batch [1320/2000] time 2.057 (2.029) data 0.000 (0.001) loss 1.6523 (1.2034) lr 7.5131e-04 eta 17:17:40
epoch [35/50] batch [1340/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.5825 (1.2069) lr 7.5131e-04 eta 17:17:01
epoch [35/50] batch [1360/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.3349 (1.2105) lr 7.5131e-04 eta 17:16:20
epoch [35/50] batch [1380/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.5041 (1.2061) lr 7.5131e-04 eta 17:15:38
epoch [35/50] batch [1400/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.6528 (1.2047) lr 7.5131e-04 eta 17:14:57
epoch [35/50] batch [1420/2000] time 2.031 (2.029) data 0.000 (0.001) loss 0.0878 (1.2025) lr 7.5131e-04 eta 17:14:15
epoch [35/50] batch [1440/2000] time 1.999 (2.029) data 0.000 (0.001) loss 2.3135 (1.1996) lr 7.5131e-04 eta 17:13:34
epoch [35/50] batch [1460/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.4486 (1.1985) lr 7.5131e-04 eta 17:12:52
epoch [35/50] batch [1480/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.6468 (1.1987) lr 7.5131e-04 eta 17:12:12
epoch [35/50] batch [1500/2000] time 2.051 (2.029) data 0.000 (0.001) loss 2.2076 (1.1979) lr 7.5131e-04 eta 17:11:34
epoch [35/50] batch [1520/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.6862 (1.1978) lr 7.5131e-04 eta 17:10:52
epoch [35/50] batch [1540/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.3667 (1.1978) lr 7.5131e-04 eta 17:10:10
epoch [35/50] batch [1560/2000] time 2.001 (2.029) data 0.000 (0.001) loss 0.2942 (1.1971) lr 7.5131e-04 eta 17:09:29
epoch [35/50] batch [1580/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.0904 (1.1967) lr 7.5131e-04 eta 17:08:50
epoch [35/50] batch [1600/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.2604 (1.1969) lr 7.5131e-04 eta 17:08:10
epoch [35/50] batch [1620/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.4977 (1.1977) lr 7.5131e-04 eta 17:07:29
epoch [35/50] batch [1640/2000] time 1.999 (2.029) data 0.000 (0.001) loss 1.6661 (1.1974) lr 7.5131e-04 eta 17:06:50
epoch [35/50] batch [1660/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.3862 (1.1959) lr 7.5131e-04 eta 17:06:08
epoch [35/50] batch [1680/2000] time 2.054 (2.029) data 0.001 (0.001) loss 4.0771 (1.1946) lr 7.5131e-04 eta 17:05:30
epoch [35/50] batch [1700/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.0667 (1.1944) lr 7.5131e-04 eta 17:04:49
epoch [35/50] batch [1720/2000] time 2.032 (2.029) data 0.000 (0.001) loss 2.0259 (1.1959) lr 7.5131e-04 eta 17:04:06
epoch [35/50] batch [1740/2000] time 2.058 (2.029) data 0.000 (0.001) loss 0.3536 (1.1939) lr 7.5131e-04 eta 17:03:30
epoch [35/50] batch [1760/2000] time 2.058 (2.030) data 0.000 (0.001) loss 0.9276 (1.1937) lr 7.5131e-04 eta 17:02:53
epoch [35/50] batch [1780/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.2241 (1.1955) lr 7.5131e-04 eta 17:02:14
epoch [35/50] batch [1800/2000] time 2.050 (2.030) data 0.000 (0.000) loss 0.9273 (1.1963) lr 7.5131e-04 eta 17:01:32
epoch [35/50] batch [1820/2000] time 1.975 (2.029) data 0.000 (0.000) loss 2.7434 (1.1978) lr 7.5131e-04 eta 17:00:48
epoch [35/50] batch [1840/2000] time 2.052 (2.029) data 0.000 (0.000) loss 1.8001 (1.1967) lr 7.5131e-04 eta 17:00:07
epoch [35/50] batch [1860/2000] time 2.051 (2.029) data 0.000 (0.000) loss 1.3618 (1.2026) lr 7.5131e-04 eta 16:59:26
epoch [35/50] batch [1880/2000] time 1.971 (2.029) data 0.000 (0.000) loss 1.1500 (1.2014) lr 7.5131e-04 eta 16:58:47
epoch [35/50] batch [1900/2000] time 2.050 (2.029) data 0.000 (0.000) loss 0.3689 (1.2008) lr 7.5131e-04 eta 16:58:05
epoch [35/50] batch [1920/2000] time 2.049 (2.029) data 0.000 (0.000) loss 0.7249 (1.2008) lr 7.5131e-04 eta 16:57:26
epoch [35/50] batch [1940/2000] time 2.056 (2.029) data 0.000 (0.000) loss 1.6885 (1.2026) lr 7.5131e-04 eta 16:56:45
epoch [35/50] batch [1960/2000] time 2.038 (2.029) data 0.000 (0.000) loss 0.2644 (1.2045) lr 7.5131e-04 eta 16:56:04
epoch [35/50] batch [1980/2000] time 2.059 (2.030) data 0.000 (0.000) loss 3.3169 (1.2059) lr 7.5131e-04 eta 16:55:27
epoch [35/50] batch [2000/2000] time 2.057 (2.030) data 0.000 (0.000) loss 2.2171 (1.2070) lr 6.9098e-04 eta 16:54:48
epoch [36/50] batch [20/2000] time 2.052 (2.059) data 0.000 (0.027) loss 0.6095 (1.1254) lr 6.9098e-04 eta 17:08:38
epoch [36/50] batch [40/2000] time 1.999 (2.040) data 0.000 (0.014) loss 1.9279 (0.9920) lr 6.9098e-04 eta 16:58:43
epoch [36/50] batch [60/2000] time 2.030 (2.038) data 0.001 (0.009) loss 0.4703 (0.9826) lr 6.9098e-04 eta 16:57:02
epoch [36/50] batch [80/2000] time 1.999 (2.036) data 0.000 (0.007) loss 1.6726 (1.0845) lr 6.9098e-04 eta 16:55:09
epoch [36/50] batch [100/2000] time 2.054 (2.032) data 0.000 (0.006) loss 0.3988 (1.0793) lr 6.9098e-04 eta 16:52:39
epoch [36/50] batch [120/2000] time 1.995 (2.031) data 0.000 (0.005) loss 1.4026 (1.1098) lr 6.9098e-04 eta 16:51:16
epoch [36/50] batch [140/2000] time 2.048 (2.031) data 0.000 (0.004) loss 0.6386 (1.1317) lr 6.9098e-04 eta 16:50:54
epoch [36/50] batch [160/2000] time 2.051 (2.032) data 0.000 (0.004) loss 0.1906 (1.1642) lr 6.9098e-04 eta 16:50:26
epoch [36/50] batch [180/2000] time 2.001 (2.031) data 0.000 (0.003) loss 0.1239 (1.1559) lr 6.9098e-04 eta 16:49:23
epoch [36/50] batch [200/2000] time 2.004 (2.032) data 0.000 (0.003) loss 0.5925 (1.1563) lr 6.9098e-04 eta 16:49:04
epoch [36/50] batch [220/2000] time 2.000 (2.032) data 0.000 (0.003) loss 1.7482 (1.1655) lr 6.9098e-04 eta 16:48:31
epoch [36/50] batch [240/2000] time 2.055 (2.032) data 0.000 (0.002) loss 1.6980 (1.1994) lr 6.9098e-04 eta 16:47:44
epoch [36/50] batch [260/2000] time 2.052 (2.031) data 0.000 (0.002) loss 1.0470 (1.1933) lr 6.9098e-04 eta 16:46:55
epoch [36/50] batch [280/2000] time 2.001 (2.031) data 0.000 (0.002) loss 2.2931 (1.1837) lr 6.9098e-04 eta 16:46:04
epoch [36/50] batch [300/2000] time 2.001 (2.031) data 0.000 (0.002) loss 0.6235 (1.1886) lr 6.9098e-04 eta 16:45:19
epoch [36/50] batch [320/2000] time 1.999 (2.031) data 0.000 (0.002) loss 1.0723 (1.2048) lr 6.9098e-04 eta 16:44:30
epoch [36/50] batch [340/2000] time 2.004 (2.031) data 0.000 (0.002) loss 0.2605 (1.2067) lr 6.9098e-04 eta 16:43:49
epoch [36/50] batch [360/2000] time 2.056 (2.031) data 0.000 (0.002) loss 1.7189 (1.2010) lr 6.9098e-04 eta 16:43:10
epoch [36/50] batch [380/2000] time 1.976 (2.031) data 0.000 (0.002) loss 0.6163 (1.2187) lr 6.9098e-04 eta 16:42:43
epoch [36/50] batch [400/2000] time 2.051 (2.031) data 0.000 (0.002) loss 1.7372 (1.2068) lr 6.9098e-04 eta 16:41:59
epoch [36/50] batch [420/2000] time 2.002 (2.031) data 0.000 (0.001) loss 1.7322 (1.1998) lr 6.9098e-04 eta 16:41:18
epoch [36/50] batch [440/2000] time 2.007 (2.031) data 0.000 (0.001) loss 1.5575 (1.2097) lr 6.9098e-04 eta 16:40:35
epoch [36/50] batch [460/2000] time 2.057 (2.031) data 0.000 (0.001) loss 1.4565 (1.1977) lr 6.9098e-04 eta 16:40:06
epoch [36/50] batch [480/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.3714 (1.2021) lr 6.9098e-04 eta 16:39:24
epoch [36/50] batch [500/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.5469 (1.1928) lr 6.9098e-04 eta 16:38:39
epoch [36/50] batch [520/2000] time 2.051 (2.032) data 0.000 (0.001) loss 1.8947 (1.1982) lr 6.9098e-04 eta 16:38:11
epoch [36/50] batch [540/2000] time 2.027 (2.031) data 0.000 (0.001) loss 2.1325 (1.1983) lr 6.9098e-04 eta 16:37:22
epoch [36/50] batch [560/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.1597 (1.1966) lr 6.9098e-04 eta 16:36:34
epoch [36/50] batch [580/2000] time 1.974 (2.031) data 0.000 (0.001) loss 1.1759 (1.1996) lr 6.9098e-04 eta 16:35:40
epoch [36/50] batch [600/2000] time 2.054 (2.031) data 0.001 (0.001) loss 1.3758 (1.2055) lr 6.9098e-04 eta 16:35:00
epoch [36/50] batch [620/2000] time 2.026 (2.031) data 0.000 (0.001) loss 1.2261 (1.2093) lr 6.9098e-04 eta 16:34:24
epoch [36/50] batch [640/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.2685 (1.2095) lr 6.9098e-04 eta 16:33:44
epoch [36/50] batch [660/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.9765 (1.2155) lr 6.9098e-04 eta 16:33:03
epoch [36/50] batch [680/2000] time 2.000 (2.031) data 0.000 (0.001) loss 2.0898 (1.2156) lr 6.9098e-04 eta 16:32:25
epoch [36/50] batch [700/2000] time 2.005 (2.031) data 0.000 (0.001) loss 0.7018 (1.2061) lr 6.9098e-04 eta 16:31:41
epoch [36/50] batch [720/2000] time 2.007 (2.031) data 0.000 (0.001) loss 1.2237 (1.2078) lr 6.9098e-04 eta 16:31:09
epoch [36/50] batch [740/2000] time 2.056 (2.031) data 0.000 (0.001) loss 0.4172 (1.2044) lr 6.9098e-04 eta 16:30:33
epoch [36/50] batch [760/2000] time 2.030 (2.031) data 0.000 (0.001) loss 3.0365 (1.1981) lr 6.9098e-04 eta 16:29:47
epoch [36/50] batch [780/2000] time 2.049 (2.031) data 0.000 (0.001) loss 3.1508 (1.1948) lr 6.9098e-04 eta 16:29:03
epoch [36/50] batch [800/2000] time 1.998 (2.031) data 0.000 (0.001) loss 1.6567 (1.1928) lr 6.9098e-04 eta 16:28:18
epoch [36/50] batch [820/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.1963 (1.1979) lr 6.9098e-04 eta 16:27:43
epoch [36/50] batch [840/2000] time 2.049 (2.031) data 0.000 (0.001) loss 0.1809 (1.1982) lr 6.9098e-04 eta 16:27:01
epoch [36/50] batch [860/2000] time 1.996 (2.031) data 0.000 (0.001) loss 2.1027 (1.2022) lr 6.9098e-04 eta 16:26:15
epoch [36/50] batch [880/2000] time 2.028 (2.031) data 0.000 (0.001) loss 1.0521 (1.2055) lr 6.9098e-04 eta 16:25:31
epoch [36/50] batch [900/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.0167 (1.1969) lr 6.9098e-04 eta 16:24:50
epoch [36/50] batch [920/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.8655 (1.1927) lr 6.9098e-04 eta 16:24:06
epoch [36/50] batch [940/2000] time 1.996 (2.031) data 0.000 (0.001) loss 1.0888 (1.1930) lr 6.9098e-04 eta 16:23:27
epoch [36/50] batch [960/2000] time 1.996 (2.031) data 0.000 (0.001) loss 0.1393 (1.1969) lr 6.9098e-04 eta 16:22:45
epoch [36/50] batch [980/2000] time 1.973 (2.030) data 0.000 (0.001) loss 1.1631 (1.1981) lr 6.9098e-04 eta 16:21:59
epoch [36/50] batch [1000/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.6579 (1.1937) lr 6.9098e-04 eta 16:21:15
epoch [36/50] batch [1020/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.5543 (1.1964) lr 6.9098e-04 eta 16:20:36
epoch [36/50] batch [1040/2000] time 1.975 (2.030) data 0.000 (0.001) loss 0.2738 (1.1896) lr 6.9098e-04 eta 16:19:55
epoch [36/50] batch [1060/2000] time 2.031 (2.030) data 0.000 (0.001) loss 2.2169 (1.1921) lr 6.9098e-04 eta 16:19:09
epoch [36/50] batch [1080/2000] time 2.028 (2.030) data 0.000 (0.001) loss 4.3372 (1.1943) lr 6.9098e-04 eta 16:18:24
epoch [36/50] batch [1100/2000] time 2.031 (2.030) data 0.000 (0.001) loss 1.9835 (1.1926) lr 6.9098e-04 eta 16:17:41
epoch [36/50] batch [1120/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.6518 (1.1955) lr 6.9098e-04 eta 16:16:59
epoch [36/50] batch [1140/2000] time 2.052 (2.030) data 0.000 (0.001) loss 2.7070 (1.1964) lr 6.9098e-04 eta 16:16:18
epoch [36/50] batch [1160/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.2267 (1.1974) lr 6.9098e-04 eta 16:15:38
epoch [36/50] batch [1180/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.0629 (1.1978) lr 6.9098e-04 eta 16:14:58
epoch [36/50] batch [1200/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.6102 (1.2010) lr 6.9098e-04 eta 16:14:23
epoch [36/50] batch [1220/2000] time 1.972 (2.030) data 0.000 (0.001) loss 2.3600 (1.2023) lr 6.9098e-04 eta 16:13:38
epoch [36/50] batch [1240/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.5176 (1.2045) lr 6.9098e-04 eta 16:12:57
epoch [36/50] batch [1260/2000] time 2.003 (2.030) data 0.000 (0.001) loss 2.8783 (1.2090) lr 6.9098e-04 eta 16:12:17
epoch [36/50] batch [1280/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.2289 (1.2101) lr 6.9098e-04 eta 16:11:40
epoch [36/50] batch [1300/2000] time 2.026 (2.030) data 0.000 (0.001) loss 1.0230 (1.2100) lr 6.9098e-04 eta 16:11:01
epoch [36/50] batch [1320/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.5764 (1.2101) lr 6.9098e-04 eta 16:10:18
epoch [36/50] batch [1340/2000] time 1.974 (2.030) data 0.000 (0.001) loss 1.8253 (1.2149) lr 6.9098e-04 eta 16:09:40
epoch [36/50] batch [1360/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.0395 (1.2182) lr 6.9098e-04 eta 16:08:59
epoch [36/50] batch [1380/2000] time 2.032 (2.030) data 0.000 (0.001) loss 1.4120 (1.2156) lr 6.9098e-04 eta 16:08:17
epoch [36/50] batch [1400/2000] time 2.061 (2.030) data 0.000 (0.001) loss 1.4166 (1.2181) lr 6.9098e-04 eta 16:07:39
epoch [36/50] batch [1420/2000] time 2.001 (2.030) data 0.000 (0.001) loss 1.6555 (1.2218) lr 6.9098e-04 eta 16:06:57
epoch [36/50] batch [1440/2000] time 2.059 (2.030) data 0.000 (0.001) loss 0.2373 (1.2229) lr 6.9098e-04 eta 16:06:17
epoch [36/50] batch [1460/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.0952 (1.2238) lr 6.9098e-04 eta 16:05:39
epoch [36/50] batch [1480/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.3728 (1.2226) lr 6.9098e-04 eta 16:04:58
epoch [36/50] batch [1500/2000] time 2.027 (2.030) data 0.000 (0.001) loss 2.0668 (1.2183) lr 6.9098e-04 eta 16:04:17
epoch [36/50] batch [1520/2000] time 2.049 (2.030) data 0.000 (0.001) loss 4.0083 (1.2206) lr 6.9098e-04 eta 16:03:38
epoch [36/50] batch [1540/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.4322 (1.2202) lr 6.9098e-04 eta 16:02:55
epoch [36/50] batch [1560/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.3002 (1.2219) lr 6.9098e-04 eta 16:02:16
epoch [36/50] batch [1580/2000] time 2.048 (2.030) data 0.000 (0.001) loss 2.5367 (1.2225) lr 6.9098e-04 eta 16:01:33
epoch [36/50] batch [1600/2000] time 2.048 (2.030) data 0.000 (0.001) loss 1.2297 (1.2212) lr 6.9098e-04 eta 16:00:51
epoch [36/50] batch [1620/2000] time 2.023 (2.030) data 0.000 (0.001) loss 1.5442 (1.2231) lr 6.9098e-04 eta 16:00:08
epoch [36/50] batch [1640/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.5638 (1.2254) lr 6.9098e-04 eta 15:59:25
epoch [36/50] batch [1660/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.0215 (1.2254) lr 6.9098e-04 eta 15:58:45
epoch [36/50] batch [1680/2000] time 1.999 (2.030) data 0.001 (0.001) loss 0.5327 (1.2229) lr 6.9098e-04 eta 15:58:04
epoch [36/50] batch [1700/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.2162 (1.2213) lr 6.9098e-04 eta 15:57:26
epoch [36/50] batch [1720/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.8370 (1.2231) lr 6.9098e-04 eta 15:56:45
epoch [36/50] batch [1740/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.4138 (1.2221) lr 6.9098e-04 eta 15:56:04
epoch [36/50] batch [1760/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.8158 (1.2193) lr 6.9098e-04 eta 15:55:24
epoch [36/50] batch [1780/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.2583 (1.2148) lr 6.9098e-04 eta 15:54:43
epoch [36/50] batch [1800/2000] time 2.051 (2.030) data 0.000 (0.000) loss 0.4567 (1.2130) lr 6.9098e-04 eta 15:54:02
epoch [36/50] batch [1820/2000] time 2.031 (2.030) data 0.000 (0.000) loss 1.3693 (1.2100) lr 6.9098e-04 eta 15:53:22
epoch [36/50] batch [1840/2000] time 2.000 (2.030) data 0.000 (0.000) loss 1.7342 (1.2087) lr 6.9098e-04 eta 15:52:41
epoch [36/50] batch [1860/2000] time 1.978 (2.030) data 0.000 (0.000) loss 0.7502 (1.2051) lr 6.9098e-04 eta 15:52:00
epoch [36/50] batch [1880/2000] time 2.002 (2.030) data 0.000 (0.000) loss 0.9461 (1.2037) lr 6.9098e-04 eta 15:51:18
epoch [36/50] batch [1900/2000] time 2.029 (2.030) data 0.000 (0.000) loss 1.4629 (1.2001) lr 6.9098e-04 eta 15:50:36
epoch [36/50] batch [1920/2000] time 2.053 (2.030) data 0.000 (0.000) loss 0.7337 (1.2005) lr 6.9098e-04 eta 15:49:56
epoch [36/50] batch [1940/2000] time 2.032 (2.030) data 0.000 (0.000) loss 3.8637 (1.2033) lr 6.9098e-04 eta 15:49:16
epoch [36/50] batch [1960/2000] time 2.053 (2.030) data 0.000 (0.000) loss 0.8509 (1.2040) lr 6.9098e-04 eta 15:48:33
epoch [36/50] batch [1980/2000] time 1.974 (2.030) data 0.000 (0.000) loss 1.4930 (1.2070) lr 6.9098e-04 eta 15:47:52
epoch [36/50] batch [2000/2000] time 2.027 (2.030) data 0.000 (0.000) loss 3.2915 (1.2085) lr 6.3188e-04 eta 15:47:10
epoch [37/50] batch [20/2000] time 2.051 (2.060) data 0.000 (0.027) loss 2.9532 (1.5594) lr 6.3188e-04 eta 16:00:42
epoch [37/50] batch [40/2000] time 2.050 (2.047) data 0.000 (0.014) loss 1.4811 (1.3523) lr 6.3188e-04 eta 15:54:02
epoch [37/50] batch [60/2000] time 2.051 (2.042) data 0.001 (0.009) loss 1.2157 (1.3225) lr 6.3188e-04 eta 15:51:01
epoch [37/50] batch [80/2000] time 2.051 (2.037) data 0.000 (0.007) loss 3.6403 (1.3807) lr 6.3188e-04 eta 15:48:00
epoch [37/50] batch [100/2000] time 2.029 (2.035) data 0.000 (0.006) loss 2.4795 (1.3702) lr 6.3188e-04 eta 15:46:03
epoch [37/50] batch [120/2000] time 2.050 (2.034) data 0.000 (0.005) loss 2.0771 (1.3494) lr 6.3188e-04 eta 15:45:02
epoch [37/50] batch [140/2000] time 1.997 (2.033) data 0.000 (0.004) loss 2.3194 (1.3046) lr 6.3188e-04 eta 15:43:50
epoch [37/50] batch [160/2000] time 2.052 (2.032) data 0.000 (0.004) loss 0.7524 (1.3031) lr 6.3188e-04 eta 15:42:50
epoch [37/50] batch [180/2000] time 1.999 (2.032) data 0.000 (0.003) loss 1.3855 (1.3055) lr 6.3188e-04 eta 15:42:01
epoch [37/50] batch [200/2000] time 2.056 (2.031) data 0.000 (0.003) loss 0.5616 (1.2913) lr 6.3188e-04 eta 15:41:05
epoch [37/50] batch [220/2000] time 2.051 (2.031) data 0.000 (0.003) loss 0.7563 (1.2992) lr 6.3188e-04 eta 15:40:32
epoch [37/50] batch [240/2000] time 2.000 (2.031) data 0.000 (0.002) loss 0.5239 (1.3118) lr 6.3188e-04 eta 15:39:40
epoch [37/50] batch [260/2000] time 2.056 (2.031) data 0.000 (0.002) loss 0.2584 (1.3131) lr 6.3188e-04 eta 15:39:01
epoch [37/50] batch [280/2000] time 2.052 (2.031) data 0.000 (0.002) loss 0.1148 (1.3043) lr 6.3188e-04 eta 15:38:13
epoch [37/50] batch [300/2000] time 2.054 (2.031) data 0.000 (0.002) loss 0.3876 (1.2727) lr 6.3188e-04 eta 15:37:34
epoch [37/50] batch [320/2000] time 2.028 (2.030) data 0.000 (0.002) loss 0.6664 (1.2628) lr 6.3188e-04 eta 15:36:41
epoch [37/50] batch [340/2000] time 2.056 (2.030) data 0.000 (0.002) loss 1.2802 (1.2674) lr 6.3188e-04 eta 15:36:01
epoch [37/50] batch [360/2000] time 2.029 (2.031) data 0.000 (0.002) loss 2.5649 (1.2556) lr 6.3188e-04 eta 15:35:30
epoch [37/50] batch [380/2000] time 2.056 (2.031) data 0.000 (0.002) loss 0.7532 (1.2597) lr 6.3188e-04 eta 15:34:43
epoch [37/50] batch [400/2000] time 1.974 (2.030) data 0.000 (0.002) loss 1.4195 (1.2668) lr 6.3188e-04 eta 15:33:56
epoch [37/50] batch [420/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.7710 (1.2777) lr 6.3188e-04 eta 15:33:18
epoch [37/50] batch [440/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.5553 (1.2684) lr 6.3188e-04 eta 15:32:24
epoch [37/50] batch [460/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.0537 (1.2809) lr 6.3188e-04 eta 15:31:44
epoch [37/50] batch [480/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.9591 (1.2672) lr 6.3188e-04 eta 15:31:00
epoch [37/50] batch [500/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.1307 (1.2632) lr 6.3188e-04 eta 15:30:23
epoch [37/50] batch [520/2000] time 1.975 (2.030) data 0.000 (0.001) loss 1.1067 (1.2697) lr 6.3188e-04 eta 15:29:33
epoch [37/50] batch [540/2000] time 2.031 (2.029) data 0.000 (0.001) loss 1.0528 (1.2673) lr 6.3188e-04 eta 15:28:44
epoch [37/50] batch [560/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.1557 (1.2672) lr 6.3188e-04 eta 15:28:02
epoch [37/50] batch [580/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.0352 (1.2500) lr 6.3188e-04 eta 15:27:27
epoch [37/50] batch [600/2000] time 2.052 (2.029) data 0.001 (0.001) loss 1.2568 (1.2434) lr 6.3188e-04 eta 15:26:47
epoch [37/50] batch [620/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.5074 (1.2364) lr 6.3188e-04 eta 15:26:14
epoch [37/50] batch [640/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.6294 (1.2317) lr 6.3188e-04 eta 15:25:34
epoch [37/50] batch [660/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.7771 (1.2219) lr 6.3188e-04 eta 15:24:56
epoch [37/50] batch [680/2000] time 2.054 (2.030) data 0.000 (0.001) loss 2.2088 (1.2150) lr 6.3188e-04 eta 15:24:14
epoch [37/50] batch [700/2000] time 2.053 (2.030) data 0.000 (0.001) loss 3.4785 (1.2190) lr 6.3188e-04 eta 15:23:31
epoch [37/50] batch [720/2000] time 2.052 (2.030) data 0.000 (0.001) loss 3.3261 (1.2122) lr 6.3188e-04 eta 15:22:51
epoch [37/50] batch [740/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.1843 (1.2086) lr 6.3188e-04 eta 15:22:07
epoch [37/50] batch [760/2000] time 2.031 (2.030) data 0.000 (0.001) loss 1.6730 (1.2104) lr 6.3188e-04 eta 15:21:26
epoch [37/50] batch [780/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.9997 (1.2179) lr 6.3188e-04 eta 15:20:45
epoch [37/50] batch [800/2000] time 2.032 (2.030) data 0.000 (0.001) loss 1.0110 (1.2204) lr 6.3188e-04 eta 15:20:06
epoch [37/50] batch [820/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.5970 (1.2218) lr 6.3188e-04 eta 15:19:21
epoch [37/50] batch [840/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.3656 (1.2152) lr 6.3188e-04 eta 15:18:37
epoch [37/50] batch [860/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.8850 (1.2117) lr 6.3188e-04 eta 15:17:56
epoch [37/50] batch [880/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.3082 (1.2077) lr 6.3188e-04 eta 15:17:17
epoch [37/50] batch [900/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.2187 (1.2013) lr 6.3188e-04 eta 15:16:38
epoch [37/50] batch [920/2000] time 1.995 (2.030) data 0.000 (0.001) loss 0.4671 (1.1991) lr 6.3188e-04 eta 15:15:58
epoch [37/50] batch [940/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.2999 (1.2012) lr 6.3188e-04 eta 15:15:18
epoch [37/50] batch [960/2000] time 2.045 (2.030) data 0.000 (0.001) loss 0.7024 (1.2023) lr 6.3188e-04 eta 15:14:38
epoch [37/50] batch [980/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.8138 (1.1996) lr 6.3188e-04 eta 15:13:53
epoch [37/50] batch [1000/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.9592 (1.1950) lr 6.3188e-04 eta 15:13:13
epoch [37/50] batch [1020/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.8015 (1.1979) lr 6.3188e-04 eta 15:12:33
epoch [37/50] batch [1040/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.3075 (1.1964) lr 6.3188e-04 eta 15:11:51
epoch [37/50] batch [1060/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.6990 (1.1992) lr 6.3188e-04 eta 15:11:13
epoch [37/50] batch [1080/2000] time 1.995 (2.029) data 0.000 (0.001) loss 0.9276 (1.2041) lr 6.3188e-04 eta 15:10:32
epoch [37/50] batch [1100/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.8701 (1.2062) lr 6.3188e-04 eta 15:09:48
epoch [37/50] batch [1120/2000] time 2.053 (2.029) data 0.000 (0.001) loss 3.4865 (1.2147) lr 6.3188e-04 eta 15:09:02
epoch [37/50] batch [1140/2000] time 2.053 (2.029) data 0.001 (0.001) loss 0.8543 (1.2125) lr 6.3188e-04 eta 15:08:23
epoch [37/50] batch [1160/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.9071 (1.2191) lr 6.3188e-04 eta 15:07:44
epoch [37/50] batch [1180/2000] time 1.975 (2.029) data 0.000 (0.001) loss 1.6337 (1.2172) lr 6.3188e-04 eta 15:07:04
epoch [37/50] batch [1200/2000] time 1.999 (2.029) data 0.000 (0.001) loss 1.7925 (1.2161) lr 6.3188e-04 eta 15:06:26
epoch [37/50] batch [1220/2000] time 2.000 (2.029) data 0.000 (0.001) loss 1.3107 (1.2180) lr 6.3188e-04 eta 15:05:45
epoch [37/50] batch [1240/2000] time 2.055 (2.029) data 0.000 (0.001) loss 0.7523 (1.2200) lr 6.3188e-04 eta 15:05:05
epoch [37/50] batch [1260/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.2158 (1.2187) lr 6.3188e-04 eta 15:04:30
epoch [37/50] batch [1280/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.3893 (1.2156) lr 6.3188e-04 eta 15:03:47
epoch [37/50] batch [1300/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.1840 (1.2117) lr 6.3188e-04 eta 15:03:09
epoch [37/50] batch [1320/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.3164 (1.2105) lr 6.3188e-04 eta 15:02:27
epoch [37/50] batch [1340/2000] time 1.974 (2.030) data 0.000 (0.001) loss 2.4142 (1.2129) lr 6.3188e-04 eta 15:01:47
epoch [37/50] batch [1360/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.4880 (1.2121) lr 6.3188e-04 eta 15:01:08
epoch [37/50] batch [1380/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.3551 (1.2182) lr 6.3188e-04 eta 15:00:29
epoch [37/50] batch [1400/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.3810 (1.2161) lr 6.3188e-04 eta 14:59:49
epoch [37/50] batch [1420/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.3705 (1.2132) lr 6.3188e-04 eta 14:59:10
epoch [37/50] batch [1440/2000] time 1.973 (2.030) data 0.000 (0.001) loss 1.3326 (1.2100) lr 6.3188e-04 eta 14:58:30
epoch [37/50] batch [1460/2000] time 1.973 (2.030) data 0.000 (0.001) loss 1.5514 (1.2088) lr 6.3188e-04 eta 14:57:46
epoch [37/50] batch [1480/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.8153 (1.2091) lr 6.3188e-04 eta 14:57:03
epoch [37/50] batch [1500/2000] time 2.028 (2.030) data 0.000 (0.001) loss 2.4155 (1.2141) lr 6.3188e-04 eta 14:56:24
epoch [37/50] batch [1520/2000] time 2.000 (2.030) data 0.000 (0.001) loss 2.8169 (1.2128) lr 6.3188e-04 eta 14:55:43
epoch [37/50] batch [1540/2000] time 2.054 (2.030) data 0.000 (0.001) loss 2.3751 (1.2170) lr 6.3188e-04 eta 14:55:01
epoch [37/50] batch [1560/2000] time 2.032 (2.030) data 0.000 (0.001) loss 0.0458 (1.2168) lr 6.3188e-04 eta 14:54:22
epoch [37/50] batch [1580/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.8956 (1.2176) lr 6.3188e-04 eta 14:53:41
epoch [37/50] batch [1600/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.7281 (1.2189) lr 6.3188e-04 eta 14:52:59
epoch [37/50] batch [1620/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.3388 (1.2188) lr 6.3188e-04 eta 14:52:16
epoch [37/50] batch [1640/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.4500 (1.2177) lr 6.3188e-04 eta 14:51:36
epoch [37/50] batch [1660/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.4885 (1.2169) lr 6.3188e-04 eta 14:50:56
epoch [37/50] batch [1680/2000] time 1.996 (2.029) data 0.001 (0.001) loss 1.0097 (1.2161) lr 6.3188e-04 eta 14:50:16
epoch [37/50] batch [1700/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.2864 (1.2163) lr 6.3188e-04 eta 14:49:34
epoch [37/50] batch [1720/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.0871 (1.2180) lr 6.3188e-04 eta 14:48:54
epoch [37/50] batch [1740/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.5495 (1.2182) lr 6.3188e-04 eta 14:48:14
epoch [37/50] batch [1760/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.3714 (1.2200) lr 6.3188e-04 eta 14:47:34
epoch [37/50] batch [1780/2000] time 2.026 (2.030) data 0.000 (0.001) loss 0.1186 (1.2177) lr 6.3188e-04 eta 14:46:54
epoch [37/50] batch [1800/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.4277 (1.2200) lr 6.3188e-04 eta 14:46:12
epoch [37/50] batch [1820/2000] time 1.976 (2.029) data 0.000 (0.001) loss 1.2417 (1.2185) lr 6.3188e-04 eta 14:45:31
epoch [37/50] batch [1840/2000] time 2.000 (2.029) data 0.000 (0.000) loss 2.7407 (1.2201) lr 6.3188e-04 eta 14:44:50
epoch [37/50] batch [1860/2000] time 2.054 (2.029) data 0.000 (0.000) loss 0.2503 (1.2191) lr 6.3188e-04 eta 14:44:10
epoch [37/50] batch [1880/2000] time 2.026 (2.029) data 0.000 (0.000) loss 2.9619 (1.2203) lr 6.3188e-04 eta 14:43:28
epoch [37/50] batch [1900/2000] time 2.050 (2.029) data 0.000 (0.000) loss 0.2141 (1.2180) lr 6.3188e-04 eta 14:42:46
epoch [37/50] batch [1920/2000] time 2.026 (2.029) data 0.000 (0.000) loss 1.6736 (1.2190) lr 6.3188e-04 eta 14:42:06
epoch [37/50] batch [1940/2000] time 1.996 (2.029) data 0.000 (0.000) loss 0.1129 (1.2206) lr 6.3188e-04 eta 14:41:26
epoch [37/50] batch [1960/2000] time 1.997 (2.029) data 0.000 (0.000) loss 1.5706 (1.2202) lr 6.3188e-04 eta 14:40:45
epoch [37/50] batch [1980/2000] time 2.052 (2.029) data 0.000 (0.000) loss 1.2699 (1.2208) lr 6.3188e-04 eta 14:40:04
epoch [37/50] batch [2000/2000] time 2.050 (2.029) data 0.000 (0.000) loss 0.3153 (1.2225) lr 5.7422e-04 eta 14:39:25
epoch [38/50] batch [20/2000] time 2.051 (2.066) data 0.000 (0.027) loss 0.2409 (1.3495) lr 5.7422e-04 eta 14:54:31
epoch [38/50] batch [40/2000] time 2.028 (2.047) data 0.000 (0.013) loss 1.2383 (1.3189) lr 5.7422e-04 eta 14:45:37
epoch [38/50] batch [60/2000] time 2.001 (2.041) data 0.001 (0.009) loss 1.3747 (1.3040) lr 5.7422e-04 eta 14:42:15
epoch [38/50] batch [80/2000] time 2.001 (2.037) data 0.000 (0.007) loss 1.8246 (1.2931) lr 5.7422e-04 eta 14:40:04
epoch [38/50] batch [100/2000] time 2.055 (2.037) data 0.000 (0.006) loss 0.6499 (1.2270) lr 5.7422e-04 eta 14:39:28
epoch [38/50] batch [120/2000] time 2.000 (2.036) data 0.000 (0.005) loss 0.3375 (1.2122) lr 5.7422e-04 eta 14:38:05
epoch [38/50] batch [140/2000] time 2.054 (2.034) data 0.000 (0.004) loss 2.3124 (1.2261) lr 5.7422e-04 eta 14:36:45
epoch [38/50] batch [160/2000] time 2.002 (2.034) data 0.000 (0.004) loss 0.3318 (1.2017) lr 5.7422e-04 eta 14:36:09
epoch [38/50] batch [180/2000] time 2.051 (2.034) data 0.000 (0.003) loss 0.3724 (1.2333) lr 5.7422e-04 eta 14:35:24
epoch [38/50] batch [200/2000] time 1.996 (2.034) data 0.000 (0.003) loss 2.1956 (1.2373) lr 5.7422e-04 eta 14:34:39
epoch [38/50] batch [220/2000] time 2.005 (2.034) data 0.000 (0.003) loss 0.1715 (1.2264) lr 5.7422e-04 eta 14:33:47
epoch [38/50] batch [240/2000] time 2.056 (2.033) data 0.000 (0.002) loss 0.4375 (1.2088) lr 5.7422e-04 eta 14:32:59
epoch [38/50] batch [260/2000] time 2.054 (2.033) data 0.000 (0.002) loss 0.8761 (1.1948) lr 5.7422e-04 eta 14:32:18
epoch [38/50] batch [280/2000] time 2.054 (2.032) data 0.000 (0.002) loss 3.6040 (1.2091) lr 5.7422e-04 eta 14:31:14
epoch [38/50] batch [300/2000] time 2.050 (2.032) data 0.000 (0.002) loss 0.6386 (1.2067) lr 5.7422e-04 eta 14:30:27
epoch [38/50] batch [320/2000] time 2.030 (2.032) data 0.000 (0.002) loss 0.3134 (1.2137) lr 5.7422e-04 eta 14:29:45
epoch [38/50] batch [340/2000] time 2.000 (2.032) data 0.000 (0.002) loss 0.2488 (1.2143) lr 5.7422e-04 eta 14:29:06
epoch [38/50] batch [360/2000] time 2.036 (2.032) data 0.000 (0.002) loss 0.2050 (1.1990) lr 5.7422e-04 eta 14:28:25
epoch [38/50] batch [380/2000] time 2.054 (2.033) data 0.000 (0.002) loss 2.0954 (1.2105) lr 5.7422e-04 eta 14:28:02
epoch [38/50] batch [400/2000] time 2.054 (2.033) data 0.000 (0.002) loss 0.8803 (1.2174) lr 5.7422e-04 eta 14:27:23
epoch [38/50] batch [420/2000] time 2.029 (2.033) data 0.000 (0.001) loss 0.9861 (1.2204) lr 5.7422e-04 eta 14:26:42
epoch [38/50] batch [440/2000] time 2.028 (2.033) data 0.000 (0.001) loss 3.7099 (1.2205) lr 5.7422e-04 eta 14:25:54
epoch [38/50] batch [460/2000] time 2.001 (2.033) data 0.000 (0.001) loss 0.8212 (1.2132) lr 5.7422e-04 eta 14:25:12
epoch [38/50] batch [480/2000] time 2.055 (2.033) data 0.000 (0.001) loss 0.3835 (1.2093) lr 5.7422e-04 eta 14:24:32
epoch [38/50] batch [500/2000] time 2.032 (2.033) data 0.000 (0.001) loss 0.5316 (1.1993) lr 5.7422e-04 eta 14:23:53
epoch [38/50] batch [520/2000] time 1.999 (2.032) data 0.000 (0.001) loss 1.0703 (1.2008) lr 5.7422e-04 eta 14:23:04
epoch [38/50] batch [540/2000] time 2.032 (2.032) data 0.000 (0.001) loss 0.6986 (1.2014) lr 5.7422e-04 eta 14:22:24
epoch [38/50] batch [560/2000] time 2.054 (2.032) data 0.000 (0.001) loss 1.3219 (1.2069) lr 5.7422e-04 eta 14:21:41
epoch [38/50] batch [580/2000] time 2.037 (2.032) data 0.000 (0.001) loss 0.0231 (1.1988) lr 5.7422e-04 eta 14:21:00
epoch [38/50] batch [600/2000] time 2.035 (2.032) data 0.001 (0.001) loss 0.2387 (1.2070) lr 5.7422e-04 eta 14:20:24
epoch [38/50] batch [620/2000] time 2.056 (2.033) data 0.000 (0.001) loss 0.7593 (1.1963) lr 5.7422e-04 eta 14:19:47
epoch [38/50] batch [640/2000] time 2.000 (2.033) data 0.000 (0.001) loss 0.5283 (1.1925) lr 5.7422e-04 eta 14:19:04
epoch [38/50] batch [660/2000] time 2.028 (2.033) data 0.000 (0.001) loss 0.5444 (1.1824) lr 5.7422e-04 eta 14:18:28
epoch [38/50] batch [680/2000] time 2.051 (2.032) data 0.000 (0.001) loss 0.5109 (1.1769) lr 5.7422e-04 eta 14:17:41
epoch [38/50] batch [700/2000] time 2.051 (2.033) data 0.000 (0.001) loss 0.1537 (1.1684) lr 5.7422e-04 eta 14:17:03
epoch [38/50] batch [720/2000] time 1.977 (2.033) data 0.000 (0.001) loss 0.5999 (1.1783) lr 5.7422e-04 eta 14:16:22
epoch [38/50] batch [740/2000] time 2.058 (2.033) data 0.000 (0.001) loss 1.7349 (1.1910) lr 5.7422e-04 eta 14:15:42
epoch [38/50] batch [760/2000] time 2.053 (2.033) data 0.000 (0.001) loss 0.6376 (1.1940) lr 5.7422e-04 eta 14:15:05
epoch [38/50] batch [780/2000] time 2.048 (2.033) data 0.000 (0.001) loss 0.7952 (1.1925) lr 5.7422e-04 eta 14:14:22
epoch [38/50] batch [800/2000] time 2.051 (2.033) data 0.000 (0.001) loss 0.2322 (1.1902) lr 5.7422e-04 eta 14:13:39
epoch [38/50] batch [820/2000] time 1.996 (2.032) data 0.000 (0.001) loss 1.1681 (1.1851) lr 5.7422e-04 eta 14:12:57
epoch [38/50] batch [840/2000] time 2.053 (2.033) data 0.000 (0.001) loss 0.9292 (1.1859) lr 5.7422e-04 eta 14:12:20
epoch [38/50] batch [860/2000] time 2.034 (2.033) data 0.000 (0.001) loss 2.2128 (1.1867) lr 5.7422e-04 eta 14:11:40
epoch [38/50] batch [880/2000] time 2.006 (2.033) data 0.000 (0.001) loss 1.2504 (1.1816) lr 5.7422e-04 eta 14:11:01
epoch [38/50] batch [900/2000] time 2.031 (2.033) data 0.000 (0.001) loss 1.3007 (1.1791) lr 5.7422e-04 eta 14:10:26
epoch [38/50] batch [920/2000] time 1.998 (2.033) data 0.000 (0.001) loss 2.1852 (1.1755) lr 5.7422e-04 eta 14:09:46
epoch [38/50] batch [940/2000] time 1.975 (2.033) data 0.000 (0.001) loss 1.1204 (1.1830) lr 5.7422e-04 eta 14:09:05
epoch [38/50] batch [960/2000] time 2.052 (2.033) data 0.000 (0.001) loss 0.1016 (1.1824) lr 5.7422e-04 eta 14:08:24
epoch [38/50] batch [980/2000] time 2.053 (2.033) data 0.000 (0.001) loss 0.3552 (1.1806) lr 5.7422e-04 eta 14:07:42
epoch [38/50] batch [1000/2000] time 2.053 (2.033) data 0.000 (0.001) loss 0.0509 (1.1795) lr 5.7422e-04 eta 14:07:02
epoch [38/50] batch [1020/2000] time 2.049 (2.033) data 0.000 (0.001) loss 2.6206 (1.1815) lr 5.7422e-04 eta 14:06:20
epoch [38/50] batch [1040/2000] time 1.995 (2.033) data 0.000 (0.001) loss 1.5592 (1.1804) lr 5.7422e-04 eta 14:05:39
epoch [38/50] batch [1060/2000] time 1.997 (2.033) data 0.000 (0.001) loss 1.6102 (1.1786) lr 5.7422e-04 eta 14:04:51
epoch [38/50] batch [1080/2000] time 1.995 (2.033) data 0.000 (0.001) loss 1.8567 (1.1821) lr 5.7422e-04 eta 14:04:10
epoch [38/50] batch [1100/2000] time 2.049 (2.033) data 0.000 (0.001) loss 2.1973 (1.1859) lr 5.7422e-04 eta 14:03:29
epoch [38/50] batch [1120/2000] time 2.049 (2.032) data 0.000 (0.001) loss 1.1897 (1.1911) lr 5.7422e-04 eta 14:02:43
epoch [38/50] batch [1140/2000] time 2.049 (2.032) data 0.001 (0.001) loss 2.1395 (1.1910) lr 5.7422e-04 eta 14:01:58
epoch [38/50] batch [1160/2000] time 2.051 (2.032) data 0.000 (0.001) loss 1.3126 (1.1925) lr 5.7422e-04 eta 14:01:14
epoch [38/50] batch [1180/2000] time 2.000 (2.032) data 0.000 (0.001) loss 1.7493 (1.1889) lr 5.7422e-04 eta 14:00:32
epoch [38/50] batch [1200/2000] time 2.055 (2.032) data 0.000 (0.001) loss 2.6722 (1.1887) lr 5.7422e-04 eta 13:59:49
epoch [38/50] batch [1220/2000] time 2.028 (2.032) data 0.000 (0.001) loss 3.0756 (1.1884) lr 5.7422e-04 eta 13:59:09
epoch [38/50] batch [1240/2000] time 2.029 (2.032) data 0.000 (0.001) loss 0.9308 (1.1910) lr 5.7422e-04 eta 13:58:30
epoch [38/50] batch [1260/2000] time 2.031 (2.032) data 0.000 (0.001) loss 4.2123 (1.1939) lr 5.7422e-04 eta 13:57:48
epoch [38/50] batch [1280/2000] time 2.050 (2.032) data 0.000 (0.001) loss 0.1623 (1.1912) lr 5.7422e-04 eta 13:57:07
epoch [38/50] batch [1300/2000] time 2.048 (2.032) data 0.000 (0.001) loss 0.9268 (1.1908) lr 5.7422e-04 eta 13:56:23
epoch [38/50] batch [1320/2000] time 2.051 (2.032) data 0.000 (0.001) loss 1.1918 (1.1868) lr 5.7422e-04 eta 13:55:41
epoch [38/50] batch [1340/2000] time 2.027 (2.032) data 0.000 (0.001) loss 0.6674 (1.1880) lr 5.7422e-04 eta 13:54:59
epoch [38/50] batch [1360/2000] time 2.049 (2.032) data 0.000 (0.001) loss 0.6467 (1.1921) lr 5.7422e-04 eta 13:54:18
epoch [38/50] batch [1380/2000] time 2.029 (2.032) data 0.000 (0.001) loss 1.5992 (1.1912) lr 5.7422e-04 eta 13:53:37
epoch [38/50] batch [1400/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.6096 (1.1875) lr 5.7422e-04 eta 13:52:54
epoch [38/50] batch [1420/2000] time 2.027 (2.031) data 0.000 (0.001) loss 0.2035 (1.1849) lr 5.7422e-04 eta 13:52:09
epoch [38/50] batch [1440/2000] time 2.000 (2.031) data 0.000 (0.001) loss 0.6683 (1.1885) lr 5.7422e-04 eta 13:51:25
epoch [38/50] batch [1460/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.0694 (1.1860) lr 5.7422e-04 eta 13:50:44
epoch [38/50] batch [1480/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.4957 (1.1863) lr 5.7422e-04 eta 13:50:03
epoch [38/50] batch [1500/2000] time 2.050 (2.031) data 0.000 (0.001) loss 1.1089 (1.1868) lr 5.7422e-04 eta 13:49:22
epoch [38/50] batch [1520/2000] time 2.051 (2.031) data 0.000 (0.001) loss 3.3106 (1.1872) lr 5.7422e-04 eta 13:48:42
epoch [38/50] batch [1540/2000] time 2.000 (2.031) data 0.000 (0.001) loss 1.9363 (1.1829) lr 5.7422e-04 eta 13:48:01
epoch [38/50] batch [1560/2000] time 2.000 (2.031) data 0.000 (0.001) loss 0.1597 (1.1887) lr 5.7422e-04 eta 13:47:19
epoch [38/50] batch [1580/2000] time 2.054 (2.031) data 0.000 (0.001) loss 0.6438 (1.1898) lr 5.7422e-04 eta 13:46:39
epoch [38/50] batch [1600/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.9521 (1.1906) lr 5.7422e-04 eta 13:45:58
epoch [38/50] batch [1620/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.1897 (1.1928) lr 5.7422e-04 eta 13:45:15
epoch [38/50] batch [1640/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.2320 (1.1903) lr 5.7422e-04 eta 13:44:35
epoch [38/50] batch [1660/2000] time 2.049 (2.031) data 0.000 (0.001) loss 0.8781 (1.1956) lr 5.7422e-04 eta 13:43:54
epoch [38/50] batch [1680/2000] time 1.972 (2.031) data 0.001 (0.001) loss 1.4315 (1.1996) lr 5.7422e-04 eta 13:43:12
epoch [38/50] batch [1700/2000] time 2.054 (2.031) data 0.000 (0.001) loss 2.5708 (1.2009) lr 5.7422e-04 eta 13:42:32
epoch [38/50] batch [1720/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.4636 (1.1983) lr 5.7422e-04 eta 13:41:50
epoch [38/50] batch [1740/2000] time 2.026 (2.031) data 0.000 (0.001) loss 0.1214 (1.1984) lr 5.7422e-04 eta 13:41:07
epoch [38/50] batch [1760/2000] time 1.973 (2.031) data 0.000 (0.001) loss 0.7818 (1.2010) lr 5.7422e-04 eta 13:40:25
epoch [38/50] batch [1780/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.1135 (1.1978) lr 5.7422e-04 eta 13:39:44
epoch [38/50] batch [1800/2000] time 1.996 (2.031) data 0.000 (0.001) loss 0.3232 (1.1998) lr 5.7422e-04 eta 13:39:02
epoch [38/50] batch [1820/2000] time 2.028 (2.031) data 0.000 (0.001) loss 1.4722 (1.2009) lr 5.7422e-04 eta 13:38:21
epoch [38/50] batch [1840/2000] time 2.026 (2.031) data 0.000 (0.000) loss 0.7150 (1.1989) lr 5.7422e-04 eta 13:37:39
epoch [38/50] batch [1860/2000] time 2.050 (2.031) data 0.000 (0.000) loss 1.9339 (1.2005) lr 5.7422e-04 eta 13:36:57
epoch [38/50] batch [1880/2000] time 1.974 (2.031) data 0.000 (0.000) loss 0.6520 (1.2005) lr 5.7422e-04 eta 13:36:16
epoch [38/50] batch [1900/2000] time 1.999 (2.030) data 0.000 (0.000) loss 1.0696 (1.2029) lr 5.7422e-04 eta 13:35:33
epoch [38/50] batch [1920/2000] time 2.002 (2.030) data 0.000 (0.000) loss 1.9259 (1.2022) lr 5.7422e-04 eta 13:34:53
epoch [38/50] batch [1940/2000] time 2.032 (2.030) data 0.000 (0.000) loss 1.0802 (1.2036) lr 5.7422e-04 eta 13:34:12
epoch [38/50] batch [1960/2000] time 1.995 (2.030) data 0.000 (0.000) loss 0.6421 (1.2031) lr 5.7422e-04 eta 13:33:32
epoch [38/50] batch [1980/2000] time 1.996 (2.030) data 0.000 (0.000) loss 1.1471 (1.2032) lr 5.7422e-04 eta 13:32:49
epoch [38/50] batch [2000/2000] time 2.048 (2.030) data 0.000 (0.000) loss 3.1664 (1.2025) lr 5.1825e-04 eta 13:32:09
epoch [39/50] batch [20/2000] time 1.974 (2.051) data 0.000 (0.027) loss 0.8846 (1.4722) lr 5.1825e-04 eta 13:39:41
epoch [39/50] batch [40/2000] time 1.999 (2.035) data 0.000 (0.014) loss 0.9832 (1.2577) lr 5.1825e-04 eta 13:32:47
epoch [39/50] batch [60/2000] time 2.028 (2.033) data 0.001 (0.009) loss 1.6961 (1.3456) lr 5.1825e-04 eta 13:30:58
epoch [39/50] batch [80/2000] time 2.048 (2.030) data 0.000 (0.007) loss 2.5814 (1.3617) lr 5.1825e-04 eta 13:29:23
epoch [39/50] batch [100/2000] time 2.052 (2.029) data 0.000 (0.006) loss 1.8667 (1.4033) lr 5.1825e-04 eta 13:28:15
epoch [39/50] batch [120/2000] time 2.028 (2.030) data 0.000 (0.005) loss 2.3427 (1.4007) lr 5.1825e-04 eta 13:27:52
epoch [39/50] batch [140/2000] time 2.054 (2.030) data 0.000 (0.004) loss 1.3944 (1.3756) lr 5.1825e-04 eta 13:27:17
epoch [39/50] batch [160/2000] time 2.055 (2.030) data 0.000 (0.004) loss 2.3234 (1.3571) lr 5.1825e-04 eta 13:26:36
epoch [39/50] batch [180/2000] time 1.999 (2.029) data 0.000 (0.003) loss 0.8735 (1.3081) lr 5.1825e-04 eta 13:25:41
epoch [39/50] batch [200/2000] time 1.997 (2.029) data 0.000 (0.003) loss 2.2223 (1.2960) lr 5.1825e-04 eta 13:24:59
epoch [39/50] batch [220/2000] time 2.056 (2.030) data 0.000 (0.003) loss 0.7739 (1.3071) lr 5.1825e-04 eta 13:24:29
epoch [39/50] batch [240/2000] time 2.051 (2.030) data 0.000 (0.002) loss 0.3843 (1.2669) lr 5.1825e-04 eta 13:23:46
epoch [39/50] batch [260/2000] time 2.053 (2.029) data 0.000 (0.002) loss 3.9875 (1.2635) lr 5.1825e-04 eta 13:22:59
epoch [39/50] batch [280/2000] time 2.053 (2.029) data 0.000 (0.002) loss 2.0576 (1.2516) lr 5.1825e-04 eta 13:22:18
epoch [39/50] batch [300/2000] time 1.997 (2.029) data 0.000 (0.002) loss 0.6198 (1.2389) lr 5.1825e-04 eta 13:21:25
epoch [39/50] batch [320/2000] time 1.997 (2.029) data 0.000 (0.002) loss 1.4171 (1.2352) lr 5.1825e-04 eta 13:20:42
epoch [39/50] batch [340/2000] time 1.997 (2.029) data 0.000 (0.002) loss 0.2483 (1.2202) lr 5.1825e-04 eta 13:20:04
epoch [39/50] batch [360/2000] time 2.055 (2.029) data 0.000 (0.002) loss 2.9216 (1.2423) lr 5.1825e-04 eta 13:19:21
epoch [39/50] batch [380/2000] time 2.027 (2.029) data 0.000 (0.002) loss 0.7577 (1.2333) lr 5.1825e-04 eta 13:18:35
epoch [39/50] batch [400/2000] time 2.055 (2.029) data 0.000 (0.002) loss 1.5478 (1.2164) lr 5.1825e-04 eta 13:17:59
epoch [39/50] batch [420/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.8666 (1.2257) lr 5.1825e-04 eta 13:17:23
epoch [39/50] batch [440/2000] time 2.028 (2.029) data 0.000 (0.001) loss 2.1116 (1.2312) lr 5.1825e-04 eta 13:16:43
epoch [39/50] batch [460/2000] time 2.048 (2.029) data 0.000 (0.001) loss 2.1684 (1.2380) lr 5.1825e-04 eta 13:16:02
epoch [39/50] batch [480/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.3626 (1.2279) lr 5.1825e-04 eta 13:15:26
epoch [39/50] batch [500/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.2171 (1.2094) lr 5.1825e-04 eta 13:14:49
epoch [39/50] batch [520/2000] time 2.028 (2.030) data 0.000 (0.001) loss 2.5714 (1.2067) lr 5.1825e-04 eta 13:14:16
epoch [39/50] batch [540/2000] time 2.028 (2.030) data 0.000 (0.001) loss 1.1364 (1.2127) lr 5.1825e-04 eta 13:13:33
epoch [39/50] batch [560/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.3566 (1.2089) lr 5.1825e-04 eta 13:12:49
epoch [39/50] batch [580/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.8097 (1.1936) lr 5.1825e-04 eta 13:12:07
epoch [39/50] batch [600/2000] time 2.051 (2.029) data 0.001 (0.001) loss 1.6482 (1.1895) lr 5.1825e-04 eta 13:11:28
epoch [39/50] batch [620/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.9711 (1.1856) lr 5.1825e-04 eta 13:10:42
epoch [39/50] batch [640/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.6384 (1.1835) lr 5.1825e-04 eta 13:10:05
epoch [39/50] batch [660/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.8945 (1.1846) lr 5.1825e-04 eta 13:09:22
epoch [39/50] batch [680/2000] time 2.030 (2.029) data 0.000 (0.001) loss 2.6783 (1.1876) lr 5.1825e-04 eta 13:08:42
epoch [39/50] batch [700/2000] time 1.976 (2.029) data 0.000 (0.001) loss 0.8291 (1.1827) lr 5.1825e-04 eta 13:07:58
epoch [39/50] batch [720/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.7659 (1.1730) lr 5.1825e-04 eta 13:07:19
epoch [39/50] batch [740/2000] time 2.056 (2.029) data 0.000 (0.001) loss 0.0381 (1.1719) lr 5.1825e-04 eta 13:06:39
epoch [39/50] batch [760/2000] time 1.998 (2.029) data 0.000 (0.001) loss 2.7113 (1.1772) lr 5.1825e-04 eta 13:05:56
epoch [39/50] batch [780/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.1679 (1.1739) lr 5.1825e-04 eta 13:05:14
epoch [39/50] batch [800/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.7566 (1.1765) lr 5.1825e-04 eta 13:04:30
epoch [39/50] batch [820/2000] time 2.055 (2.029) data 0.000 (0.001) loss 0.0344 (1.1820) lr 5.1825e-04 eta 13:03:47
epoch [39/50] batch [840/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.9294 (1.1845) lr 5.1825e-04 eta 13:03:09
epoch [39/50] batch [860/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.0832 (1.1829) lr 5.1825e-04 eta 13:02:26
epoch [39/50] batch [880/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.6997 (1.1790) lr 5.1825e-04 eta 13:01:45
epoch [39/50] batch [900/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.2824 (1.1774) lr 5.1825e-04 eta 13:01:05
epoch [39/50] batch [920/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.1086 (1.1715) lr 5.1825e-04 eta 13:00:23
epoch [39/50] batch [940/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.1073 (1.1763) lr 5.1825e-04 eta 12:59:41
epoch [39/50] batch [960/2000] time 2.004 (2.029) data 0.000 (0.001) loss 0.6258 (1.1764) lr 5.1825e-04 eta 12:59:01
epoch [39/50] batch [980/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.3540 (1.1759) lr 5.1825e-04 eta 12:58:24
epoch [39/50] batch [1000/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.5305 (1.1768) lr 5.1825e-04 eta 12:57:43
epoch [39/50] batch [1020/2000] time 2.049 (2.029) data 0.000 (0.001) loss 3.4125 (1.1827) lr 5.1825e-04 eta 12:57:03
epoch [39/50] batch [1040/2000] time 2.057 (2.029) data 0.000 (0.001) loss 0.7514 (1.1805) lr 5.1825e-04 eta 12:56:24
epoch [39/50] batch [1060/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.2427 (1.1818) lr 5.1825e-04 eta 12:55:43
epoch [39/50] batch [1080/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.0548 (1.1818) lr 5.1825e-04 eta 12:55:06
epoch [39/50] batch [1100/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.4310 (1.1810) lr 5.1825e-04 eta 12:54:25
epoch [39/50] batch [1120/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.2290 (1.1821) lr 5.1825e-04 eta 12:53:44
epoch [39/50] batch [1140/2000] time 2.027 (2.029) data 0.001 (0.001) loss 0.5171 (1.1817) lr 5.1825e-04 eta 12:53:03
epoch [39/50] batch [1160/2000] time 2.027 (2.029) data 0.000 (0.001) loss 3.5101 (1.1824) lr 5.1825e-04 eta 12:52:23
epoch [39/50] batch [1180/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.9568 (1.1778) lr 5.1825e-04 eta 12:51:43
epoch [39/50] batch [1200/2000] time 2.047 (2.029) data 0.000 (0.001) loss 2.0906 (1.1767) lr 5.1825e-04 eta 12:51:04
epoch [39/50] batch [1220/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.1528 (1.1803) lr 5.1825e-04 eta 12:50:21
epoch [39/50] batch [1240/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.0431 (1.1768) lr 5.1825e-04 eta 12:49:38
epoch [39/50] batch [1260/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.0856 (1.1764) lr 5.1825e-04 eta 12:48:56
epoch [39/50] batch [1280/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.0291 (1.1771) lr 5.1825e-04 eta 12:48:15
epoch [39/50] batch [1300/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.2535 (1.1784) lr 5.1825e-04 eta 12:47:33
epoch [39/50] batch [1320/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.2025 (1.1773) lr 5.1825e-04 eta 12:46:54
epoch [39/50] batch [1340/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.1024 (1.1769) lr 5.1825e-04 eta 12:46:12
epoch [39/50] batch [1360/2000] time 1.995 (2.029) data 0.000 (0.001) loss 0.2264 (1.1805) lr 5.1825e-04 eta 12:45:32
epoch [39/50] batch [1380/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.7304 (1.1804) lr 5.1825e-04 eta 12:44:53
epoch [39/50] batch [1400/2000] time 2.056 (2.029) data 0.000 (0.001) loss 0.8148 (1.1796) lr 5.1825e-04 eta 12:44:12
epoch [39/50] batch [1420/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.2614 (1.1772) lr 5.1825e-04 eta 12:43:31
epoch [39/50] batch [1440/2000] time 2.047 (2.029) data 0.000 (0.001) loss 1.0921 (1.1749) lr 5.1825e-04 eta 12:42:51
epoch [39/50] batch [1460/2000] time 2.001 (2.029) data 0.000 (0.001) loss 2.5899 (1.1782) lr 5.1825e-04 eta 12:42:10
epoch [39/50] batch [1480/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.9193 (1.1800) lr 5.1825e-04 eta 12:41:31
epoch [39/50] batch [1500/2000] time 2.028 (2.029) data 0.000 (0.001) loss 2.4502 (1.1796) lr 5.1825e-04 eta 12:40:50
epoch [39/50] batch [1520/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.7138 (1.1799) lr 5.1825e-04 eta 12:40:08
epoch [39/50] batch [1540/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.0438 (1.1812) lr 5.1825e-04 eta 12:39:29
epoch [39/50] batch [1560/2000] time 2.052 (2.029) data 0.000 (0.001) loss 2.1890 (1.1778) lr 5.1825e-04 eta 12:38:51
epoch [39/50] batch [1580/2000] time 1.999 (2.029) data 0.000 (0.001) loss 3.0544 (1.1794) lr 5.1825e-04 eta 12:38:09
epoch [39/50] batch [1600/2000] time 2.053 (2.029) data 0.000 (0.001) loss 2.1246 (1.1841) lr 5.1825e-04 eta 12:37:29
epoch [39/50] batch [1620/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.6149 (1.1862) lr 5.1825e-04 eta 12:36:50
epoch [39/50] batch [1640/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.6016 (1.1864) lr 5.1825e-04 eta 12:36:11
epoch [39/50] batch [1660/2000] time 2.031 (2.029) data 0.000 (0.001) loss 1.6132 (1.1899) lr 5.1825e-04 eta 12:35:29
epoch [39/50] batch [1680/2000] time 2.049 (2.029) data 0.001 (0.001) loss 1.2288 (1.1916) lr 5.1825e-04 eta 12:34:51
epoch [39/50] batch [1700/2000] time 2.053 (2.029) data 0.000 (0.001) loss 2.0742 (1.1917) lr 5.1825e-04 eta 12:34:11
epoch [39/50] batch [1720/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.6553 (1.1934) lr 5.1825e-04 eta 12:33:32
epoch [39/50] batch [1740/2000] time 2.052 (2.029) data 0.000 (0.001) loss 3.2125 (1.1918) lr 5.1825e-04 eta 12:32:52
epoch [39/50] batch [1760/2000] time 2.051 (2.029) data 0.000 (0.001) loss 2.1167 (1.1959) lr 5.1825e-04 eta 12:32:11
epoch [39/50] batch [1780/2000] time 2.053 (2.029) data 0.000 (0.000) loss 1.2488 (1.1925) lr 5.1825e-04 eta 12:31:31
epoch [39/50] batch [1800/2000] time 1.976 (2.029) data 0.000 (0.000) loss 2.3268 (1.1966) lr 5.1825e-04 eta 12:30:49
epoch [39/50] batch [1820/2000] time 2.053 (2.029) data 0.000 (0.000) loss 0.9801 (1.1952) lr 5.1825e-04 eta 12:30:08
epoch [39/50] batch [1840/2000] time 2.055 (2.029) data 0.000 (0.000) loss 2.9881 (1.1989) lr 5.1825e-04 eta 12:29:28
epoch [39/50] batch [1860/2000] time 1.997 (2.029) data 0.000 (0.000) loss 0.3411 (1.1991) lr 5.1825e-04 eta 12:28:47
epoch [39/50] batch [1880/2000] time 2.054 (2.029) data 0.000 (0.000) loss 1.5046 (1.1981) lr 5.1825e-04 eta 12:28:08
epoch [39/50] batch [1900/2000] time 2.027 (2.029) data 0.000 (0.000) loss 1.0920 (1.2012) lr 5.1825e-04 eta 12:27:27
epoch [39/50] batch [1920/2000] time 2.049 (2.029) data 0.000 (0.000) loss 2.6648 (1.2034) lr 5.1825e-04 eta 12:26:46
epoch [39/50] batch [1940/2000] time 2.050 (2.029) data 0.000 (0.000) loss 3.5750 (1.2038) lr 5.1825e-04 eta 12:26:06
epoch [39/50] batch [1960/2000] time 2.059 (2.029) data 0.000 (0.000) loss 0.2065 (1.1995) lr 5.1825e-04 eta 12:25:25
epoch [39/50] batch [1980/2000] time 1.999 (2.029) data 0.000 (0.000) loss 2.4174 (1.2003) lr 5.1825e-04 eta 12:24:44
epoch [39/50] batch [2000/2000] time 2.049 (2.029) data 0.000 (0.000) loss 0.3267 (1.1989) lr 4.6417e-04 eta 12:24:05
epoch [40/50] batch [20/2000] time 2.054 (2.065) data 0.000 (0.027) loss 2.1864 (1.0177) lr 4.6417e-04 eta 12:36:22
epoch [40/50] batch [40/2000] time 1.974 (2.047) data 0.000 (0.014) loss 0.8990 (1.1666) lr 4.6417e-04 eta 12:29:05
epoch [40/50] batch [60/2000] time 2.054 (2.041) data 0.001 (0.009) loss 2.1873 (1.0781) lr 4.6417e-04 eta 12:26:12
epoch [40/50] batch [80/2000] time 2.027 (2.037) data 0.000 (0.007) loss 1.2962 (1.0716) lr 4.6417e-04 eta 12:24:12
epoch [40/50] batch [100/2000] time 2.055 (2.036) data 0.000 (0.006) loss 0.5392 (1.0938) lr 4.6417e-04 eta 12:23:00
epoch [40/50] batch [120/2000] time 2.054 (2.035) data 0.000 (0.005) loss 1.5016 (1.0872) lr 4.6417e-04 eta 12:22:05
epoch [40/50] batch [140/2000] time 2.035 (2.034) data 0.000 (0.004) loss 0.9891 (1.1376) lr 4.6417e-04 eta 12:21:02
epoch [40/50] batch [160/2000] time 2.038 (2.034) data 0.000 (0.004) loss 1.9584 (1.1271) lr 4.6417e-04 eta 12:20:22
epoch [40/50] batch [180/2000] time 1.998 (2.033) data 0.000 (0.003) loss 0.2910 (1.1095) lr 4.6417e-04 eta 12:19:21
epoch [40/50] batch [200/2000] time 2.050 (2.033) data 0.000 (0.003) loss 0.3679 (1.1370) lr 4.6417e-04 eta 12:18:41
epoch [40/50] batch [220/2000] time 2.051 (2.032) data 0.000 (0.003) loss 0.6381 (1.1297) lr 4.6417e-04 eta 12:17:39
epoch [40/50] batch [240/2000] time 2.000 (2.032) data 0.000 (0.002) loss 0.3927 (1.1304) lr 4.6417e-04 eta 12:17:03
epoch [40/50] batch [260/2000] time 2.051 (2.032) data 0.000 (0.002) loss 2.8629 (1.1486) lr 4.6417e-04 eta 12:16:12
epoch [40/50] batch [280/2000] time 1.999 (2.032) data 0.000 (0.002) loss 0.3917 (1.1305) lr 4.6417e-04 eta 12:15:26
epoch [40/50] batch [300/2000] time 2.050 (2.032) data 0.000 (0.002) loss 0.0958 (1.1098) lr 4.6417e-04 eta 12:14:49
epoch [40/50] batch [320/2000] time 1.997 (2.032) data 0.000 (0.002) loss 0.0834 (1.1166) lr 4.6417e-04 eta 12:14:05
epoch [40/50] batch [340/2000] time 2.050 (2.031) data 0.000 (0.002) loss 1.5797 (1.1122) lr 4.6417e-04 eta 12:13:20
epoch [40/50] batch [360/2000] time 2.028 (2.031) data 0.000 (0.002) loss 0.4538 (1.1147) lr 4.6417e-04 eta 12:12:40
epoch [40/50] batch [380/2000] time 1.997 (2.031) data 0.000 (0.002) loss 1.3289 (1.1341) lr 4.6417e-04 eta 12:12:00
epoch [40/50] batch [400/2000] time 2.053 (2.031) data 0.000 (0.002) loss 1.6113 (1.1289) lr 4.6417e-04 eta 12:11:18
epoch [40/50] batch [420/2000] time 2.027 (2.031) data 0.000 (0.001) loss 0.1064 (1.1313) lr 4.6417e-04 eta 12:10:36
epoch [40/50] batch [440/2000] time 1.974 (2.031) data 0.000 (0.001) loss 1.2195 (1.1254) lr 4.6417e-04 eta 12:09:50
epoch [40/50] batch [460/2000] time 2.055 (2.031) data 0.000 (0.001) loss 0.9127 (1.1358) lr 4.6417e-04 eta 12:09:12
epoch [40/50] batch [480/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.7054 (1.1553) lr 4.6417e-04 eta 12:08:35
epoch [40/50] batch [500/2000] time 1.999 (2.031) data 0.000 (0.001) loss 2.4729 (1.1472) lr 4.6417e-04 eta 12:07:55
epoch [40/50] batch [520/2000] time 2.060 (2.031) data 0.000 (0.001) loss 2.3794 (1.1454) lr 4.6417e-04 eta 12:07:14
epoch [40/50] batch [540/2000] time 1.975 (2.032) data 0.000 (0.001) loss 0.0203 (1.1563) lr 4.6417e-04 eta 12:06:37
epoch [40/50] batch [560/2000] time 2.001 (2.032) data 0.000 (0.001) loss 0.4644 (1.1572) lr 4.6417e-04 eta 12:05:57
epoch [40/50] batch [580/2000] time 2.050 (2.032) data 0.000 (0.001) loss 0.2828 (1.1514) lr 4.6417e-04 eta 12:05:18
epoch [40/50] batch [600/2000] time 2.028 (2.032) data 0.001 (0.001) loss 0.5634 (1.1665) lr 4.6417e-04 eta 12:04:37
epoch [40/50] batch [620/2000] time 1.978 (2.032) data 0.000 (0.001) loss 0.0999 (1.1774) lr 4.6417e-04 eta 12:03:55
epoch [40/50] batch [640/2000] time 2.050 (2.032) data 0.000 (0.001) loss 2.4325 (1.1736) lr 4.6417e-04 eta 12:03:15
epoch [40/50] batch [660/2000] time 1.996 (2.032) data 0.000 (0.001) loss 0.1724 (1.1786) lr 4.6417e-04 eta 12:02:34
epoch [40/50] batch [680/2000] time 2.050 (2.031) data 0.000 (0.001) loss 1.3368 (1.1723) lr 4.6417e-04 eta 12:01:48
epoch [40/50] batch [700/2000] time 2.049 (2.031) data 0.000 (0.001) loss 0.7053 (1.1674) lr 4.6417e-04 eta 12:01:08
epoch [40/50] batch [720/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.2545 (1.1686) lr 4.6417e-04 eta 12:00:24
epoch [40/50] batch [740/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.1312 (1.1638) lr 4.6417e-04 eta 11:59:38
epoch [40/50] batch [760/2000] time 2.055 (2.031) data 0.000 (0.001) loss 0.3781 (1.1724) lr 4.6417e-04 eta 11:59:01
epoch [40/50] batch [780/2000] time 1.973 (2.031) data 0.000 (0.001) loss 2.4302 (1.1783) lr 4.6417e-04 eta 11:58:16
epoch [40/50] batch [800/2000] time 1.975 (2.031) data 0.000 (0.001) loss 1.4660 (1.1782) lr 4.6417e-04 eta 11:57:33
epoch [40/50] batch [820/2000] time 2.031 (2.031) data 0.000 (0.001) loss 0.8082 (1.1794) lr 4.6417e-04 eta 11:56:52
epoch [40/50] batch [840/2000] time 1.995 (2.031) data 0.000 (0.001) loss 2.0863 (1.1768) lr 4.6417e-04 eta 11:56:13
epoch [40/50] batch [860/2000] time 2.050 (2.031) data 0.000 (0.001) loss 1.6099 (1.1741) lr 4.6417e-04 eta 11:55:30
epoch [40/50] batch [880/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.2887 (1.1775) lr 4.6417e-04 eta 11:54:45
epoch [40/50] batch [900/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.3374 (1.1825) lr 4.6417e-04 eta 11:54:03
epoch [40/50] batch [920/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.9219 (1.1757) lr 4.6417e-04 eta 11:53:21
epoch [40/50] batch [940/2000] time 2.047 (2.031) data 0.000 (0.001) loss 0.7599 (1.1737) lr 4.6417e-04 eta 11:52:42
epoch [40/50] batch [960/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.5511 (1.1751) lr 4.6417e-04 eta 11:52:02
epoch [40/50] batch [980/2000] time 1.996 (2.031) data 0.000 (0.001) loss 1.1435 (1.1755) lr 4.6417e-04 eta 11:51:23
epoch [40/50] batch [1000/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.6172 (1.1650) lr 4.6417e-04 eta 11:50:42
epoch [40/50] batch [1020/2000] time 2.030 (2.031) data 0.000 (0.001) loss 0.7933 (1.1609) lr 4.6417e-04 eta 11:50:03
epoch [40/50] batch [1040/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.4876 (1.1645) lr 4.6417e-04 eta 11:49:25
epoch [40/50] batch [1060/2000] time 2.025 (2.031) data 0.000 (0.001) loss 3.2877 (1.1675) lr 4.6417e-04 eta 11:48:43
epoch [40/50] batch [1080/2000] time 2.026 (2.031) data 0.000 (0.001) loss 1.6088 (1.1636) lr 4.6417e-04 eta 11:48:03
epoch [40/50] batch [1100/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.0149 (1.1671) lr 4.6417e-04 eta 11:47:20
epoch [40/50] batch [1120/2000] time 2.029 (2.031) data 0.000 (0.001) loss 2.7959 (1.1725) lr 4.6417e-04 eta 11:46:42
epoch [40/50] batch [1140/2000] time 2.032 (2.031) data 0.001 (0.001) loss 1.4643 (1.1703) lr 4.6417e-04 eta 11:46:00
epoch [40/50] batch [1160/2000] time 2.049 (2.031) data 0.000 (0.001) loss 0.9670 (1.1709) lr 4.6417e-04 eta 11:45:19
epoch [40/50] batch [1180/2000] time 2.024 (2.031) data 0.000 (0.001) loss 1.6653 (1.1716) lr 4.6417e-04 eta 11:44:37
epoch [40/50] batch [1200/2000] time 2.019 (2.031) data 0.000 (0.001) loss 0.6551 (1.1774) lr 4.6417e-04 eta 11:43:54
epoch [40/50] batch [1220/2000] time 1.991 (2.030) data 0.000 (0.001) loss 0.4820 (1.1808) lr 4.6417e-04 eta 11:43:11
epoch [40/50] batch [1240/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.9242 (1.1796) lr 4.6417e-04 eta 11:42:28
epoch [40/50] batch [1260/2000] time 1.995 (2.030) data 0.000 (0.001) loss 0.7547 (1.1851) lr 4.6417e-04 eta 11:41:45
epoch [40/50] batch [1280/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.9467 (1.1902) lr 4.6417e-04 eta 11:41:02
epoch [40/50] batch [1300/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.5445 (1.1942) lr 4.6417e-04 eta 11:40:19
epoch [40/50] batch [1320/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.1938 (1.1941) lr 4.6417e-04 eta 11:39:38
epoch [40/50] batch [1340/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.3463 (1.1903) lr 4.6417e-04 eta 11:38:56
epoch [40/50] batch [1360/2000] time 2.053 (2.030) data 0.000 (0.001) loss 2.4230 (1.1892) lr 4.6417e-04 eta 11:38:15
epoch [40/50] batch [1380/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.2995 (1.2005) lr 4.6417e-04 eta 11:37:32
epoch [40/50] batch [1400/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.1246 (1.2070) lr 4.6417e-04 eta 11:36:52
epoch [40/50] batch [1420/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.3108 (1.2086) lr 4.6417e-04 eta 11:36:12
epoch [40/50] batch [1440/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.6623 (1.2081) lr 4.6417e-04 eta 11:35:30
epoch [40/50] batch [1460/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.7875 (1.2038) lr 4.6417e-04 eta 11:34:46
epoch [40/50] batch [1480/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.4269 (1.2059) lr 4.6417e-04 eta 11:34:06
epoch [40/50] batch [1500/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.1504 (1.2071) lr 4.6417e-04 eta 11:33:23
epoch [40/50] batch [1520/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.2554 (1.2124) lr 4.6417e-04 eta 11:32:46
epoch [40/50] batch [1540/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.1415 (1.2069) lr 4.6417e-04 eta 11:32:05
epoch [40/50] batch [1560/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.1987 (1.2073) lr 4.6417e-04 eta 11:31:24
epoch [40/50] batch [1580/2000] time 1.994 (2.030) data 0.000 (0.001) loss 1.7354 (1.2057) lr 4.6417e-04 eta 11:30:45
epoch [40/50] batch [1600/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.7372 (1.2028) lr 4.6417e-04 eta 11:30:03
epoch [40/50] batch [1620/2000] time 2.025 (2.030) data 0.000 (0.001) loss 0.3529 (1.1977) lr 4.6417e-04 eta 11:29:22
epoch [40/50] batch [1640/2000] time 2.049 (2.030) data 0.000 (0.001) loss 2.8803 (1.1998) lr 4.6417e-04 eta 11:28:42
epoch [40/50] batch [1660/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.6003 (1.1997) lr 4.6417e-04 eta 11:28:01
epoch [40/50] batch [1680/2000] time 2.026 (2.030) data 0.001 (0.001) loss 1.7693 (1.2018) lr 4.6417e-04 eta 11:27:21
epoch [40/50] batch [1700/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.5101 (1.1987) lr 4.6417e-04 eta 11:26:41
epoch [40/50] batch [1720/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.0337 (1.1990) lr 4.6417e-04 eta 11:26:00
epoch [40/50] batch [1740/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.3899 (1.1972) lr 4.6417e-04 eta 11:25:18
epoch [40/50] batch [1760/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.4040 (1.1949) lr 4.6417e-04 eta 11:24:36
epoch [40/50] batch [1780/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.7293 (1.1961) lr 4.6417e-04 eta 11:23:55
epoch [40/50] batch [1800/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.5547 (1.1938) lr 4.6417e-04 eta 11:23:14
epoch [40/50] batch [1820/2000] time 2.055 (2.029) data 0.000 (0.001) loss 1.5324 (1.1964) lr 4.6417e-04 eta 11:22:32
epoch [40/50] batch [1840/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.4614 (1.1962) lr 4.6417e-04 eta 11:21:52
epoch [40/50] batch [1860/2000] time 2.055 (2.029) data 0.000 (0.001) loss 0.6783 (1.1977) lr 4.6417e-04 eta 11:21:12
epoch [40/50] batch [1880/2000] time 2.029 (2.029) data 0.000 (0.000) loss 0.6244 (1.1944) lr 4.6417e-04 eta 11:20:30
epoch [40/50] batch [1900/2000] time 2.051 (2.029) data 0.000 (0.000) loss 0.3079 (1.1943) lr 4.6417e-04 eta 11:19:51
epoch [40/50] batch [1920/2000] time 1.997 (2.030) data 0.000 (0.000) loss 0.6569 (1.1938) lr 4.6417e-04 eta 11:19:12
epoch [40/50] batch [1940/2000] time 2.028 (2.029) data 0.000 (0.000) loss 0.3028 (1.1988) lr 4.6417e-04 eta 11:18:31
epoch [40/50] batch [1960/2000] time 2.055 (2.030) data 0.000 (0.000) loss 0.7619 (1.1985) lr 4.6417e-04 eta 11:17:51
epoch [40/50] batch [1980/2000] time 2.052 (2.030) data 0.000 (0.000) loss 1.2131 (1.1982) lr 4.6417e-04 eta 11:17:11
epoch [40/50] batch [2000/2000] time 1.998 (2.030) data 0.000 (0.000) loss 0.5469 (1.1942) lr 4.1221e-04 eta 11:16:30
epoch [41/50] batch [20/2000] time 1.998 (2.049) data 0.000 (0.026) loss 1.5170 (0.9869) lr 4.1221e-04 eta 11:22:23
epoch [41/50] batch [40/2000] time 2.052 (2.042) data 0.000 (0.013) loss 0.9703 (1.0219) lr 4.1221e-04 eta 11:19:17
epoch [41/50] batch [60/2000] time 2.029 (2.038) data 0.001 (0.009) loss 0.5746 (1.0355) lr 4.1221e-04 eta 11:17:26
epoch [41/50] batch [80/2000] time 2.030 (2.038) data 0.000 (0.007) loss 0.8148 (0.9949) lr 4.1221e-04 eta 11:16:30
epoch [41/50] batch [100/2000] time 1.998 (2.035) data 0.000 (0.005) loss 1.5502 (0.9931) lr 4.1221e-04 eta 11:14:50
epoch [41/50] batch [120/2000] time 2.052 (2.035) data 0.000 (0.005) loss 0.1561 (1.0197) lr 4.1221e-04 eta 11:14:18
epoch [41/50] batch [140/2000] time 1.998 (2.035) data 0.000 (0.004) loss 2.4697 (1.0378) lr 4.1221e-04 eta 11:13:26
epoch [41/50] batch [160/2000] time 2.025 (2.033) data 0.000 (0.003) loss 0.5340 (1.1103) lr 4.1221e-04 eta 11:12:14
epoch [41/50] batch [180/2000] time 1.971 (2.032) data 0.000 (0.003) loss 0.1078 (1.1024) lr 4.1221e-04 eta 11:11:22
epoch [41/50] batch [200/2000] time 2.049 (2.031) data 0.000 (0.003) loss 0.1418 (1.1027) lr 4.1221e-04 eta 11:10:10
epoch [41/50] batch [220/2000] time 2.027 (2.031) data 0.000 (0.003) loss 0.1267 (1.1171) lr 4.1221e-04 eta 11:09:31
epoch [41/50] batch [240/2000] time 2.057 (2.031) data 0.000 (0.002) loss 3.2445 (1.1345) lr 4.1221e-04 eta 11:08:56
epoch [41/50] batch [260/2000] time 1.998 (2.031) data 0.000 (0.002) loss 1.0960 (1.1485) lr 4.1221e-04 eta 11:08:10
epoch [41/50] batch [280/2000] time 2.029 (2.030) data 0.000 (0.002) loss 1.0308 (1.1423) lr 4.1221e-04 eta 11:07:20
epoch [41/50] batch [300/2000] time 2.050 (2.030) data 0.000 (0.002) loss 1.1123 (1.1530) lr 4.1221e-04 eta 11:06:36
epoch [41/50] batch [320/2000] time 2.029 (2.030) data 0.000 (0.002) loss 0.3007 (1.1559) lr 4.1221e-04 eta 11:05:45
epoch [41/50] batch [340/2000] time 2.051 (2.030) data 0.000 (0.002) loss 1.1364 (1.1503) lr 4.1221e-04 eta 11:05:10
epoch [41/50] batch [360/2000] time 2.052 (2.030) data 0.000 (0.002) loss 0.8463 (1.1384) lr 4.1221e-04 eta 11:04:23
epoch [41/50] batch [380/2000] time 2.002 (2.030) data 0.000 (0.002) loss 1.1733 (1.1346) lr 4.1221e-04 eta 11:03:44
epoch [41/50] batch [400/2000] time 2.051 (2.030) data 0.000 (0.002) loss 0.6964 (1.1463) lr 4.1221e-04 eta 11:03:03
epoch [41/50] batch [420/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.3323 (1.1522) lr 4.1221e-04 eta 11:02:25
epoch [41/50] batch [440/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.4320 (1.1567) lr 4.1221e-04 eta 11:01:51
epoch [41/50] batch [460/2000] time 1.997 (2.030) data 0.000 (0.001) loss 3.2620 (1.1697) lr 4.1221e-04 eta 11:01:09
epoch [41/50] batch [480/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.5189 (1.1771) lr 4.1221e-04 eta 11:00:28
epoch [41/50] batch [500/2000] time 2.026 (2.030) data 0.000 (0.001) loss 0.1682 (1.1601) lr 4.1221e-04 eta 10:59:50
epoch [41/50] batch [520/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.9437 (1.1772) lr 4.1221e-04 eta 10:59:12
epoch [41/50] batch [540/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.6677 (1.1788) lr 4.1221e-04 eta 10:58:33
epoch [41/50] batch [560/2000] time 2.031 (2.031) data 0.000 (0.001) loss 0.9028 (1.1805) lr 4.1221e-04 eta 10:57:53
epoch [41/50] batch [580/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.1457 (1.1806) lr 4.1221e-04 eta 10:57:07
epoch [41/50] batch [600/2000] time 2.051 (2.030) data 0.001 (0.001) loss 0.1400 (1.1937) lr 4.1221e-04 eta 10:56:25
epoch [41/50] batch [620/2000] time 1.975 (2.030) data 0.000 (0.001) loss 0.7035 (1.1963) lr 4.1221e-04 eta 10:55:45
epoch [41/50] batch [640/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.8149 (1.1930) lr 4.1221e-04 eta 10:54:58
epoch [41/50] batch [660/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.7343 (1.1967) lr 4.1221e-04 eta 10:54:14
epoch [41/50] batch [680/2000] time 1.978 (2.030) data 0.000 (0.001) loss 0.6808 (1.1910) lr 4.1221e-04 eta 10:53:35
epoch [41/50] batch [700/2000] time 2.001 (2.030) data 0.000 (0.001) loss 1.2297 (1.1956) lr 4.1221e-04 eta 10:52:56
epoch [41/50] batch [720/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.7471 (1.1995) lr 4.1221e-04 eta 10:52:15
epoch [41/50] batch [740/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.2600 (1.1970) lr 4.1221e-04 eta 10:51:36
epoch [41/50] batch [760/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.9630 (1.1970) lr 4.1221e-04 eta 10:50:57
epoch [41/50] batch [780/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.1381 (1.2022) lr 4.1221e-04 eta 10:50:18
epoch [41/50] batch [800/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.7574 (1.2018) lr 4.1221e-04 eta 10:49:37
epoch [41/50] batch [820/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.5224 (1.2010) lr 4.1221e-04 eta 10:48:58
epoch [41/50] batch [840/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.2944 (1.1971) lr 4.1221e-04 eta 10:48:22
epoch [41/50] batch [860/2000] time 2.030 (2.031) data 0.000 (0.001) loss 1.2541 (1.1919) lr 4.1221e-04 eta 10:47:44
epoch [41/50] batch [880/2000] time 2.050 (2.031) data 0.000 (0.001) loss 3.2182 (1.1990) lr 4.1221e-04 eta 10:47:05
epoch [41/50] batch [900/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.4492 (1.1978) lr 4.1221e-04 eta 10:46:24
epoch [41/50] batch [920/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.5492 (1.2031) lr 4.1221e-04 eta 10:45:40
epoch [41/50] batch [940/2000] time 2.003 (2.030) data 0.000 (0.001) loss 1.4662 (1.2092) lr 4.1221e-04 eta 10:44:57
epoch [41/50] batch [960/2000] time 1.977 (2.030) data 0.000 (0.001) loss 1.9641 (1.2077) lr 4.1221e-04 eta 10:44:17
epoch [41/50] batch [980/2000] time 2.027 (2.030) data 0.000 (0.001) loss 2.5217 (1.2132) lr 4.1221e-04 eta 10:43:35
epoch [41/50] batch [1000/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.1835 (1.2146) lr 4.1221e-04 eta 10:42:55
epoch [41/50] batch [1020/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.3496 (1.2122) lr 4.1221e-04 eta 10:42:14
epoch [41/50] batch [1040/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.1684 (1.2120) lr 4.1221e-04 eta 10:41:36
epoch [41/50] batch [1060/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.0368 (1.2085) lr 4.1221e-04 eta 10:40:54
epoch [41/50] batch [1080/2000] time 2.026 (2.030) data 0.000 (0.001) loss 1.4369 (1.2050) lr 4.1221e-04 eta 10:40:15
epoch [41/50] batch [1100/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.4181 (1.1995) lr 4.1221e-04 eta 10:39:33
epoch [41/50] batch [1120/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.4727 (1.2009) lr 4.1221e-04 eta 10:38:50
epoch [41/50] batch [1140/2000] time 2.051 (2.030) data 0.001 (0.001) loss 0.7879 (1.1986) lr 4.1221e-04 eta 10:38:10
epoch [41/50] batch [1160/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.3159 (1.1975) lr 4.1221e-04 eta 10:37:27
epoch [41/50] batch [1180/2000] time 1.999 (2.030) data 0.000 (0.001) loss 3.2218 (1.2025) lr 4.1221e-04 eta 10:36:47
epoch [41/50] batch [1200/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.3542 (1.2086) lr 4.1221e-04 eta 10:36:09
epoch [41/50] batch [1220/2000] time 2.056 (2.030) data 0.000 (0.001) loss 0.3201 (1.2084) lr 4.1221e-04 eta 10:35:29
epoch [41/50] batch [1240/2000] time 2.005 (2.030) data 0.000 (0.001) loss 2.2752 (1.2061) lr 4.1221e-04 eta 10:34:49
epoch [41/50] batch [1260/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.7434 (1.2079) lr 4.1221e-04 eta 10:34:09
epoch [41/50] batch [1280/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.2724 (1.2040) lr 4.1221e-04 eta 10:33:29
epoch [41/50] batch [1300/2000] time 1.977 (2.030) data 0.000 (0.001) loss 0.2990 (1.2015) lr 4.1221e-04 eta 10:32:49
epoch [41/50] batch [1320/2000] time 1.998 (2.030) data 0.000 (0.001) loss 2.2894 (1.2025) lr 4.1221e-04 eta 10:32:07
epoch [41/50] batch [1340/2000] time 2.050 (2.030) data 0.000 (0.001) loss 2.6908 (1.2008) lr 4.1221e-04 eta 10:31:28
epoch [41/50] batch [1360/2000] time 2.051 (2.030) data 0.000 (0.001) loss 2.0909 (1.1952) lr 4.1221e-04 eta 10:30:46
epoch [41/50] batch [1380/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.0947 (1.1907) lr 4.1221e-04 eta 10:30:06
epoch [41/50] batch [1400/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.9754 (1.1914) lr 4.1221e-04 eta 10:29:24
epoch [41/50] batch [1420/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.5017 (1.1913) lr 4.1221e-04 eta 10:28:45
epoch [41/50] batch [1440/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.7118 (1.1926) lr 4.1221e-04 eta 10:28:03
epoch [41/50] batch [1460/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.2695 (1.1908) lr 4.1221e-04 eta 10:27:22
epoch [41/50] batch [1480/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.7592 (1.1917) lr 4.1221e-04 eta 10:26:41
epoch [41/50] batch [1500/2000] time 1.973 (2.030) data 0.000 (0.001) loss 3.6028 (1.1932) lr 4.1221e-04 eta 10:25:58
epoch [41/50] batch [1520/2000] time 2.027 (2.030) data 0.000 (0.001) loss 2.1817 (1.1987) lr 4.1221e-04 eta 10:25:17
epoch [41/50] batch [1540/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.5237 (1.1956) lr 4.1221e-04 eta 10:24:35
epoch [41/50] batch [1560/2000] time 2.051 (2.030) data 0.000 (0.001) loss 2.6510 (1.1982) lr 4.1221e-04 eta 10:23:55
epoch [41/50] batch [1580/2000] time 2.002 (2.030) data 0.000 (0.001) loss 1.4691 (1.1979) lr 4.1221e-04 eta 10:23:15
epoch [41/50] batch [1600/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.1653 (1.1938) lr 4.1221e-04 eta 10:22:32
epoch [41/50] batch [1620/2000] time 2.030 (2.030) data 0.000 (0.001) loss 1.7646 (1.1975) lr 4.1221e-04 eta 10:21:50
epoch [41/50] batch [1640/2000] time 2.053 (2.030) data 0.000 (0.001) loss 2.1980 (1.1985) lr 4.1221e-04 eta 10:21:10
epoch [41/50] batch [1660/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.0749 (1.1981) lr 4.1221e-04 eta 10:20:29
epoch [41/50] batch [1680/2000] time 1.974 (2.030) data 0.001 (0.001) loss 0.8258 (1.1977) lr 4.1221e-04 eta 10:19:47
epoch [41/50] batch [1700/2000] time 2.002 (2.030) data 0.000 (0.001) loss 1.0757 (1.1968) lr 4.1221e-04 eta 10:19:06
epoch [41/50] batch [1720/2000] time 2.057 (2.030) data 0.000 (0.001) loss 1.1109 (1.1926) lr 4.1221e-04 eta 10:18:27
epoch [41/50] batch [1740/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.0365 (1.1946) lr 4.1221e-04 eta 10:17:46
epoch [41/50] batch [1760/2000] time 2.049 (2.030) data 0.000 (0.000) loss 0.2786 (1.1966) lr 4.1221e-04 eta 10:17:05
epoch [41/50] batch [1780/2000] time 2.031 (2.030) data 0.000 (0.000) loss 0.1421 (1.1958) lr 4.1221e-04 eta 10:16:23
epoch [41/50] batch [1800/2000] time 2.055 (2.030) data 0.000 (0.000) loss 0.3761 (1.1966) lr 4.1221e-04 eta 10:15:42
epoch [41/50] batch [1820/2000] time 2.031 (2.030) data 0.000 (0.000) loss 1.6631 (1.1968) lr 4.1221e-04 eta 10:15:01
epoch [41/50] batch [1840/2000] time 2.052 (2.030) data 0.000 (0.000) loss 2.2329 (1.2019) lr 4.1221e-04 eta 10:14:21
epoch [41/50] batch [1860/2000] time 2.055 (2.030) data 0.000 (0.000) loss 0.8396 (1.2015) lr 4.1221e-04 eta 10:13:39
epoch [41/50] batch [1880/2000] time 1.996 (2.030) data 0.000 (0.000) loss 0.3682 (1.2028) lr 4.1221e-04 eta 10:12:59
epoch [41/50] batch [1900/2000] time 2.051 (2.030) data 0.000 (0.000) loss 0.4838 (1.2040) lr 4.1221e-04 eta 10:12:19
epoch [41/50] batch [1920/2000] time 2.055 (2.030) data 0.000 (0.000) loss 3.6931 (1.2041) lr 4.1221e-04 eta 10:11:40
epoch [41/50] batch [1940/2000] time 2.051 (2.030) data 0.000 (0.000) loss 1.1906 (1.2032) lr 4.1221e-04 eta 10:11:00
epoch [41/50] batch [1960/2000] time 2.030 (2.030) data 0.000 (0.000) loss 2.0376 (1.2040) lr 4.1221e-04 eta 10:10:19
epoch [41/50] batch [1980/2000] time 2.055 (2.030) data 0.000 (0.000) loss 0.5638 (1.2021) lr 4.1221e-04 eta 10:09:39
epoch [41/50] batch [2000/2000] time 2.051 (2.030) data 0.000 (0.000) loss 1.5625 (1.2019) lr 3.6258e-04 eta 10:08:58
epoch [42/50] batch [20/2000] time 2.027 (2.068) data 0.000 (0.027) loss 1.1775 (1.1909) lr 3.6258e-04 eta 10:19:38
epoch [42/50] batch [40/2000] time 2.027 (2.047) data 0.000 (0.014) loss 2.0470 (1.0973) lr 3.6258e-04 eta 10:12:50
epoch [42/50] batch [60/2000] time 1.995 (2.041) data 0.001 (0.009) loss 1.0170 (1.1337) lr 3.6258e-04 eta 10:10:18
epoch [42/50] batch [80/2000] time 2.000 (2.038) data 0.000 (0.007) loss 0.0966 (1.1441) lr 3.6258e-04 eta 10:08:38
epoch [42/50] batch [100/2000] time 2.048 (2.036) data 0.000 (0.006) loss 0.9701 (1.1709) lr 3.6258e-04 eta 10:07:23
epoch [42/50] batch [120/2000] time 2.029 (2.035) data 0.000 (0.005) loss 2.1299 (1.2274) lr 3.6258e-04 eta 10:06:21
epoch [42/50] batch [140/2000] time 2.051 (2.034) data 0.000 (0.004) loss 0.5439 (1.2152) lr 3.6258e-04 eta 10:05:28
epoch [42/50] batch [160/2000] time 1.999 (2.033) data 0.000 (0.004) loss 1.6497 (1.1760) lr 3.6258e-04 eta 10:04:29
epoch [42/50] batch [180/2000] time 2.054 (2.032) data 0.000 (0.003) loss 1.0434 (1.1844) lr 3.6258e-04 eta 10:03:34
epoch [42/50] batch [200/2000] time 2.051 (2.032) data 0.000 (0.003) loss 0.2107 (1.1759) lr 3.6258e-04 eta 10:02:44
epoch [42/50] batch [220/2000] time 2.029 (2.031) data 0.000 (0.003) loss 0.6313 (1.1962) lr 3.6258e-04 eta 10:01:45
epoch [42/50] batch [240/2000] time 2.030 (2.030) data 0.000 (0.002) loss 1.7588 (1.1795) lr 3.6258e-04 eta 10:00:58
epoch [42/50] batch [260/2000] time 2.050 (2.030) data 0.000 (0.002) loss 3.0759 (1.2163) lr 3.6258e-04 eta 10:00:14
epoch [42/50] batch [280/2000] time 1.996 (2.030) data 0.000 (0.002) loss 1.0791 (1.2077) lr 3.6258e-04 eta 9:59:30
epoch [42/50] batch [300/2000] time 2.050 (2.030) data 0.000 (0.002) loss 2.2064 (1.2139) lr 3.6258e-04 eta 9:58:52
epoch [42/50] batch [320/2000] time 2.028 (2.030) data 0.000 (0.002) loss 0.9737 (1.2236) lr 3.6258e-04 eta 9:58:18
epoch [42/50] batch [340/2000] time 1.997 (2.030) data 0.000 (0.002) loss 2.6076 (1.2256) lr 3.6258e-04 eta 9:57:34
epoch [42/50] batch [360/2000] time 2.051 (2.030) data 0.000 (0.002) loss 0.1258 (1.2365) lr 3.6258e-04 eta 9:56:53
epoch [42/50] batch [380/2000] time 1.973 (2.030) data 0.000 (0.002) loss 0.8226 (1.2232) lr 3.6258e-04 eta 9:56:13
epoch [42/50] batch [400/2000] time 2.028 (2.030) data 0.000 (0.002) loss 0.2807 (1.2305) lr 3.6258e-04 eta 9:55:30
epoch [42/50] batch [420/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.2976 (1.2255) lr 3.6258e-04 eta 9:54:44
epoch [42/50] batch [440/2000] time 2.001 (2.030) data 0.000 (0.001) loss 2.3426 (1.2406) lr 3.6258e-04 eta 9:54:05
epoch [42/50] batch [460/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.9146 (1.2439) lr 3.6258e-04 eta 9:53:30
epoch [42/50] batch [480/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.5811 (1.2333) lr 3.6258e-04 eta 9:52:47
epoch [42/50] batch [500/2000] time 2.025 (2.030) data 0.000 (0.001) loss 1.2008 (1.2284) lr 3.6258e-04 eta 9:52:04
epoch [42/50] batch [520/2000] time 2.048 (2.030) data 0.000 (0.001) loss 1.6165 (1.2346) lr 3.6258e-04 eta 9:51:26
epoch [42/50] batch [540/2000] time 2.028 (2.030) data 0.000 (0.001) loss 3.1054 (1.2363) lr 3.6258e-04 eta 9:50:46
epoch [42/50] batch [560/2000] time 1.995 (2.030) data 0.000 (0.001) loss 3.8995 (1.2372) lr 3.6258e-04 eta 9:50:06
epoch [42/50] batch [580/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.3321 (1.2283) lr 3.6258e-04 eta 9:49:22
epoch [42/50] batch [600/2000] time 2.050 (2.030) data 0.001 (0.001) loss 1.1148 (1.2319) lr 3.6258e-04 eta 9:48:41
epoch [42/50] batch [620/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.9307 (1.2220) lr 3.6258e-04 eta 9:48:00
epoch [42/50] batch [640/2000] time 1.973 (2.030) data 0.000 (0.001) loss 0.1474 (1.2225) lr 3.6258e-04 eta 9:47:17
epoch [42/50] batch [660/2000] time 2.026 (2.030) data 0.000 (0.001) loss 1.3634 (1.2208) lr 3.6258e-04 eta 9:46:34
epoch [42/50] batch [680/2000] time 1.992 (2.030) data 0.000 (0.001) loss 1.8363 (1.2277) lr 3.6258e-04 eta 9:45:52
epoch [42/50] batch [700/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.8687 (1.2284) lr 3.6258e-04 eta 9:45:09
epoch [42/50] batch [720/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.0683 (1.2236) lr 3.6258e-04 eta 9:44:27
epoch [42/50] batch [740/2000] time 2.031 (2.029) data 0.000 (0.001) loss 1.0446 (1.2246) lr 3.6258e-04 eta 9:43:48
epoch [42/50] batch [760/2000] time 2.031 (2.029) data 0.000 (0.001) loss 2.1278 (1.2225) lr 3.6258e-04 eta 9:43:06
epoch [42/50] batch [780/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.0117 (1.2153) lr 3.6258e-04 eta 9:42:24
epoch [42/50] batch [800/2000] time 2.052 (2.029) data 0.000 (0.001) loss 3.8226 (1.2216) lr 3.6258e-04 eta 9:41:45
epoch [42/50] batch [820/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.9125 (1.2276) lr 3.6258e-04 eta 9:41:02
epoch [42/50] batch [840/2000] time 2.025 (2.029) data 0.000 (0.001) loss 1.7849 (1.2228) lr 3.6258e-04 eta 9:40:20
epoch [42/50] batch [860/2000] time 1.974 (2.029) data 0.000 (0.001) loss 1.5118 (1.2222) lr 3.6258e-04 eta 9:39:40
epoch [42/50] batch [880/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.7718 (1.2237) lr 3.6258e-04 eta 9:38:58
epoch [42/50] batch [900/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.2341 (1.2264) lr 3.6258e-04 eta 9:38:18
epoch [42/50] batch [920/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.3118 (1.2211) lr 3.6258e-04 eta 9:37:40
epoch [42/50] batch [940/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.7081 (1.2247) lr 3.6258e-04 eta 9:37:00
epoch [42/50] batch [960/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.8971 (1.2271) lr 3.6258e-04 eta 9:36:17
epoch [42/50] batch [980/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.3061 (1.2315) lr 3.6258e-04 eta 9:35:35
epoch [42/50] batch [1000/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.5579 (1.2385) lr 3.6258e-04 eta 9:34:53
epoch [42/50] batch [1020/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.1163 (1.2367) lr 3.6258e-04 eta 9:34:12
epoch [42/50] batch [1040/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.6144 (1.2354) lr 3.6258e-04 eta 9:33:33
epoch [42/50] batch [1060/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.3571 (1.2361) lr 3.6258e-04 eta 9:32:53
epoch [42/50] batch [1080/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.0522 (1.2300) lr 3.6258e-04 eta 9:32:13
epoch [42/50] batch [1100/2000] time 2.002 (2.029) data 0.000 (0.001) loss 0.3380 (1.2396) lr 3.6258e-04 eta 9:31:32
epoch [42/50] batch [1120/2000] time 2.053 (2.029) data 0.000 (0.001) loss 2.5863 (1.2427) lr 3.6258e-04 eta 9:30:51
epoch [42/50] batch [1140/2000] time 2.052 (2.029) data 0.001 (0.001) loss 0.4919 (1.2373) lr 3.6258e-04 eta 9:30:11
epoch [42/50] batch [1160/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.1411 (1.2328) lr 3.6258e-04 eta 9:29:31
epoch [42/50] batch [1180/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.0333 (1.2348) lr 3.6258e-04 eta 9:28:52
epoch [42/50] batch [1200/2000] time 2.049 (2.029) data 0.000 (0.001) loss 2.6557 (1.2346) lr 3.6258e-04 eta 9:28:12
epoch [42/50] batch [1220/2000] time 2.028 (2.029) data 0.000 (0.001) loss 3.2365 (1.2331) lr 3.6258e-04 eta 9:27:32
epoch [42/50] batch [1240/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.4490 (1.2338) lr 3.6258e-04 eta 9:26:50
epoch [42/50] batch [1260/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.7621 (1.2317) lr 3.6258e-04 eta 9:26:10
epoch [42/50] batch [1280/2000] time 2.056 (2.029) data 0.000 (0.001) loss 1.2084 (1.2285) lr 3.6258e-04 eta 9:25:29
epoch [42/50] batch [1300/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.5232 (1.2288) lr 3.6258e-04 eta 9:24:51
epoch [42/50] batch [1320/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.1915 (1.2248) lr 3.6258e-04 eta 9:24:10
epoch [42/50] batch [1340/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.0475 (1.2236) lr 3.6258e-04 eta 9:23:26
epoch [42/50] batch [1360/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.1163 (1.2245) lr 3.6258e-04 eta 9:22:46
epoch [42/50] batch [1380/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.7392 (1.2252) lr 3.6258e-04 eta 9:22:06
epoch [42/50] batch [1400/2000] time 2.031 (2.029) data 0.000 (0.001) loss 0.7551 (1.2225) lr 3.6258e-04 eta 9:21:25
epoch [42/50] batch [1420/2000] time 2.053 (2.029) data 0.000 (0.001) loss 2.7321 (1.2279) lr 3.6258e-04 eta 9:20:45
epoch [42/50] batch [1440/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.7706 (1.2322) lr 3.6258e-04 eta 9:20:03
epoch [42/50] batch [1460/2000] time 2.050 (2.029) data 0.000 (0.001) loss 3.5080 (1.2394) lr 3.6258e-04 eta 9:19:21
epoch [42/50] batch [1480/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.7816 (1.2410) lr 3.6258e-04 eta 9:18:40
epoch [42/50] batch [1500/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.8634 (1.2395) lr 3.6258e-04 eta 9:17:58
epoch [42/50] batch [1520/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.0273 (1.2437) lr 3.6258e-04 eta 9:17:17
epoch [42/50] batch [1540/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.3537 (1.2449) lr 3.6258e-04 eta 9:16:38
epoch [42/50] batch [1560/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.7613 (1.2488) lr 3.6258e-04 eta 9:15:58
epoch [42/50] batch [1580/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.6410 (1.2457) lr 3.6258e-04 eta 9:15:17
epoch [42/50] batch [1600/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.6511 (1.2402) lr 3.6258e-04 eta 9:14:37
epoch [42/50] batch [1620/2000] time 1.972 (2.029) data 0.000 (0.001) loss 0.9787 (1.2420) lr 3.6258e-04 eta 9:13:55
epoch [42/50] batch [1640/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.6639 (1.2405) lr 3.6258e-04 eta 9:13:12
epoch [42/50] batch [1660/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.1044 (1.2446) lr 3.6258e-04 eta 9:12:32
epoch [42/50] batch [1680/2000] time 2.001 (2.029) data 0.001 (0.001) loss 1.0345 (1.2414) lr 3.6258e-04 eta 9:11:53
epoch [42/50] batch [1700/2000] time 2.025 (2.029) data 0.000 (0.001) loss 1.0234 (1.2436) lr 3.6258e-04 eta 9:11:12
epoch [42/50] batch [1720/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.5517 (1.2431) lr 3.6258e-04 eta 9:10:31
epoch [42/50] batch [1740/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.6152 (1.2372) lr 3.6258e-04 eta 9:09:50
epoch [42/50] batch [1760/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.0851 (1.2389) lr 3.6258e-04 eta 9:09:09
epoch [42/50] batch [1780/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.8342 (1.2399) lr 3.6258e-04 eta 9:08:28
epoch [42/50] batch [1800/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.4493 (1.2361) lr 3.6258e-04 eta 9:07:47
epoch [42/50] batch [1820/2000] time 1.998 (2.029) data 0.000 (0.000) loss 2.0319 (1.2371) lr 3.6258e-04 eta 9:07:06
epoch [42/50] batch [1840/2000] time 2.050 (2.029) data 0.000 (0.000) loss 0.4290 (1.2384) lr 3.6258e-04 eta 9:06:26
epoch [42/50] batch [1860/2000] time 2.030 (2.029) data 0.000 (0.000) loss 1.2245 (1.2383) lr 3.6258e-04 eta 9:05:46
epoch [42/50] batch [1880/2000] time 2.001 (2.029) data 0.000 (0.000) loss 0.0658 (1.2375) lr 3.6258e-04 eta 9:05:05
epoch [42/50] batch [1900/2000] time 2.051 (2.029) data 0.000 (0.000) loss 1.8441 (1.2349) lr 3.6258e-04 eta 9:04:24
epoch [42/50] batch [1920/2000] time 1.997 (2.029) data 0.000 (0.000) loss 0.2172 (1.2334) lr 3.6258e-04 eta 9:03:43
epoch [42/50] batch [1940/2000] time 2.049 (2.029) data 0.000 (0.000) loss 1.3811 (1.2370) lr 3.6258e-04 eta 9:03:04
epoch [42/50] batch [1960/2000] time 2.050 (2.029) data 0.000 (0.000) loss 1.2145 (1.2342) lr 3.6258e-04 eta 9:02:23
epoch [42/50] batch [1980/2000] time 2.004 (2.029) data 0.000 (0.000) loss 0.9607 (1.2344) lr 3.6258e-04 eta 9:01:42
epoch [42/50] batch [2000/2000] time 2.000 (2.029) data 0.000 (0.000) loss 1.2012 (1.2329) lr 3.1545e-04 eta 9:01:01
epoch [43/50] batch [20/2000] time 2.052 (2.058) data 0.000 (0.026) loss 2.4244 (1.5238) lr 3.1545e-04 eta 9:07:59
epoch [43/50] batch [40/2000] time 1.977 (2.042) data 0.000 (0.013) loss 0.5134 (1.3527) lr 3.1545e-04 eta 9:03:11
epoch [43/50] batch [60/2000] time 2.032 (2.038) data 0.001 (0.009) loss 0.7303 (1.3344) lr 3.1545e-04 eta 9:01:18
epoch [43/50] batch [80/2000] time 2.057 (2.036) data 0.000 (0.007) loss 0.7228 (1.2836) lr 3.1545e-04 eta 9:00:13
epoch [43/50] batch [100/2000] time 2.050 (2.036) data 0.000 (0.005) loss 0.5089 (1.3101) lr 3.1545e-04 eta 8:59:35
epoch [43/50] batch [120/2000] time 1.995 (2.034) data 0.000 (0.005) loss 0.1674 (1.2543) lr 3.1545e-04 eta 8:58:18
epoch [43/50] batch [140/2000] time 2.049 (2.033) data 0.000 (0.004) loss 0.1918 (1.2326) lr 3.1545e-04 eta 8:57:26
epoch [43/50] batch [160/2000] time 2.048 (2.032) data 0.000 (0.003) loss 2.0101 (1.2102) lr 3.1545e-04 eta 8:56:24
epoch [43/50] batch [180/2000] time 2.026 (2.031) data 0.000 (0.003) loss 0.0527 (1.1973) lr 3.1545e-04 eta 8:55:36
epoch [43/50] batch [200/2000] time 2.030 (2.031) data 0.000 (0.003) loss 0.4995 (1.1986) lr 3.1545e-04 eta 8:54:47
epoch [43/50] batch [220/2000] time 2.028 (2.031) data 0.000 (0.003) loss 0.7316 (1.2422) lr 3.1545e-04 eta 8:54:13
epoch [43/50] batch [240/2000] time 2.053 (2.032) data 0.000 (0.002) loss 2.3837 (1.2362) lr 3.1545e-04 eta 8:53:36
epoch [43/50] batch [260/2000] time 2.048 (2.031) data 0.000 (0.002) loss 1.8839 (1.2506) lr 3.1545e-04 eta 8:52:49
epoch [43/50] batch [280/2000] time 2.051 (2.031) data 0.000 (0.002) loss 2.7456 (1.2564) lr 3.1545e-04 eta 8:52:14
epoch [43/50] batch [300/2000] time 2.051 (2.031) data 0.000 (0.002) loss 0.1739 (1.2311) lr 3.1545e-04 eta 8:51:32
epoch [43/50] batch [320/2000] time 2.050 (2.031) data 0.000 (0.002) loss 1.3368 (1.2280) lr 3.1545e-04 eta 8:50:50
epoch [43/50] batch [340/2000] time 1.996 (2.031) data 0.000 (0.002) loss 2.2571 (1.2372) lr 3.1545e-04 eta 8:50:02
epoch [43/50] batch [360/2000] time 2.048 (2.031) data 0.000 (0.002) loss 1.0397 (1.2196) lr 3.1545e-04 eta 8:49:18
epoch [43/50] batch [380/2000] time 1.973 (2.030) data 0.000 (0.002) loss 1.0069 (1.2064) lr 3.1545e-04 eta 8:48:35
epoch [43/50] batch [400/2000] time 2.051 (2.031) data 0.000 (0.002) loss 1.8432 (1.1903) lr 3.1545e-04 eta 8:47:56
epoch [43/50] batch [420/2000] time 1.974 (2.031) data 0.000 (0.001) loss 0.5270 (1.1835) lr 3.1545e-04 eta 8:47:17
epoch [43/50] batch [440/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.9720 (1.1736) lr 3.1545e-04 eta 8:46:31
epoch [43/50] batch [460/2000] time 2.054 (2.030) data 0.000 (0.001) loss 2.1851 (1.1628) lr 3.1545e-04 eta 8:45:51
epoch [43/50] batch [480/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.0593 (1.1743) lr 3.1545e-04 eta 8:45:10
epoch [43/50] batch [500/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.2315 (1.1830) lr 3.1545e-04 eta 8:44:26
epoch [43/50] batch [520/2000] time 2.050 (2.030) data 0.000 (0.001) loss 2.8367 (1.1775) lr 3.1545e-04 eta 8:43:43
epoch [43/50] batch [540/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.1474 (1.1817) lr 3.1545e-04 eta 8:43:00
epoch [43/50] batch [560/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.7164 (1.1885) lr 3.1545e-04 eta 8:42:24
epoch [43/50] batch [580/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.3805 (1.1957) lr 3.1545e-04 eta 8:41:42
epoch [43/50] batch [600/2000] time 2.055 (2.030) data 0.001 (0.001) loss 0.4980 (1.2072) lr 3.1545e-04 eta 8:41:02
epoch [43/50] batch [620/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.7881 (1.2033) lr 3.1545e-04 eta 8:40:26
epoch [43/50] batch [640/2000] time 2.001 (2.030) data 0.000 (0.001) loss 0.6948 (1.1981) lr 3.1545e-04 eta 8:39:43
epoch [43/50] batch [660/2000] time 2.052 (2.030) data 0.000 (0.001) loss 3.9018 (1.2116) lr 3.1545e-04 eta 8:39:03
epoch [43/50] batch [680/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.6832 (1.2161) lr 3.1545e-04 eta 8:38:25
epoch [43/50] batch [700/2000] time 2.059 (2.030) data 0.000 (0.001) loss 0.9494 (1.2125) lr 3.1545e-04 eta 8:37:43
epoch [43/50] batch [720/2000] time 2.030 (2.031) data 0.000 (0.001) loss 0.2901 (1.2144) lr 3.1545e-04 eta 8:37:06
epoch [43/50] batch [740/2000] time 2.000 (2.031) data 0.000 (0.001) loss 0.1490 (1.2211) lr 3.1545e-04 eta 8:36:27
epoch [43/50] batch [760/2000] time 2.000 (2.031) data 0.000 (0.001) loss 2.2323 (1.2293) lr 3.1545e-04 eta 8:35:46
epoch [43/50] batch [780/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.4299 (1.2290) lr 3.1545e-04 eta 8:35:06
epoch [43/50] batch [800/2000] time 2.030 (2.031) data 0.000 (0.001) loss 1.9594 (1.2336) lr 3.1545e-04 eta 8:34:25
epoch [43/50] batch [820/2000] time 2.002 (2.031) data 0.000 (0.001) loss 0.5746 (1.2278) lr 3.1545e-04 eta 8:33:44
epoch [43/50] batch [840/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.9915 (1.2378) lr 3.1545e-04 eta 8:33:02
epoch [43/50] batch [860/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.6751 (1.2336) lr 3.1545e-04 eta 8:32:19
epoch [43/50] batch [880/2000] time 2.001 (2.030) data 0.000 (0.001) loss 0.7328 (1.2386) lr 3.1545e-04 eta 8:31:38
epoch [43/50] batch [900/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.7307 (1.2360) lr 3.1545e-04 eta 8:30:59
epoch [43/50] batch [920/2000] time 2.030 (2.030) data 0.000 (0.001) loss 2.4695 (1.2372) lr 3.1545e-04 eta 8:30:19
epoch [43/50] batch [940/2000] time 1.975 (2.030) data 0.000 (0.001) loss 1.1533 (1.2291) lr 3.1545e-04 eta 8:29:37
epoch [43/50] batch [960/2000] time 2.059 (2.031) data 0.000 (0.001) loss 1.0366 (1.2270) lr 3.1545e-04 eta 8:28:59
epoch [43/50] batch [980/2000] time 2.054 (2.031) data 0.000 (0.001) loss 2.4401 (1.2235) lr 3.1545e-04 eta 8:28:19
epoch [43/50] batch [1000/2000] time 2.000 (2.031) data 0.000 (0.001) loss 3.4314 (1.2259) lr 3.1545e-04 eta 8:27:38
epoch [43/50] batch [1020/2000] time 2.053 (2.031) data 0.000 (0.001) loss 3.2563 (1.2216) lr 3.1545e-04 eta 8:26:57
epoch [43/50] batch [1040/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.8500 (1.2220) lr 3.1545e-04 eta 8:26:16
epoch [43/50] batch [1060/2000] time 2.031 (2.030) data 0.000 (0.001) loss 2.6317 (1.2219) lr 3.1545e-04 eta 8:25:35
epoch [43/50] batch [1080/2000] time 1.976 (2.031) data 0.000 (0.001) loss 2.3588 (1.2200) lr 3.1545e-04 eta 8:24:55
epoch [43/50] batch [1100/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.8231 (1.2215) lr 3.1545e-04 eta 8:24:15
epoch [43/50] batch [1120/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.1779 (1.2164) lr 3.1545e-04 eta 8:23:36
epoch [43/50] batch [1140/2000] time 1.997 (2.031) data 0.001 (0.001) loss 2.3148 (1.2243) lr 3.1545e-04 eta 8:22:55
epoch [43/50] batch [1160/2000] time 1.978 (2.031) data 0.000 (0.001) loss 0.7345 (1.2174) lr 3.1545e-04 eta 8:22:15
epoch [43/50] batch [1180/2000] time 2.000 (2.031) data 0.000 (0.001) loss 0.6012 (1.2139) lr 3.1545e-04 eta 8:21:34
epoch [43/50] batch [1200/2000] time 2.055 (2.031) data 0.000 (0.001) loss 1.3660 (1.2108) lr 3.1545e-04 eta 8:20:54
epoch [43/50] batch [1220/2000] time 2.056 (2.031) data 0.000 (0.001) loss 0.8208 (1.2069) lr 3.1545e-04 eta 8:20:15
epoch [43/50] batch [1240/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.3314 (1.2102) lr 3.1545e-04 eta 8:19:35
epoch [43/50] batch [1260/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.2916 (1.2070) lr 3.1545e-04 eta 8:18:55
epoch [43/50] batch [1280/2000] time 1.977 (2.031) data 0.000 (0.001) loss 0.0903 (1.2085) lr 3.1545e-04 eta 8:18:14
epoch [43/50] batch [1300/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.9623 (1.2108) lr 3.1545e-04 eta 8:17:34
epoch [43/50] batch [1320/2000] time 2.055 (2.031) data 0.000 (0.001) loss 0.1462 (1.2115) lr 3.1545e-04 eta 8:16:53
epoch [43/50] batch [1340/2000] time 2.056 (2.031) data 0.000 (0.001) loss 0.5353 (1.2077) lr 3.1545e-04 eta 8:16:13
epoch [43/50] batch [1360/2000] time 2.033 (2.031) data 0.000 (0.001) loss 0.9842 (1.2015) lr 3.1545e-04 eta 8:15:31
epoch [43/50] batch [1380/2000] time 2.028 (2.031) data 0.000 (0.001) loss 2.9164 (1.2013) lr 3.1545e-04 eta 8:14:50
epoch [43/50] batch [1400/2000] time 2.051 (2.031) data 0.000 (0.001) loss 2.0193 (1.1992) lr 3.1545e-04 eta 8:14:10
epoch [43/50] batch [1420/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.3897 (1.2012) lr 3.1545e-04 eta 8:13:29
epoch [43/50] batch [1440/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.2696 (1.2008) lr 3.1545e-04 eta 8:12:50
epoch [43/50] batch [1460/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.4245 (1.1963) lr 3.1545e-04 eta 8:12:09
epoch [43/50] batch [1480/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.9972 (1.1930) lr 3.1545e-04 eta 8:11:28
epoch [43/50] batch [1500/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.4324 (1.1968) lr 3.1545e-04 eta 8:10:47
epoch [43/50] batch [1520/2000] time 2.031 (2.031) data 0.000 (0.001) loss 2.9745 (1.1946) lr 3.1545e-04 eta 8:10:06
epoch [43/50] batch [1540/2000] time 2.030 (2.031) data 0.000 (0.001) loss 0.7266 (1.1924) lr 3.1545e-04 eta 8:09:25
epoch [43/50] batch [1560/2000] time 2.058 (2.031) data 0.000 (0.001) loss 1.9431 (1.1898) lr 3.1545e-04 eta 8:08:45
epoch [43/50] batch [1580/2000] time 1.995 (2.031) data 0.000 (0.001) loss 2.9732 (1.1894) lr 3.1545e-04 eta 8:08:04
epoch [43/50] batch [1600/2000] time 1.998 (2.031) data 0.000 (0.001) loss 1.0234 (1.1874) lr 3.1545e-04 eta 8:07:21
epoch [43/50] batch [1620/2000] time 2.050 (2.031) data 0.000 (0.001) loss 1.5322 (1.1849) lr 3.1545e-04 eta 8:06:41
epoch [43/50] batch [1640/2000] time 2.052 (2.031) data 0.000 (0.001) loss 2.8286 (1.1869) lr 3.1545e-04 eta 8:06:01
epoch [43/50] batch [1660/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.6290 (1.1845) lr 3.1545e-04 eta 8:05:19
epoch [43/50] batch [1680/2000] time 2.051 (2.031) data 0.001 (0.001) loss 0.0792 (1.1845) lr 3.1545e-04 eta 8:04:38
epoch [43/50] batch [1700/2000] time 1.969 (2.031) data 0.000 (0.001) loss 0.3510 (1.1835) lr 3.1545e-04 eta 8:03:57
epoch [43/50] batch [1720/2000] time 2.045 (2.030) data 0.000 (0.001) loss 1.7989 (1.1813) lr 3.1545e-04 eta 8:03:15
epoch [43/50] batch [1740/2000] time 2.047 (2.030) data 0.000 (0.001) loss 0.1512 (1.1819) lr 3.1545e-04 eta 8:02:33
epoch [43/50] batch [1760/2000] time 1.972 (2.030) data 0.000 (0.001) loss 1.3513 (1.1841) lr 3.1545e-04 eta 8:01:50
epoch [43/50] batch [1780/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.6031 (1.1819) lr 3.1545e-04 eta 8:01:10
epoch [43/50] batch [1800/2000] time 2.002 (2.030) data 0.000 (0.000) loss 1.7750 (1.1786) lr 3.1545e-04 eta 8:00:29
epoch [43/50] batch [1820/2000] time 2.053 (2.030) data 0.000 (0.000) loss 0.1403 (1.1754) lr 3.1545e-04 eta 7:59:48
epoch [43/50] batch [1840/2000] time 2.053 (2.030) data 0.000 (0.000) loss 1.4629 (1.1762) lr 3.1545e-04 eta 7:59:08
epoch [43/50] batch [1860/2000] time 2.029 (2.030) data 0.000 (0.000) loss 2.9789 (1.1770) lr 3.1545e-04 eta 7:58:26
epoch [43/50] batch [1880/2000] time 2.029 (2.030) data 0.000 (0.000) loss 0.6137 (1.1782) lr 3.1545e-04 eta 7:57:46
epoch [43/50] batch [1900/2000] time 1.997 (2.030) data 0.000 (0.000) loss 0.6735 (1.1797) lr 3.1545e-04 eta 7:57:05
epoch [43/50] batch [1920/2000] time 2.051 (2.030) data 0.000 (0.000) loss 0.7812 (1.1790) lr 3.1545e-04 eta 7:56:25
epoch [43/50] batch [1940/2000] time 2.002 (2.030) data 0.000 (0.000) loss 0.4048 (1.1776) lr 3.1545e-04 eta 7:55:45
epoch [43/50] batch [1960/2000] time 2.029 (2.030) data 0.000 (0.000) loss 0.1995 (1.1741) lr 3.1545e-04 eta 7:55:05
epoch [43/50] batch [1980/2000] time 2.004 (2.030) data 0.000 (0.000) loss 1.2137 (1.1779) lr 3.1545e-04 eta 7:54:24
epoch [43/50] batch [2000/2000] time 1.997 (2.030) data 0.000 (0.000) loss 0.6884 (1.1799) lr 2.7103e-04 eta 7:53:44
epoch [44/50] batch [20/2000] time 2.054 (2.055) data 0.000 (0.026) loss 1.7314 (1.0430) lr 2.7103e-04 eta 7:58:50
epoch [44/50] batch [40/2000] time 1.976 (2.041) data 0.000 (0.013) loss 0.7349 (1.1697) lr 2.7103e-04 eta 7:54:54
epoch [44/50] batch [60/2000] time 2.001 (2.037) data 0.001 (0.009) loss 0.3397 (1.1877) lr 2.7103e-04 eta 7:53:21
epoch [44/50] batch [80/2000] time 2.054 (2.034) data 0.000 (0.007) loss 0.6499 (1.2251) lr 2.7103e-04 eta 7:51:55
epoch [44/50] batch [100/2000] time 1.999 (2.034) data 0.000 (0.005) loss 0.9370 (1.1848) lr 2.7103e-04 eta 7:51:16
epoch [44/50] batch [120/2000] time 1.999 (2.034) data 0.000 (0.005) loss 0.7989 (1.1727) lr 2.7103e-04 eta 7:50:31
epoch [44/50] batch [140/2000] time 1.998 (2.033) data 0.000 (0.004) loss 0.7974 (1.1506) lr 2.7103e-04 eta 7:49:34
epoch [44/50] batch [160/2000] time 2.055 (2.033) data 0.000 (0.003) loss 0.9617 (1.1933) lr 2.7103e-04 eta 7:48:58
epoch [44/50] batch [180/2000] time 2.051 (2.033) data 0.000 (0.003) loss 1.3622 (1.2083) lr 2.7103e-04 eta 7:48:14
epoch [44/50] batch [200/2000] time 2.054 (2.032) data 0.000 (0.003) loss 0.9135 (1.1985) lr 2.7103e-04 eta 7:47:21
epoch [44/50] batch [220/2000] time 2.029 (2.032) data 0.000 (0.003) loss 2.0095 (1.2192) lr 2.7103e-04 eta 7:46:38
epoch [44/50] batch [240/2000] time 2.054 (2.032) data 0.000 (0.002) loss 0.1321 (1.1895) lr 2.7103e-04 eta 7:45:59
epoch [44/50] batch [260/2000] time 2.058 (2.033) data 0.000 (0.002) loss 1.5379 (1.1818) lr 2.7103e-04 eta 7:45:28
epoch [44/50] batch [280/2000] time 2.028 (2.032) data 0.000 (0.002) loss 0.5900 (1.1736) lr 2.7103e-04 eta 7:44:42
epoch [44/50] batch [300/2000] time 2.051 (2.032) data 0.000 (0.002) loss 0.7120 (1.1552) lr 2.7103e-04 eta 7:44:02
epoch [44/50] batch [320/2000] time 2.054 (2.032) data 0.000 (0.002) loss 0.6759 (1.1640) lr 2.7103e-04 eta 7:43:21
epoch [44/50] batch [340/2000] time 2.052 (2.032) data 0.000 (0.002) loss 1.6987 (1.1680) lr 2.7103e-04 eta 7:42:37
epoch [44/50] batch [360/2000] time 2.050 (2.032) data 0.000 (0.002) loss 0.5007 (1.1416) lr 2.7103e-04 eta 7:42:00
epoch [44/50] batch [380/2000] time 2.050 (2.032) data 0.000 (0.002) loss 3.3903 (1.1408) lr 2.7103e-04 eta 7:41:19
epoch [44/50] batch [400/2000] time 2.051 (2.032) data 0.000 (0.002) loss 1.3456 (1.1403) lr 2.7103e-04 eta 7:40:38
epoch [44/50] batch [420/2000] time 2.048 (2.032) data 0.000 (0.001) loss 0.5405 (1.1332) lr 2.7103e-04 eta 7:39:48
epoch [44/50] batch [440/2000] time 1.997 (2.031) data 0.000 (0.001) loss 1.4411 (1.1223) lr 2.7103e-04 eta 7:39:05
epoch [44/50] batch [460/2000] time 2.025 (2.031) data 0.000 (0.001) loss 0.6833 (1.1188) lr 2.7103e-04 eta 7:38:24
epoch [44/50] batch [480/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.7318 (1.1265) lr 2.7103e-04 eta 7:37:42
epoch [44/50] batch [500/2000] time 2.054 (2.031) data 0.000 (0.001) loss 0.0567 (1.1286) lr 2.7103e-04 eta 7:37:05
epoch [44/50] batch [520/2000] time 2.033 (2.032) data 0.000 (0.001) loss 0.9233 (1.1320) lr 2.7103e-04 eta 7:36:24
epoch [44/50] batch [540/2000] time 2.010 (2.031) data 0.000 (0.001) loss 0.7422 (1.1183) lr 2.7103e-04 eta 7:35:42
epoch [44/50] batch [560/2000] time 2.054 (2.032) data 0.000 (0.001) loss 1.1518 (1.1198) lr 2.7103e-04 eta 7:35:07
epoch [44/50] batch [580/2000] time 2.053 (2.032) data 0.000 (0.001) loss 1.9942 (1.1211) lr 2.7103e-04 eta 7:34:28
epoch [44/50] batch [600/2000] time 2.049 (2.032) data 0.001 (0.001) loss 0.5853 (1.1295) lr 2.7103e-04 eta 7:33:44
epoch [44/50] batch [620/2000] time 2.053 (2.032) data 0.000 (0.001) loss 1.9210 (1.1331) lr 2.7103e-04 eta 7:33:01
epoch [44/50] batch [640/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.4813 (1.1340) lr 2.7103e-04 eta 7:32:18
epoch [44/50] batch [660/2000] time 1.999 (2.031) data 0.000 (0.001) loss 1.7339 (1.1424) lr 2.7103e-04 eta 7:31:36
epoch [44/50] batch [680/2000] time 1.998 (2.031) data 0.000 (0.001) loss 3.1234 (1.1445) lr 2.7103e-04 eta 7:30:58
epoch [44/50] batch [700/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.7105 (1.1454) lr 2.7103e-04 eta 7:30:17
epoch [44/50] batch [720/2000] time 1.996 (2.031) data 0.000 (0.001) loss 2.2107 (1.1511) lr 2.7103e-04 eta 7:29:35
epoch [44/50] batch [740/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.7353 (1.1480) lr 2.7103e-04 eta 7:28:52
epoch [44/50] batch [760/2000] time 2.051 (2.031) data 0.000 (0.001) loss 2.6257 (1.1519) lr 2.7103e-04 eta 7:28:13
epoch [44/50] batch [780/2000] time 2.052 (2.031) data 0.000 (0.001) loss 2.2973 (1.1479) lr 2.7103e-04 eta 7:27:32
epoch [44/50] batch [800/2000] time 2.054 (2.031) data 0.000 (0.001) loss 1.0478 (1.1398) lr 2.7103e-04 eta 7:26:52
epoch [44/50] batch [820/2000] time 2.007 (2.031) data 0.000 (0.001) loss 2.0066 (1.1349) lr 2.7103e-04 eta 7:26:12
epoch [44/50] batch [840/2000] time 1.974 (2.031) data 0.000 (0.001) loss 2.3078 (1.1463) lr 2.7103e-04 eta 7:25:32
epoch [44/50] batch [860/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.8954 (1.1471) lr 2.7103e-04 eta 7:24:49
epoch [44/50] batch [880/2000] time 1.998 (2.031) data 0.000 (0.001) loss 1.2175 (1.1513) lr 2.7103e-04 eta 7:24:07
epoch [44/50] batch [900/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.3713 (1.1580) lr 2.7103e-04 eta 7:23:23
epoch [44/50] batch [920/2000] time 1.975 (2.031) data 0.000 (0.001) loss 0.9015 (1.1596) lr 2.7103e-04 eta 7:22:44
epoch [44/50] batch [940/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.5293 (1.1568) lr 2.7103e-04 eta 7:22:03
epoch [44/50] batch [960/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.2689 (1.1561) lr 2.7103e-04 eta 7:21:19
epoch [44/50] batch [980/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.9007 (1.1569) lr 2.7103e-04 eta 7:20:38
epoch [44/50] batch [1000/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.0589 (1.1554) lr 2.7103e-04 eta 7:19:56
epoch [44/50] batch [1020/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.1687 (1.1570) lr 2.7103e-04 eta 7:19:15
epoch [44/50] batch [1040/2000] time 2.000 (2.030) data 0.000 (0.001) loss 1.7235 (1.1526) lr 2.7103e-04 eta 7:18:34
epoch [44/50] batch [1060/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.2902 (1.1493) lr 2.7103e-04 eta 7:17:55
epoch [44/50] batch [1080/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.7400 (1.1510) lr 2.7103e-04 eta 7:17:13
epoch [44/50] batch [1100/2000] time 2.000 (2.030) data 0.000 (0.001) loss 2.0337 (1.1543) lr 2.7103e-04 eta 7:16:31
epoch [44/50] batch [1120/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.5605 (1.1535) lr 2.7103e-04 eta 7:15:52
epoch [44/50] batch [1140/2000] time 2.055 (2.030) data 0.001 (0.001) loss 2.6587 (1.1623) lr 2.7103e-04 eta 7:15:11
epoch [44/50] batch [1160/2000] time 2.030 (2.031) data 0.000 (0.001) loss 0.8422 (1.1679) lr 2.7103e-04 eta 7:14:31
epoch [44/50] batch [1180/2000] time 2.026 (2.030) data 0.000 (0.001) loss 0.8561 (1.1666) lr 2.7103e-04 eta 7:13:50
epoch [44/50] batch [1200/2000] time 1.998 (2.030) data 0.000 (0.001) loss 2.0464 (1.1655) lr 2.7103e-04 eta 7:13:08
epoch [44/50] batch [1220/2000] time 2.056 (2.030) data 0.000 (0.001) loss 0.6097 (1.1665) lr 2.7103e-04 eta 7:12:25
epoch [44/50] batch [1240/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.2436 (1.1742) lr 2.7103e-04 eta 7:11:45
epoch [44/50] batch [1260/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.1487 (1.1704) lr 2.7103e-04 eta 7:11:05
epoch [44/50] batch [1280/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.8637 (1.1667) lr 2.7103e-04 eta 7:10:25
epoch [44/50] batch [1300/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.3920 (1.1653) lr 2.7103e-04 eta 7:09:44
epoch [44/50] batch [1320/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.9405 (1.1699) lr 2.7103e-04 eta 7:09:04
epoch [44/50] batch [1340/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.0425 (1.1747) lr 2.7103e-04 eta 7:08:22
epoch [44/50] batch [1360/2000] time 2.032 (2.030) data 0.000 (0.001) loss 0.7149 (1.1726) lr 2.7103e-04 eta 7:07:42
epoch [44/50] batch [1380/2000] time 1.975 (2.030) data 0.000 (0.001) loss 0.8385 (1.1738) lr 2.7103e-04 eta 7:07:01
epoch [44/50] batch [1400/2000] time 2.026 (2.030) data 0.000 (0.001) loss 0.2185 (1.1764) lr 2.7103e-04 eta 7:06:21
epoch [44/50] batch [1420/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.2320 (1.1789) lr 2.7103e-04 eta 7:05:41
epoch [44/50] batch [1440/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.5818 (1.1792) lr 2.7103e-04 eta 7:04:59
epoch [44/50] batch [1460/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.1910 (1.1767) lr 2.7103e-04 eta 7:04:18
epoch [44/50] batch [1480/2000] time 2.028 (2.030) data 0.000 (0.001) loss 1.6090 (1.1784) lr 2.7103e-04 eta 7:03:37
epoch [44/50] batch [1500/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.7105 (1.1778) lr 2.7103e-04 eta 7:02:56
epoch [44/50] batch [1520/2000] time 1.974 (2.030) data 0.000 (0.001) loss 0.6904 (1.1779) lr 2.7103e-04 eta 7:02:15
epoch [44/50] batch [1540/2000] time 2.048 (2.030) data 0.000 (0.001) loss 0.7680 (1.1788) lr 2.7103e-04 eta 7:01:33
epoch [44/50] batch [1560/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.2799 (1.1761) lr 2.7103e-04 eta 7:00:52
epoch [44/50] batch [1580/2000] time 1.974 (2.030) data 0.000 (0.001) loss 0.1938 (1.1744) lr 2.7103e-04 eta 7:00:11
epoch [44/50] batch [1600/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.0364 (1.1738) lr 2.7103e-04 eta 6:59:30
epoch [44/50] batch [1620/2000] time 2.057 (2.030) data 0.000 (0.001) loss 0.0894 (1.1728) lr 2.7103e-04 eta 6:58:51
epoch [44/50] batch [1640/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.0860 (1.1715) lr 2.7103e-04 eta 6:58:09
epoch [44/50] batch [1660/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.7977 (1.1726) lr 2.7103e-04 eta 6:57:29
epoch [44/50] batch [1680/2000] time 2.030 (2.030) data 0.001 (0.001) loss 1.1440 (1.1728) lr 2.7103e-04 eta 6:56:49
epoch [44/50] batch [1700/2000] time 2.031 (2.030) data 0.000 (0.001) loss 1.0812 (1.1749) lr 2.7103e-04 eta 6:56:08
epoch [44/50] batch [1720/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.2465 (1.1726) lr 2.7103e-04 eta 6:55:29
epoch [44/50] batch [1740/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.7435 (1.1747) lr 2.7103e-04 eta 6:54:49
epoch [44/50] batch [1760/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.1101 (1.1733) lr 2.7103e-04 eta 6:54:09
epoch [44/50] batch [1780/2000] time 2.032 (2.030) data 0.000 (0.001) loss 0.6216 (1.1751) lr 2.7103e-04 eta 6:53:27
epoch [44/50] batch [1800/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.6485 (1.1785) lr 2.7103e-04 eta 6:52:47
epoch [44/50] batch [1820/2000] time 2.051 (2.030) data 0.000 (0.000) loss 2.2828 (1.1807) lr 2.7103e-04 eta 6:52:07
epoch [44/50] batch [1840/2000] time 1.998 (2.030) data 0.000 (0.000) loss 0.9376 (1.1809) lr 2.7103e-04 eta 6:51:26
epoch [44/50] batch [1860/2000] time 2.052 (2.030) data 0.000 (0.000) loss 0.2235 (1.1815) lr 2.7103e-04 eta 6:50:44
epoch [44/50] batch [1880/2000] time 1.973 (2.030) data 0.000 (0.000) loss 0.8991 (1.1788) lr 2.7103e-04 eta 6:50:05
epoch [44/50] batch [1900/2000] time 2.029 (2.030) data 0.000 (0.000) loss 0.9784 (1.1773) lr 2.7103e-04 eta 6:49:24
epoch [44/50] batch [1920/2000] time 2.051 (2.030) data 0.000 (0.000) loss 3.2096 (1.1775) lr 2.7103e-04 eta 6:48:43
epoch [44/50] batch [1940/2000] time 2.053 (2.030) data 0.000 (0.000) loss 0.9332 (1.1797) lr 2.7103e-04 eta 6:48:02
epoch [44/50] batch [1960/2000] time 2.000 (2.030) data 0.000 (0.000) loss 2.0127 (1.1789) lr 2.7103e-04 eta 6:47:21
epoch [44/50] batch [1980/2000] time 2.052 (2.030) data 0.000 (0.000) loss 0.6895 (1.1786) lr 2.7103e-04 eta 6:46:40
epoch [44/50] batch [2000/2000] time 2.049 (2.030) data 0.000 (0.000) loss 2.5998 (1.1790) lr 2.2949e-04 eta 6:45:59
epoch [45/50] batch [20/2000] time 2.053 (2.063) data 0.000 (0.027) loss 0.5217 (1.0535) lr 2.2949e-04 eta 6:51:52
epoch [45/50] batch [40/2000] time 1.999 (2.044) data 0.000 (0.014) loss 0.9184 (1.0855) lr 2.2949e-04 eta 6:47:28
epoch [45/50] batch [60/2000] time 2.000 (2.038) data 0.001 (0.009) loss 1.2504 (1.0684) lr 2.2949e-04 eta 6:45:38
epoch [45/50] batch [80/2000] time 2.054 (2.037) data 0.000 (0.007) loss 1.0822 (1.1086) lr 2.2949e-04 eta 6:44:35
epoch [45/50] batch [100/2000] time 2.052 (2.035) data 0.000 (0.006) loss 0.2592 (1.1597) lr 2.2949e-04 eta 6:43:31
epoch [45/50] batch [120/2000] time 1.975 (2.034) data 0.000 (0.005) loss 0.8283 (1.1655) lr 2.2949e-04 eta 6:42:46
epoch [45/50] batch [140/2000] time 1.998 (2.034) data 0.000 (0.004) loss 1.0161 (1.2238) lr 2.2949e-04 eta 6:42:06
epoch [45/50] batch [160/2000] time 2.029 (2.034) data 0.000 (0.004) loss 0.1585 (1.2408) lr 2.2949e-04 eta 6:41:20
epoch [45/50] batch [180/2000] time 2.029 (2.033) data 0.000 (0.003) loss 0.8648 (1.2443) lr 2.2949e-04 eta 6:40:25
epoch [45/50] batch [200/2000] time 2.028 (2.032) data 0.000 (0.003) loss 1.4099 (1.2457) lr 2.2949e-04 eta 6:39:38
epoch [45/50] batch [220/2000] time 1.997 (2.031) data 0.000 (0.003) loss 1.1743 (1.2677) lr 2.2949e-04 eta 6:38:51
epoch [45/50] batch [240/2000] time 2.025 (2.031) data 0.000 (0.002) loss 1.6780 (1.2547) lr 2.2949e-04 eta 6:38:08
epoch [45/50] batch [260/2000] time 2.052 (2.032) data 0.000 (0.002) loss 0.4798 (1.2423) lr 2.2949e-04 eta 6:37:30
epoch [45/50] batch [280/2000] time 1.997 (2.032) data 0.000 (0.002) loss 0.3409 (1.2305) lr 2.2949e-04 eta 6:36:51
epoch [45/50] batch [300/2000] time 2.053 (2.032) data 0.000 (0.002) loss 2.8389 (1.2352) lr 2.2949e-04 eta 6:36:10
epoch [45/50] batch [320/2000] time 2.053 (2.031) data 0.000 (0.002) loss 1.7555 (1.2215) lr 2.2949e-04 eta 6:35:27
epoch [45/50] batch [340/2000] time 2.000 (2.031) data 0.000 (0.002) loss 1.6681 (1.2177) lr 2.2949e-04 eta 6:34:42
epoch [45/50] batch [360/2000] time 2.030 (2.031) data 0.000 (0.002) loss 3.4927 (1.2260) lr 2.2949e-04 eta 6:34:02
epoch [45/50] batch [380/2000] time 2.052 (2.031) data 0.000 (0.002) loss 0.3754 (1.2242) lr 2.2949e-04 eta 6:33:21
epoch [45/50] batch [400/2000] time 2.029 (2.031) data 0.000 (0.002) loss 2.7712 (1.2221) lr 2.2949e-04 eta 6:32:37
epoch [45/50] batch [420/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.5274 (1.2126) lr 2.2949e-04 eta 6:31:52
epoch [45/50] batch [440/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.0133 (1.1983) lr 2.2949e-04 eta 6:31:11
epoch [45/50] batch [460/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.3720 (1.1975) lr 2.2949e-04 eta 6:30:31
epoch [45/50] batch [480/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.0205 (1.1885) lr 2.2949e-04 eta 6:29:48
epoch [45/50] batch [500/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.7235 (1.1918) lr 2.2949e-04 eta 6:29:08
epoch [45/50] batch [520/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.4565 (1.1860) lr 2.2949e-04 eta 6:28:26
epoch [45/50] batch [540/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.9388 (1.1931) lr 2.2949e-04 eta 6:27:46
epoch [45/50] batch [560/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.2060 (1.1960) lr 2.2949e-04 eta 6:27:02
epoch [45/50] batch [580/2000] time 1.973 (2.030) data 0.000 (0.001) loss 0.9477 (1.2044) lr 2.2949e-04 eta 6:26:17
epoch [45/50] batch [600/2000] time 2.051 (2.029) data 0.001 (0.001) loss 1.6448 (1.2143) lr 2.2949e-04 eta 6:25:35
epoch [45/50] batch [620/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.6012 (1.2095) lr 2.2949e-04 eta 6:24:51
epoch [45/50] batch [640/2000] time 1.997 (2.029) data 0.000 (0.001) loss 2.1431 (1.2140) lr 2.2949e-04 eta 6:24:10
epoch [45/50] batch [660/2000] time 1.977 (2.029) data 0.000 (0.001) loss 1.8827 (1.2030) lr 2.2949e-04 eta 6:23:28
epoch [45/50] batch [680/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.0694 (1.2038) lr 2.2949e-04 eta 6:22:51
epoch [45/50] batch [700/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.7523 (1.2100) lr 2.2949e-04 eta 6:22:10
epoch [45/50] batch [720/2000] time 2.001 (2.029) data 0.000 (0.001) loss 1.6219 (1.2065) lr 2.2949e-04 eta 6:21:28
epoch [45/50] batch [740/2000] time 2.031 (2.029) data 0.000 (0.001) loss 1.8226 (1.2108) lr 2.2949e-04 eta 6:20:46
epoch [45/50] batch [760/2000] time 2.001 (2.029) data 0.000 (0.001) loss 1.6295 (1.2170) lr 2.2949e-04 eta 6:20:05
epoch [45/50] batch [780/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.1520 (1.2255) lr 2.2949e-04 eta 6:19:25
epoch [45/50] batch [800/2000] time 2.053 (2.029) data 0.000 (0.001) loss 2.4436 (1.2165) lr 2.2949e-04 eta 6:18:44
epoch [45/50] batch [820/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.7041 (1.2125) lr 2.2949e-04 eta 6:18:03
epoch [45/50] batch [840/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.5101 (1.2174) lr 2.2949e-04 eta 6:17:23
epoch [45/50] batch [860/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.2411 (1.2139) lr 2.2949e-04 eta 6:16:42
epoch [45/50] batch [880/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.8015 (1.2130) lr 2.2949e-04 eta 6:16:03
epoch [45/50] batch [900/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.6102 (1.2082) lr 2.2949e-04 eta 6:15:21
epoch [45/50] batch [920/2000] time 2.000 (2.029) data 0.000 (0.001) loss 1.2425 (1.2056) lr 2.2949e-04 eta 6:14:40
epoch [45/50] batch [940/2000] time 2.053 (2.029) data 0.000 (0.001) loss 2.2616 (1.2046) lr 2.2949e-04 eta 6:14:00
epoch [45/50] batch [960/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.5087 (1.1987) lr 2.2949e-04 eta 6:13:21
epoch [45/50] batch [980/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.6298 (1.1982) lr 2.2949e-04 eta 6:12:39
epoch [45/50] batch [1000/2000] time 2.051 (2.029) data 0.000 (0.001) loss 2.2097 (1.2057) lr 2.2949e-04 eta 6:11:59
epoch [45/50] batch [1020/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.4110 (1.2113) lr 2.2949e-04 eta 6:11:18
epoch [45/50] batch [1040/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.7633 (1.2053) lr 2.2949e-04 eta 6:10:38
epoch [45/50] batch [1060/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.8806 (1.2059) lr 2.2949e-04 eta 6:09:56
epoch [45/50] batch [1080/2000] time 2.032 (2.029) data 0.000 (0.001) loss 1.8720 (1.2067) lr 2.2949e-04 eta 6:09:16
epoch [45/50] batch [1100/2000] time 2.031 (2.029) data 0.000 (0.001) loss 0.7029 (1.2110) lr 2.2949e-04 eta 6:08:36
epoch [45/50] batch [1120/2000] time 2.031 (2.029) data 0.000 (0.001) loss 0.2764 (1.2064) lr 2.2949e-04 eta 6:07:54
epoch [45/50] batch [1140/2000] time 1.978 (2.029) data 0.001 (0.001) loss 0.2763 (1.2079) lr 2.2949e-04 eta 6:07:14
epoch [45/50] batch [1160/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.0410 (1.2065) lr 2.2949e-04 eta 6:06:33
epoch [45/50] batch [1180/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.9927 (1.2077) lr 2.2949e-04 eta 6:05:53
epoch [45/50] batch [1200/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.9196 (1.2080) lr 2.2949e-04 eta 6:05:13
epoch [45/50] batch [1220/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.5508 (1.2038) lr 2.2949e-04 eta 6:04:32
epoch [45/50] batch [1240/2000] time 2.056 (2.029) data 0.000 (0.001) loss 0.7421 (1.2035) lr 2.2949e-04 eta 6:03:53
epoch [45/50] batch [1260/2000] time 2.033 (2.029) data 0.000 (0.001) loss 1.0593 (1.2063) lr 2.2949e-04 eta 6:03:12
epoch [45/50] batch [1280/2000] time 2.025 (2.029) data 0.000 (0.001) loss 1.6563 (1.2073) lr 2.2949e-04 eta 6:02:32
epoch [45/50] batch [1300/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.3762 (1.2132) lr 2.2949e-04 eta 6:01:50
epoch [45/50] batch [1320/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.4978 (1.2136) lr 2.2949e-04 eta 6:01:09
epoch [45/50] batch [1340/2000] time 2.055 (2.029) data 0.000 (0.001) loss 0.0708 (1.2098) lr 2.2949e-04 eta 6:00:29
epoch [45/50] batch [1360/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.0846 (1.2115) lr 2.2949e-04 eta 5:59:49
epoch [45/50] batch [1380/2000] time 1.973 (2.029) data 0.000 (0.001) loss 4.7199 (1.2098) lr 2.2949e-04 eta 5:59:07
epoch [45/50] batch [1400/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.4657 (1.2098) lr 2.2949e-04 eta 5:58:26
epoch [45/50] batch [1420/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.5110 (1.2071) lr 2.2949e-04 eta 5:57:45
epoch [45/50] batch [1440/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.9284 (1.2107) lr 2.2949e-04 eta 5:57:04
epoch [45/50] batch [1460/2000] time 2.056 (2.029) data 0.000 (0.001) loss 1.0689 (1.2081) lr 2.2949e-04 eta 5:56:23
epoch [45/50] batch [1480/2000] time 2.003 (2.029) data 0.000 (0.001) loss 0.2727 (1.2064) lr 2.2949e-04 eta 5:55:42
epoch [45/50] batch [1500/2000] time 1.977 (2.029) data 0.000 (0.001) loss 2.9093 (1.2044) lr 2.2949e-04 eta 5:55:01
epoch [45/50] batch [1520/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.1858 (1.2045) lr 2.2949e-04 eta 5:54:21
epoch [45/50] batch [1540/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.2995 (1.2096) lr 2.2949e-04 eta 5:53:40
epoch [45/50] batch [1560/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.2786 (1.2097) lr 2.2949e-04 eta 5:53:00
epoch [45/50] batch [1580/2000] time 2.001 (2.029) data 0.000 (0.001) loss 1.1083 (1.2098) lr 2.2949e-04 eta 5:52:19
epoch [45/50] batch [1600/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.4049 (1.2143) lr 2.2949e-04 eta 5:51:39
epoch [45/50] batch [1620/2000] time 2.031 (2.029) data 0.000 (0.001) loss 0.7311 (1.2092) lr 2.2949e-04 eta 5:50:59
epoch [45/50] batch [1640/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.9622 (1.2061) lr 2.2949e-04 eta 5:50:18
epoch [45/50] batch [1660/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.8495 (1.2053) lr 2.2949e-04 eta 5:49:38
epoch [45/50] batch [1680/2000] time 2.054 (2.029) data 0.001 (0.001) loss 0.5859 (1.2059) lr 2.2949e-04 eta 5:48:57
epoch [45/50] batch [1700/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.2537 (1.2061) lr 2.2949e-04 eta 5:48:15
epoch [45/50] batch [1720/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.2695 (1.2089) lr 2.2949e-04 eta 5:47:35
epoch [45/50] batch [1740/2000] time 2.030 (2.029) data 0.000 (0.001) loss 1.0158 (1.2098) lr 2.2949e-04 eta 5:46:54
epoch [45/50] batch [1760/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.6398 (1.2102) lr 2.2949e-04 eta 5:46:13
epoch [45/50] batch [1780/2000] time 2.000 (2.029) data 0.000 (0.001) loss 2.8526 (1.2082) lr 2.2949e-04 eta 5:45:31
epoch [45/50] batch [1800/2000] time 2.002 (2.028) data 0.000 (0.000) loss 0.4980 (1.2089) lr 2.2949e-04 eta 5:44:50
epoch [45/50] batch [1820/2000] time 2.050 (2.028) data 0.000 (0.000) loss 0.0378 (1.2053) lr 2.2949e-04 eta 5:44:09
epoch [45/50] batch [1840/2000] time 2.052 (2.028) data 0.000 (0.000) loss 0.7828 (1.2042) lr 2.2949e-04 eta 5:43:28
epoch [45/50] batch [1860/2000] time 2.051 (2.028) data 0.000 (0.000) loss 0.3832 (1.2023) lr 2.2949e-04 eta 5:42:48
epoch [45/50] batch [1880/2000] time 2.028 (2.029) data 0.000 (0.000) loss 0.5332 (1.1977) lr 2.2949e-04 eta 5:42:09
epoch [45/50] batch [1900/2000] time 2.003 (2.029) data 0.000 (0.000) loss 0.5041 (1.1916) lr 2.2949e-04 eta 5:41:28
epoch [45/50] batch [1920/2000] time 2.057 (2.029) data 0.000 (0.000) loss 0.0262 (1.1934) lr 2.2949e-04 eta 5:40:49
epoch [45/50] batch [1940/2000] time 2.007 (2.029) data 0.000 (0.000) loss 1.0629 (1.1920) lr 2.2949e-04 eta 5:40:09
epoch [45/50] batch [1960/2000] time 2.061 (2.029) data 0.000 (0.000) loss 1.3885 (1.1965) lr 2.2949e-04 eta 5:39:29
epoch [45/50] batch [1980/2000] time 2.004 (2.029) data 0.000 (0.000) loss 0.8132 (1.1948) lr 2.2949e-04 eta 5:38:50
epoch [45/50] batch [2000/2000] time 2.058 (2.029) data 0.000 (0.000) loss 1.9362 (1.1955) lr 1.9098e-04 eta 5:38:09
epoch [46/50] batch [20/2000] time 1.998 (2.056) data 0.000 (0.027) loss 0.5077 (1.0196) lr 1.9098e-04 eta 5:41:54
epoch [46/50] batch [40/2000] time 1.998 (2.044) data 0.000 (0.014) loss 2.5520 (1.0621) lr 1.9098e-04 eta 5:39:22
epoch [46/50] batch [60/2000] time 2.001 (2.041) data 0.000 (0.009) loss 1.9153 (0.9918) lr 1.9098e-04 eta 5:38:03
epoch [46/50] batch [80/2000] time 2.050 (2.040) data 0.000 (0.007) loss 0.8729 (1.0315) lr 1.9098e-04 eta 5:37:13
epoch [46/50] batch [100/2000] time 2.030 (2.037) data 0.000 (0.006) loss 1.5715 (1.0480) lr 1.9098e-04 eta 5:36:08
epoch [46/50] batch [120/2000] time 2.052 (2.037) data 0.000 (0.005) loss 2.0935 (1.0643) lr 1.9098e-04 eta 5:35:25
epoch [46/50] batch [140/2000] time 2.054 (2.036) data 0.000 (0.004) loss 0.8739 (1.0665) lr 1.9098e-04 eta 5:34:37
epoch [46/50] batch [160/2000] time 1.972 (2.035) data 0.000 (0.004) loss 0.8333 (1.0540) lr 1.9098e-04 eta 5:33:46
epoch [46/50] batch [180/2000] time 2.031 (2.035) data 0.000 (0.003) loss 1.1930 (1.0685) lr 1.9098e-04 eta 5:33:02
epoch [46/50] batch [200/2000] time 2.031 (2.034) data 0.000 (0.003) loss 0.0204 (1.0796) lr 1.9098e-04 eta 5:32:10
epoch [46/50] batch [220/2000] time 2.001 (2.033) data 0.000 (0.003) loss 2.5170 (1.0921) lr 1.9098e-04 eta 5:31:19
epoch [46/50] batch [240/2000] time 2.055 (2.033) data 0.000 (0.002) loss 0.0773 (1.1095) lr 1.9098e-04 eta 5:30:43
epoch [46/50] batch [260/2000] time 1.997 (2.033) data 0.000 (0.002) loss 0.4924 (1.1463) lr 1.9098e-04 eta 5:30:00
epoch [46/50] batch [280/2000] time 2.051 (2.032) data 0.000 (0.002) loss 0.7120 (1.1470) lr 1.9098e-04 eta 5:29:12
epoch [46/50] batch [300/2000] time 1.997 (2.032) data 0.000 (0.002) loss 1.0253 (1.1567) lr 1.9098e-04 eta 5:28:28
epoch [46/50] batch [320/2000] time 2.052 (2.032) data 0.000 (0.002) loss 3.3644 (1.1762) lr 1.9098e-04 eta 5:27:48
epoch [46/50] batch [340/2000] time 2.030 (2.031) data 0.000 (0.002) loss 3.8044 (1.1827) lr 1.9098e-04 eta 5:27:03
epoch [46/50] batch [360/2000] time 1.973 (2.031) data 0.000 (0.002) loss 1.8081 (1.2028) lr 1.9098e-04 eta 5:26:23
epoch [46/50] batch [380/2000] time 2.052 (2.031) data 0.000 (0.002) loss 2.7382 (1.2009) lr 1.9098e-04 eta 5:25:40
epoch [46/50] batch [400/2000] time 2.051 (2.031) data 0.000 (0.002) loss 2.5368 (1.2132) lr 1.9098e-04 eta 5:24:58
epoch [46/50] batch [420/2000] time 2.050 (2.031) data 0.000 (0.001) loss 2.0876 (1.2010) lr 1.9098e-04 eta 5:24:17
epoch [46/50] batch [440/2000] time 1.974 (2.031) data 0.000 (0.001) loss 1.2793 (1.2167) lr 1.9098e-04 eta 5:23:39
epoch [46/50] batch [460/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.8562 (1.2121) lr 1.9098e-04 eta 5:22:56
epoch [46/50] batch [480/2000] time 2.030 (2.031) data 0.000 (0.001) loss 4.5609 (1.2272) lr 1.9098e-04 eta 5:22:16
epoch [46/50] batch [500/2000] time 2.031 (2.031) data 0.000 (0.001) loss 2.1973 (1.2251) lr 1.9098e-04 eta 5:21:36
epoch [46/50] batch [520/2000] time 2.057 (2.032) data 0.000 (0.001) loss 1.7030 (1.2202) lr 1.9098e-04 eta 5:20:59
epoch [46/50] batch [540/2000] time 2.059 (2.032) data 0.000 (0.001) loss 3.7482 (1.2150) lr 1.9098e-04 eta 5:20:18
epoch [46/50] batch [560/2000] time 2.055 (2.032) data 0.000 (0.001) loss 0.8577 (1.2047) lr 1.9098e-04 eta 5:19:38
epoch [46/50] batch [580/2000] time 2.052 (2.032) data 0.000 (0.001) loss 1.4359 (1.2129) lr 1.9098e-04 eta 5:18:57
epoch [46/50] batch [600/2000] time 1.975 (2.031) data 0.001 (0.001) loss 0.7075 (1.2115) lr 1.9098e-04 eta 5:18:15
epoch [46/50] batch [620/2000] time 2.001 (2.031) data 0.000 (0.001) loss 2.5850 (1.2124) lr 1.9098e-04 eta 5:17:34
epoch [46/50] batch [640/2000] time 1.996 (2.031) data 0.000 (0.001) loss 1.1979 (1.2033) lr 1.9098e-04 eta 5:16:52
epoch [46/50] batch [660/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.5172 (1.2004) lr 1.9098e-04 eta 5:16:13
epoch [46/50] batch [680/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.6070 (1.2014) lr 1.9098e-04 eta 5:15:30
epoch [46/50] batch [700/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.6314 (1.1954) lr 1.9098e-04 eta 5:14:50
epoch [46/50] batch [720/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.4865 (1.1960) lr 1.9098e-04 eta 5:14:07
epoch [46/50] batch [740/2000] time 2.053 (2.031) data 0.000 (0.001) loss 2.0648 (1.2030) lr 1.9098e-04 eta 5:13:26
epoch [46/50] batch [760/2000] time 2.030 (2.031) data 0.000 (0.001) loss 0.1559 (1.2091) lr 1.9098e-04 eta 5:12:46
epoch [46/50] batch [780/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.4409 (1.2109) lr 1.9098e-04 eta 5:12:07
epoch [46/50] batch [800/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.3869 (1.2198) lr 1.9098e-04 eta 5:11:27
epoch [46/50] batch [820/2000] time 2.031 (2.031) data 0.000 (0.001) loss 0.2214 (1.2170) lr 1.9098e-04 eta 5:10:44
epoch [46/50] batch [840/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.5293 (1.2224) lr 1.9098e-04 eta 5:10:04
epoch [46/50] batch [860/2000] time 2.002 (2.031) data 0.000 (0.001) loss 0.4603 (1.2215) lr 1.9098e-04 eta 5:09:25
epoch [46/50] batch [880/2000] time 2.056 (2.031) data 0.000 (0.001) loss 0.2098 (1.2269) lr 1.9098e-04 eta 5:08:44
epoch [46/50] batch [900/2000] time 2.056 (2.031) data 0.000 (0.001) loss 0.4036 (1.2251) lr 1.9098e-04 eta 5:08:03
epoch [46/50] batch [920/2000] time 2.032 (2.031) data 0.000 (0.001) loss 0.0914 (1.2275) lr 1.9098e-04 eta 5:07:22
epoch [46/50] batch [940/2000] time 2.054 (2.031) data 0.000 (0.001) loss 0.7156 (1.2152) lr 1.9098e-04 eta 5:06:42
epoch [46/50] batch [960/2000] time 2.032 (2.031) data 0.000 (0.001) loss 2.5726 (1.2147) lr 1.9098e-04 eta 5:06:01
epoch [46/50] batch [980/2000] time 2.054 (2.031) data 0.000 (0.001) loss 1.4534 (1.2200) lr 1.9098e-04 eta 5:05:20
epoch [46/50] batch [1000/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.0908 (1.2159) lr 1.9098e-04 eta 5:04:39
epoch [46/50] batch [1020/2000] time 1.997 (2.031) data 0.000 (0.001) loss 1.2103 (1.2173) lr 1.9098e-04 eta 5:03:59
epoch [46/50] batch [1040/2000] time 2.030 (2.031) data 0.000 (0.001) loss 1.0541 (1.2171) lr 1.9098e-04 eta 5:03:19
epoch [46/50] batch [1060/2000] time 2.031 (2.031) data 0.000 (0.001) loss 0.4075 (1.2137) lr 1.9098e-04 eta 5:02:38
epoch [46/50] batch [1080/2000] time 2.032 (2.031) data 0.000 (0.001) loss 2.6306 (1.2184) lr 1.9098e-04 eta 5:01:58
epoch [46/50] batch [1100/2000] time 2.029 (2.031) data 0.000 (0.001) loss 2.8335 (1.2241) lr 1.9098e-04 eta 5:01:16
epoch [46/50] batch [1120/2000] time 2.000 (2.031) data 0.000 (0.001) loss 0.6296 (1.2292) lr 1.9098e-04 eta 5:00:35
epoch [46/50] batch [1140/2000] time 1.978 (2.031) data 0.001 (0.001) loss 0.6699 (1.2283) lr 1.9098e-04 eta 4:59:54
epoch [46/50] batch [1160/2000] time 2.054 (2.031) data 0.000 (0.001) loss 1.3356 (1.2323) lr 1.9098e-04 eta 4:59:13
epoch [46/50] batch [1180/2000] time 2.031 (2.031) data 0.000 (0.001) loss 0.6453 (1.2292) lr 1.9098e-04 eta 4:58:33
epoch [46/50] batch [1200/2000] time 1.997 (2.031) data 0.000 (0.001) loss 1.4208 (1.2288) lr 1.9098e-04 eta 4:57:53
epoch [46/50] batch [1220/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.9333 (1.2258) lr 1.9098e-04 eta 4:57:11
epoch [46/50] batch [1240/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.6833 (1.2241) lr 1.9098e-04 eta 4:56:31
epoch [46/50] batch [1260/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.3335 (1.2287) lr 1.9098e-04 eta 4:55:51
epoch [46/50] batch [1280/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.3393 (1.2260) lr 1.9098e-04 eta 4:55:10
epoch [46/50] batch [1300/2000] time 2.001 (2.031) data 0.000 (0.001) loss 1.3391 (1.2235) lr 1.9098e-04 eta 4:54:30
epoch [46/50] batch [1320/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.8386 (1.2165) lr 1.9098e-04 eta 4:53:50
epoch [46/50] batch [1340/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.2656 (1.2166) lr 1.9098e-04 eta 4:53:08
epoch [46/50] batch [1360/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.3642 (1.2215) lr 1.9098e-04 eta 4:52:27
epoch [46/50] batch [1380/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.1283 (1.2217) lr 1.9098e-04 eta 4:51:46
epoch [46/50] batch [1400/2000] time 2.056 (2.031) data 0.000 (0.001) loss 0.4367 (1.2174) lr 1.9098e-04 eta 4:51:05
epoch [46/50] batch [1420/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.2208 (1.2208) lr 1.9098e-04 eta 4:50:25
epoch [46/50] batch [1440/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.7778 (1.2269) lr 1.9098e-04 eta 4:49:44
epoch [46/50] batch [1460/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.7201 (1.2290) lr 1.9098e-04 eta 4:49:03
epoch [46/50] batch [1480/2000] time 2.048 (2.031) data 0.000 (0.001) loss 2.9042 (1.2333) lr 1.9098e-04 eta 4:48:23
epoch [46/50] batch [1500/2000] time 2.024 (2.031) data 0.000 (0.001) loss 1.9997 (1.2356) lr 1.9098e-04 eta 4:47:42
epoch [46/50] batch [1520/2000] time 2.055 (2.031) data 0.000 (0.001) loss 0.5568 (1.2336) lr 1.9098e-04 eta 4:47:01
epoch [46/50] batch [1540/2000] time 2.054 (2.031) data 0.000 (0.001) loss 0.9133 (1.2369) lr 1.9098e-04 eta 4:46:21
epoch [46/50] batch [1560/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.8166 (1.2354) lr 1.9098e-04 eta 4:45:40
epoch [46/50] batch [1580/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.1392 (1.2344) lr 1.9098e-04 eta 4:45:00
epoch [46/50] batch [1600/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.8355 (1.2335) lr 1.9098e-04 eta 4:44:19
epoch [46/50] batch [1620/2000] time 1.997 (2.031) data 0.000 (0.001) loss 1.7185 (1.2336) lr 1.9098e-04 eta 4:43:37
epoch [46/50] batch [1640/2000] time 2.002 (2.031) data 0.000 (0.001) loss 1.5727 (1.2286) lr 1.9098e-04 eta 4:42:58
epoch [46/50] batch [1660/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.3240 (1.2306) lr 1.9098e-04 eta 4:42:17
epoch [46/50] batch [1680/2000] time 1.975 (2.031) data 0.001 (0.001) loss 1.9513 (1.2287) lr 1.9098e-04 eta 4:41:37
epoch [46/50] batch [1700/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.2133 (1.2271) lr 1.9098e-04 eta 4:40:57
epoch [46/50] batch [1720/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.1882 (1.2224) lr 1.9098e-04 eta 4:40:16
epoch [46/50] batch [1740/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.3639 (1.2217) lr 1.9098e-04 eta 4:39:35
epoch [46/50] batch [1760/2000] time 2.031 (2.031) data 0.000 (0.001) loss 0.4754 (1.2202) lr 1.9098e-04 eta 4:38:54
epoch [46/50] batch [1780/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.3019 (1.2219) lr 1.9098e-04 eta 4:38:13
epoch [46/50] batch [1800/2000] time 2.031 (2.031) data 0.000 (0.001) loss 2.3565 (1.2222) lr 1.9098e-04 eta 4:37:33
epoch [46/50] batch [1820/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.0470 (1.2242) lr 1.9098e-04 eta 4:36:52
epoch [46/50] batch [1840/2000] time 2.057 (2.031) data 0.000 (0.000) loss 1.3772 (1.2231) lr 1.9098e-04 eta 4:36:12
epoch [46/50] batch [1860/2000] time 1.999 (2.031) data 0.000 (0.000) loss 2.4369 (1.2236) lr 1.9098e-04 eta 4:35:31
epoch [46/50] batch [1880/2000] time 2.000 (2.031) data 0.000 (0.000) loss 3.2961 (1.2237) lr 1.9098e-04 eta 4:34:50
epoch [46/50] batch [1900/2000] time 2.052 (2.031) data 0.000 (0.000) loss 1.2701 (1.2225) lr 1.9098e-04 eta 4:34:10
epoch [46/50] batch [1920/2000] time 2.051 (2.031) data 0.000 (0.000) loss 1.4337 (1.2197) lr 1.9098e-04 eta 4:33:29
epoch [46/50] batch [1940/2000] time 2.053 (2.031) data 0.000 (0.000) loss 1.9751 (1.2200) lr 1.9098e-04 eta 4:32:48
epoch [46/50] batch [1960/2000] time 2.029 (2.031) data 0.000 (0.000) loss 0.3527 (1.2198) lr 1.9098e-04 eta 4:32:07
epoch [46/50] batch [1980/2000] time 2.050 (2.031) data 0.000 (0.000) loss 0.7890 (1.2186) lr 1.9098e-04 eta 4:31:26
epoch [46/50] batch [2000/2000] time 2.050 (2.031) data 0.000 (0.000) loss 1.0149 (1.2216) lr 1.5567e-04 eta 4:30:44
epoch [47/50] batch [20/2000] time 2.052 (2.053) data 0.000 (0.027) loss 0.8816 (1.5160) lr 1.5567e-04 eta 4:33:06
epoch [47/50] batch [40/2000] time 2.052 (2.044) data 0.000 (0.014) loss 1.8756 (1.4969) lr 1.5567e-04 eta 4:31:11
epoch [47/50] batch [60/2000] time 1.999 (2.038) data 0.001 (0.009) loss 0.7604 (1.2841) lr 1.5567e-04 eta 4:29:38
epoch [47/50] batch [80/2000] time 2.053 (2.038) data 0.000 (0.007) loss 2.3241 (1.2636) lr 1.5567e-04 eta 4:28:57
epoch [47/50] batch [100/2000] time 2.050 (2.036) data 0.000 (0.006) loss 1.9770 (1.2164) lr 1.5567e-04 eta 4:28:02
epoch [47/50] batch [120/2000] time 2.054 (2.036) data 0.000 (0.005) loss 2.9753 (1.1682) lr 1.5567e-04 eta 4:27:27
epoch [47/50] batch [140/2000] time 2.054 (2.036) data 0.000 (0.004) loss 0.1146 (1.1755) lr 1.5567e-04 eta 4:26:46
epoch [47/50] batch [160/2000] time 2.033 (2.035) data 0.000 (0.004) loss 0.4307 (1.2135) lr 1.5567e-04 eta 4:25:58
epoch [47/50] batch [180/2000] time 2.003 (2.035) data 0.000 (0.003) loss 0.8325 (1.1966) lr 1.5567e-04 eta 4:25:17
epoch [47/50] batch [200/2000] time 1.997 (2.035) data 0.000 (0.003) loss 2.0148 (1.2150) lr 1.5567e-04 eta 4:24:35
epoch [47/50] batch [220/2000] time 2.052 (2.035) data 0.000 (0.003) loss 1.1722 (1.2073) lr 1.5567e-04 eta 4:23:50
epoch [47/50] batch [240/2000] time 2.051 (2.034) data 0.000 (0.002) loss 0.0993 (1.2149) lr 1.5567e-04 eta 4:23:03
epoch [47/50] batch [260/2000] time 1.975 (2.033) data 0.000 (0.002) loss 0.7534 (1.2190) lr 1.5567e-04 eta 4:22:18
epoch [47/50] batch [280/2000] time 2.051 (2.033) data 0.000 (0.002) loss 1.9628 (1.2072) lr 1.5567e-04 eta 4:21:35
epoch [47/50] batch [300/2000] time 2.053 (2.033) data 0.000 (0.002) loss 3.4138 (1.2230) lr 1.5567e-04 eta 4:20:54
epoch [47/50] batch [320/2000] time 2.052 (2.033) data 0.000 (0.002) loss 0.4990 (1.2272) lr 1.5567e-04 eta 4:20:11
epoch [47/50] batch [340/2000] time 2.051 (2.033) data 0.000 (0.002) loss 0.7526 (1.2325) lr 1.5567e-04 eta 4:19:29
epoch [47/50] batch [360/2000] time 1.995 (2.032) data 0.000 (0.002) loss 0.8290 (1.2269) lr 1.5567e-04 eta 4:18:46
epoch [47/50] batch [380/2000] time 1.995 (2.032) data 0.000 (0.002) loss 1.3206 (1.2201) lr 1.5567e-04 eta 4:18:04
epoch [47/50] batch [400/2000] time 2.053 (2.032) data 0.000 (0.002) loss 2.3244 (1.2298) lr 1.5567e-04 eta 4:17:25
epoch [47/50] batch [420/2000] time 1.999 (2.032) data 0.000 (0.002) loss 0.5021 (1.2335) lr 1.5567e-04 eta 4:16:43
epoch [47/50] batch [440/2000] time 1.974 (2.032) data 0.000 (0.001) loss 0.4811 (1.2311) lr 1.5567e-04 eta 4:16:01
epoch [47/50] batch [460/2000] time 2.030 (2.032) data 0.000 (0.001) loss 0.4790 (1.2322) lr 1.5567e-04 eta 4:15:20
epoch [47/50] batch [480/2000] time 1.998 (2.032) data 0.000 (0.001) loss 0.6715 (1.2257) lr 1.5567e-04 eta 4:14:38
epoch [47/50] batch [500/2000] time 2.032 (2.032) data 0.000 (0.001) loss 0.7712 (1.2152) lr 1.5567e-04 eta 4:13:58
epoch [47/50] batch [520/2000] time 2.029 (2.032) data 0.000 (0.001) loss 0.1892 (1.2151) lr 1.5567e-04 eta 4:13:15
epoch [47/50] batch [540/2000] time 2.058 (2.032) data 0.000 (0.001) loss 1.8835 (1.2070) lr 1.5567e-04 eta 4:12:36
epoch [47/50] batch [560/2000] time 2.029 (2.032) data 0.000 (0.001) loss 0.2204 (1.2006) lr 1.5567e-04 eta 4:11:56
epoch [47/50] batch [580/2000] time 1.997 (2.032) data 0.000 (0.001) loss 0.7492 (1.1902) lr 1.5567e-04 eta 4:11:14
epoch [47/50] batch [600/2000] time 2.053 (2.031) data 0.001 (0.001) loss 0.9512 (1.2019) lr 1.5567e-04 eta 4:10:32
epoch [47/50] batch [620/2000] time 1.997 (2.031) data 0.000 (0.001) loss 2.2492 (1.1976) lr 1.5567e-04 eta 4:09:52
epoch [47/50] batch [640/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.9271 (1.1882) lr 1.5567e-04 eta 4:09:10
epoch [47/50] batch [660/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.9292 (1.1898) lr 1.5567e-04 eta 4:08:29
epoch [47/50] batch [680/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.3227 (1.1869) lr 1.5567e-04 eta 4:07:46
epoch [47/50] batch [700/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.4812 (1.1806) lr 1.5567e-04 eta 4:07:06
epoch [47/50] batch [720/2000] time 2.055 (2.031) data 0.000 (0.001) loss 2.7211 (1.1798) lr 1.5567e-04 eta 4:06:25
epoch [47/50] batch [740/2000] time 1.998 (2.031) data 0.000 (0.001) loss 1.6651 (1.1788) lr 1.5567e-04 eta 4:05:45
epoch [47/50] batch [760/2000] time 1.996 (2.031) data 0.000 (0.001) loss 0.6489 (1.1776) lr 1.5567e-04 eta 4:05:04
epoch [47/50] batch [780/2000] time 1.996 (2.031) data 0.000 (0.001) loss 1.9208 (1.1739) lr 1.5567e-04 eta 4:04:23
epoch [47/50] batch [800/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.1319 (1.1690) lr 1.5567e-04 eta 4:03:42
epoch [47/50] batch [820/2000] time 2.050 (2.031) data 0.000 (0.001) loss 1.4661 (1.1718) lr 1.5567e-04 eta 4:03:01
epoch [47/50] batch [840/2000] time 2.001 (2.031) data 0.000 (0.001) loss 0.5895 (1.1784) lr 1.5567e-04 eta 4:02:21
epoch [47/50] batch [860/2000] time 2.059 (2.031) data 0.000 (0.001) loss 0.3984 (1.1751) lr 1.5567e-04 eta 4:01:40
epoch [47/50] batch [880/2000] time 2.002 (2.031) data 0.000 (0.001) loss 0.0796 (1.1713) lr 1.5567e-04 eta 4:01:00
epoch [47/50] batch [900/2000] time 2.060 (2.031) data 0.000 (0.001) loss 0.8030 (1.1735) lr 1.5567e-04 eta 4:00:19
epoch [47/50] batch [920/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.0769 (1.1683) lr 1.5567e-04 eta 3:59:40
epoch [47/50] batch [940/2000] time 1.999 (2.031) data 0.000 (0.001) loss 4.5009 (1.1811) lr 1.5567e-04 eta 3:59:00
epoch [47/50] batch [960/2000] time 2.032 (2.031) data 0.000 (0.001) loss 0.6988 (1.1864) lr 1.5567e-04 eta 3:58:19
epoch [47/50] batch [980/2000] time 2.057 (2.031) data 0.000 (0.001) loss 0.8013 (1.1853) lr 1.5567e-04 eta 3:57:38
epoch [47/50] batch [1000/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.4195 (1.1777) lr 1.5567e-04 eta 3:56:57
epoch [47/50] batch [1020/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.2384 (1.1751) lr 1.5567e-04 eta 3:56:17
epoch [47/50] batch [1040/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.4507 (1.1712) lr 1.5567e-04 eta 3:55:35
epoch [47/50] batch [1060/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.1971 (1.1670) lr 1.5567e-04 eta 3:54:55
epoch [47/50] batch [1080/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.0686 (1.1702) lr 1.5567e-04 eta 3:54:13
epoch [47/50] batch [1100/2000] time 2.001 (2.031) data 0.000 (0.001) loss 0.9870 (1.1679) lr 1.5567e-04 eta 3:53:33
epoch [47/50] batch [1120/2000] time 2.055 (2.031) data 0.000 (0.001) loss 0.6136 (1.1642) lr 1.5567e-04 eta 3:52:53
epoch [47/50] batch [1140/2000] time 2.053 (2.031) data 0.001 (0.001) loss 0.2374 (1.1653) lr 1.5567e-04 eta 3:52:13
epoch [47/50] batch [1160/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.3212 (1.1672) lr 1.5567e-04 eta 3:51:32
epoch [47/50] batch [1180/2000] time 1.999 (2.031) data 0.000 (0.001) loss 1.3014 (1.1593) lr 1.5567e-04 eta 3:50:51
epoch [47/50] batch [1200/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.5156 (1.1600) lr 1.5567e-04 eta 3:50:10
epoch [47/50] batch [1220/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.6281 (1.1579) lr 1.5567e-04 eta 3:49:29
epoch [47/50] batch [1240/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.8897 (1.1543) lr 1.5567e-04 eta 3:48:48
epoch [47/50] batch [1260/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.6971 (1.1541) lr 1.5567e-04 eta 3:48:07
epoch [47/50] batch [1280/2000] time 2.050 (2.031) data 0.000 (0.001) loss 1.7483 (1.1546) lr 1.5567e-04 eta 3:47:26
epoch [47/50] batch [1300/2000] time 1.974 (2.031) data 0.000 (0.001) loss 3.4997 (1.1563) lr 1.5567e-04 eta 3:46:45
epoch [47/50] batch [1320/2000] time 2.033 (2.031) data 0.000 (0.001) loss 1.2021 (1.1605) lr 1.5567e-04 eta 3:46:04
epoch [47/50] batch [1340/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.0590 (1.1599) lr 1.5567e-04 eta 3:45:25
epoch [47/50] batch [1360/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.7812 (1.1570) lr 1.5567e-04 eta 3:44:44
epoch [47/50] batch [1380/2000] time 2.058 (2.031) data 0.000 (0.001) loss 0.3857 (1.1577) lr 1.5567e-04 eta 3:44:04
epoch [47/50] batch [1400/2000] time 2.057 (2.031) data 0.000 (0.001) loss 1.1977 (1.1520) lr 1.5567e-04 eta 3:43:23
epoch [47/50] batch [1420/2000] time 2.032 (2.031) data 0.000 (0.001) loss 0.8265 (1.1528) lr 1.5567e-04 eta 3:42:44
epoch [47/50] batch [1440/2000] time 1.995 (2.031) data 0.000 (0.001) loss 1.5985 (1.1569) lr 1.5567e-04 eta 3:42:03
epoch [47/50] batch [1460/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.9310 (1.1521) lr 1.5567e-04 eta 3:41:21
epoch [47/50] batch [1480/2000] time 2.030 (2.031) data 0.000 (0.001) loss 1.7217 (1.1542) lr 1.5567e-04 eta 3:40:41
epoch [47/50] batch [1500/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.1004 (1.1528) lr 1.5567e-04 eta 3:40:00
epoch [47/50] batch [1520/2000] time 2.050 (2.031) data 0.000 (0.001) loss 1.6338 (1.1528) lr 1.5567e-04 eta 3:39:19
epoch [47/50] batch [1540/2000] time 1.977 (2.031) data 0.000 (0.001) loss 3.6992 (1.1557) lr 1.5567e-04 eta 3:38:38
epoch [47/50] batch [1560/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.3779 (1.1560) lr 1.5567e-04 eta 3:37:58
epoch [47/50] batch [1580/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.8098 (1.1604) lr 1.5567e-04 eta 3:37:17
epoch [47/50] batch [1600/2000] time 2.057 (2.031) data 0.000 (0.001) loss 1.1978 (1.1586) lr 1.5567e-04 eta 3:36:36
epoch [47/50] batch [1620/2000] time 2.058 (2.031) data 0.000 (0.001) loss 2.0604 (1.1590) lr 1.5567e-04 eta 3:35:55
epoch [47/50] batch [1640/2000] time 1.997 (2.031) data 0.000 (0.001) loss 1.1825 (1.1564) lr 1.5567e-04 eta 3:35:15
epoch [47/50] batch [1660/2000] time 2.002 (2.031) data 0.000 (0.001) loss 0.2340 (1.1573) lr 1.5567e-04 eta 3:34:34
epoch [47/50] batch [1680/2000] time 1.998 (2.031) data 0.001 (0.001) loss 1.2977 (1.1562) lr 1.5567e-04 eta 3:33:54
epoch [47/50] batch [1700/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.0986 (1.1617) lr 1.5567e-04 eta 3:33:13
epoch [47/50] batch [1720/2000] time 2.028 (2.031) data 0.000 (0.001) loss 3.7968 (1.1608) lr 1.5567e-04 eta 3:32:32
epoch [47/50] batch [1740/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.9535 (1.1625) lr 1.5567e-04 eta 3:31:51
epoch [47/50] batch [1760/2000] time 1.997 (2.031) data 0.000 (0.001) loss 1.3571 (1.1616) lr 1.5567e-04 eta 3:31:11
epoch [47/50] batch [1780/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.6573 (1.1612) lr 1.5567e-04 eta 3:30:30
epoch [47/50] batch [1800/2000] time 2.048 (2.031) data 0.000 (0.001) loss 1.1607 (1.1626) lr 1.5567e-04 eta 3:29:49
epoch [47/50] batch [1820/2000] time 2.027 (2.031) data 0.000 (0.001) loss 0.2371 (1.1622) lr 1.5567e-04 eta 3:29:09
epoch [47/50] batch [1840/2000] time 2.056 (2.031) data 0.000 (0.001) loss 1.0924 (1.1613) lr 1.5567e-04 eta 3:28:28
epoch [47/50] batch [1860/2000] time 1.980 (2.031) data 0.000 (0.001) loss 2.5313 (1.1640) lr 1.5567e-04 eta 3:27:48
epoch [47/50] batch [1880/2000] time 2.054 (2.031) data 0.000 (0.000) loss 0.4297 (1.1634) lr 1.5567e-04 eta 3:27:08
epoch [47/50] batch [1900/2000] time 2.049 (2.031) data 0.000 (0.000) loss 1.4216 (1.1656) lr 1.5567e-04 eta 3:26:27
epoch [47/50] batch [1920/2000] time 2.028 (2.031) data 0.000 (0.000) loss 1.2008 (1.1638) lr 1.5567e-04 eta 3:25:46
epoch [47/50] batch [1940/2000] time 2.050 (2.031) data 0.000 (0.000) loss 0.2996 (1.1637) lr 1.5567e-04 eta 3:25:05
epoch [47/50] batch [1960/2000] time 2.050 (2.031) data 0.000 (0.000) loss 0.8324 (1.1623) lr 1.5567e-04 eta 3:24:24
epoch [47/50] batch [1980/2000] time 2.052 (2.030) data 0.000 (0.000) loss 1.1482 (1.1628) lr 1.5567e-04 eta 3:23:43
epoch [47/50] batch [2000/2000] time 2.046 (2.031) data 0.000 (0.000) loss 2.9718 (1.1677) lr 1.2369e-04 eta 3:23:03
epoch [48/50] batch [20/2000] time 2.049 (2.056) data 0.000 (0.027) loss 2.0850 (1.2150) lr 1.2369e-04 eta 3:24:55
epoch [48/50] batch [40/2000] time 2.026 (2.043) data 0.000 (0.014) loss 2.7184 (1.2963) lr 1.2369e-04 eta 3:22:57
epoch [48/50] batch [60/2000] time 2.031 (2.039) data 0.001 (0.009) loss 1.0952 (1.3232) lr 1.2369e-04 eta 3:21:50
epoch [48/50] batch [80/2000] time 2.050 (2.036) data 0.000 (0.007) loss 0.8078 (1.2603) lr 1.2369e-04 eta 3:20:50
epoch [48/50] batch [100/2000] time 2.029 (2.036) data 0.000 (0.006) loss 0.7464 (1.2462) lr 1.2369e-04 eta 3:20:11
epoch [48/50] batch [120/2000] time 2.000 (2.034) data 0.000 (0.005) loss 0.7893 (1.2201) lr 1.2369e-04 eta 3:19:18
epoch [48/50] batch [140/2000] time 2.000 (2.033) data 0.000 (0.004) loss 3.0567 (1.2177) lr 1.2369e-04 eta 3:18:35
epoch [48/50] batch [160/2000] time 2.051 (2.033) data 0.000 (0.004) loss 1.0714 (1.2283) lr 1.2369e-04 eta 3:17:54
epoch [48/50] batch [180/2000] time 2.052 (2.033) data 0.000 (0.003) loss 1.6909 (1.2255) lr 1.2369e-04 eta 3:17:14
epoch [48/50] batch [200/2000] time 2.054 (2.033) data 0.000 (0.003) loss 1.0110 (1.2088) lr 1.2369e-04 eta 3:16:32
epoch [48/50] batch [220/2000] time 2.054 (2.033) data 0.000 (0.003) loss 0.4031 (1.1996) lr 1.2369e-04 eta 3:15:51
epoch [48/50] batch [240/2000] time 2.052 (2.033) data 0.000 (0.002) loss 2.1513 (1.2145) lr 1.2369e-04 eta 3:15:08
epoch [48/50] batch [260/2000] time 2.052 (2.033) data 0.000 (0.002) loss 0.6552 (1.2008) lr 1.2369e-04 eta 3:14:27
epoch [48/50] batch [280/2000] time 1.996 (2.032) data 0.000 (0.002) loss 0.8746 (1.1905) lr 1.2369e-04 eta 3:13:45
epoch [48/50] batch [300/2000] time 2.053 (2.033) data 0.000 (0.002) loss 0.9053 (1.1987) lr 1.2369e-04 eta 3:13:07
epoch [48/50] batch [320/2000] time 2.051 (2.033) data 0.000 (0.002) loss 1.9265 (1.2046) lr 1.2369e-04 eta 3:12:25
epoch [48/50] batch [340/2000] time 2.051 (2.033) data 0.000 (0.002) loss 0.2397 (1.1981) lr 1.2369e-04 eta 3:11:44
epoch [48/50] batch [360/2000] time 2.027 (2.032) data 0.000 (0.002) loss 1.2271 (1.2007) lr 1.2369e-04 eta 3:11:00
epoch [48/50] batch [380/2000] time 1.999 (2.032) data 0.000 (0.002) loss 1.8429 (1.2057) lr 1.2369e-04 eta 3:10:17
epoch [48/50] batch [400/2000] time 2.027 (2.031) data 0.000 (0.002) loss 0.5170 (1.2132) lr 1.2369e-04 eta 3:09:35
epoch [48/50] batch [420/2000] time 1.996 (2.031) data 0.000 (0.001) loss 0.4583 (1.2119) lr 1.2369e-04 eta 3:08:53
epoch [48/50] batch [440/2000] time 1.996 (2.031) data 0.000 (0.001) loss 0.2184 (1.1974) lr 1.2369e-04 eta 3:08:09
epoch [48/50] batch [460/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.6977 (1.1923) lr 1.2369e-04 eta 3:07:28
epoch [48/50] batch [480/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.6063 (1.1896) lr 1.2369e-04 eta 3:06:46
epoch [48/50] batch [500/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.7547 (1.1864) lr 1.2369e-04 eta 3:06:06
epoch [48/50] batch [520/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.6317 (1.2060) lr 1.2369e-04 eta 3:05:27
epoch [48/50] batch [540/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.2296 (1.1951) lr 1.2369e-04 eta 3:04:46
epoch [48/50] batch [560/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.5862 (1.1855) lr 1.2369e-04 eta 3:04:05
epoch [48/50] batch [580/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.8345 (1.1864) lr 1.2369e-04 eta 3:03:24
epoch [48/50] batch [600/2000] time 2.055 (2.030) data 0.001 (0.001) loss 0.3256 (1.1872) lr 1.2369e-04 eta 3:02:44
epoch [48/50] batch [620/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.8093 (1.2007) lr 1.2369e-04 eta 3:02:02
epoch [48/50] batch [640/2000] time 2.055 (2.030) data 0.000 (0.001) loss 2.8167 (1.2109) lr 1.2369e-04 eta 3:01:23
epoch [48/50] batch [660/2000] time 2.052 (2.031) data 0.000 (0.001) loss 3.9310 (1.2151) lr 1.2369e-04 eta 3:00:43
epoch [48/50] batch [680/2000] time 2.027 (2.031) data 0.000 (0.001) loss 1.1659 (1.2153) lr 1.2369e-04 eta 3:00:03
epoch [48/50] batch [700/2000] time 2.052 (2.031) data 0.000 (0.001) loss 3.1741 (1.2163) lr 1.2369e-04 eta 2:59:21
epoch [48/50] batch [720/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.0866 (1.2105) lr 1.2369e-04 eta 2:58:41
epoch [48/50] batch [740/2000] time 2.001 (2.030) data 0.000 (0.001) loss 0.6653 (1.2177) lr 1.2369e-04 eta 2:58:00
epoch [48/50] batch [760/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.0769 (1.2148) lr 1.2369e-04 eta 2:57:20
epoch [48/50] batch [780/2000] time 2.028 (2.031) data 0.000 (0.001) loss 1.5198 (1.2102) lr 1.2369e-04 eta 2:56:39
epoch [48/50] batch [800/2000] time 2.031 (2.031) data 0.000 (0.001) loss 4.3889 (1.2198) lr 1.2369e-04 eta 2:55:59
epoch [48/50] batch [820/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.5685 (1.2151) lr 1.2369e-04 eta 2:55:17
epoch [48/50] batch [840/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.7847 (1.2139) lr 1.2369e-04 eta 2:54:36
epoch [48/50] batch [860/2000] time 1.998 (2.030) data 0.000 (0.001) loss 2.0883 (1.2177) lr 1.2369e-04 eta 2:53:56
epoch [48/50] batch [880/2000] time 1.977 (2.030) data 0.000 (0.001) loss 0.0295 (1.2168) lr 1.2369e-04 eta 2:53:15
epoch [48/50] batch [900/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.6046 (1.2191) lr 1.2369e-04 eta 2:52:35
epoch [48/50] batch [920/2000] time 1.977 (2.031) data 0.000 (0.001) loss 0.1359 (1.2178) lr 1.2369e-04 eta 2:51:55
epoch [48/50] batch [940/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.1794 (1.2113) lr 1.2369e-04 eta 2:51:13
epoch [48/50] batch [960/2000] time 1.998 (2.030) data 0.000 (0.001) loss 2.4212 (1.2114) lr 1.2369e-04 eta 2:50:32
epoch [48/50] batch [980/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.1591 (1.2172) lr 1.2369e-04 eta 2:49:52
epoch [48/50] batch [1000/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.5497 (1.2203) lr 1.2369e-04 eta 2:49:11
epoch [48/50] batch [1020/2000] time 2.000 (2.030) data 0.000 (0.001) loss 2.1353 (1.2186) lr 1.2369e-04 eta 2:48:30
epoch [48/50] batch [1040/2000] time 1.974 (2.030) data 0.000 (0.001) loss 0.1148 (1.2174) lr 1.2369e-04 eta 2:47:49
epoch [48/50] batch [1060/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.3759 (1.2143) lr 1.2369e-04 eta 2:47:09
epoch [48/50] batch [1080/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.8579 (1.2122) lr 1.2369e-04 eta 2:46:29
epoch [48/50] batch [1100/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.3747 (1.2117) lr 1.2369e-04 eta 2:45:48
epoch [48/50] batch [1120/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.0818 (1.2128) lr 1.2369e-04 eta 2:45:07
epoch [48/50] batch [1140/2000] time 2.000 (2.030) data 0.001 (0.001) loss 4.1931 (1.2094) lr 1.2369e-04 eta 2:44:26
epoch [48/50] batch [1160/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.5268 (1.2025) lr 1.2369e-04 eta 2:43:46
epoch [48/50] batch [1180/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.4120 (1.2092) lr 1.2369e-04 eta 2:43:05
epoch [48/50] batch [1200/2000] time 1.999 (2.030) data 0.000 (0.001) loss 2.0260 (1.2065) lr 1.2369e-04 eta 2:42:24
epoch [48/50] batch [1220/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.7881 (1.2100) lr 1.2369e-04 eta 2:41:44
epoch [48/50] batch [1240/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.3474 (1.2100) lr 1.2369e-04 eta 2:41:03
epoch [48/50] batch [1260/2000] time 2.025 (2.030) data 0.000 (0.001) loss 0.3788 (1.2084) lr 1.2369e-04 eta 2:40:22
epoch [48/50] batch [1280/2000] time 2.033 (2.030) data 0.000 (0.001) loss 0.7125 (1.2030) lr 1.2369e-04 eta 2:39:41
epoch [48/50] batch [1300/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.9471 (1.1975) lr 1.2369e-04 eta 2:39:01
epoch [48/50] batch [1320/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.4818 (1.1973) lr 1.2369e-04 eta 2:38:20
epoch [48/50] batch [1340/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.2482 (1.1933) lr 1.2369e-04 eta 2:37:40
epoch [48/50] batch [1360/2000] time 2.030 (2.030) data 0.000 (0.001) loss 1.5999 (1.1926) lr 1.2369e-04 eta 2:36:59
epoch [48/50] batch [1380/2000] time 2.000 (2.030) data 0.000 (0.001) loss 5.1210 (1.1946) lr 1.2369e-04 eta 2:36:18
epoch [48/50] batch [1400/2000] time 2.003 (2.030) data 0.000 (0.001) loss 0.2298 (1.1928) lr 1.2369e-04 eta 2:35:37
epoch [48/50] batch [1420/2000] time 2.056 (2.030) data 0.000 (0.001) loss 2.8855 (1.1989) lr 1.2369e-04 eta 2:34:57
epoch [48/50] batch [1440/2000] time 2.004 (2.030) data 0.000 (0.001) loss 1.3342 (1.1997) lr 1.2369e-04 eta 2:34:16
epoch [48/50] batch [1460/2000] time 2.036 (2.030) data 0.000 (0.001) loss 1.2033 (1.2034) lr 1.2369e-04 eta 2:33:36
epoch [48/50] batch [1480/2000] time 2.008 (2.030) data 0.000 (0.001) loss 1.0804 (1.2023) lr 1.2369e-04 eta 2:32:55
epoch [48/50] batch [1500/2000] time 2.001 (2.030) data 0.000 (0.001) loss 1.2283 (1.2021) lr 1.2369e-04 eta 2:32:15
epoch [48/50] batch [1520/2000] time 2.054 (2.030) data 0.000 (0.001) loss 2.4416 (1.2054) lr 1.2369e-04 eta 2:31:35
epoch [48/50] batch [1540/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.5619 (1.2064) lr 1.2369e-04 eta 2:30:54
epoch [48/50] batch [1560/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.5915 (1.2068) lr 1.2369e-04 eta 2:30:13
epoch [48/50] batch [1580/2000] time 2.057 (2.030) data 0.000 (0.001) loss 1.3180 (1.2087) lr 1.2369e-04 eta 2:29:33
epoch [48/50] batch [1600/2000] time 2.050 (2.030) data 0.000 (0.001) loss 2.2383 (1.2093) lr 1.2369e-04 eta 2:28:52
epoch [48/50] batch [1620/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.5860 (1.2090) lr 1.2369e-04 eta 2:28:12
epoch [48/50] batch [1640/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.6642 (1.2122) lr 1.2369e-04 eta 2:27:31
epoch [48/50] batch [1660/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.1114 (1.2138) lr 1.2369e-04 eta 2:26:51
epoch [48/50] batch [1680/2000] time 2.055 (2.030) data 0.001 (0.001) loss 1.1469 (1.2139) lr 1.2369e-04 eta 2:26:10
epoch [48/50] batch [1700/2000] time 2.034 (2.030) data 0.000 (0.001) loss 1.4298 (1.2140) lr 1.2369e-04 eta 2:25:30
epoch [48/50] batch [1720/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.8696 (1.2083) lr 1.2369e-04 eta 2:24:49
epoch [48/50] batch [1740/2000] time 1.974 (2.030) data 0.000 (0.001) loss 3.0492 (1.2066) lr 1.2369e-04 eta 2:24:09
epoch [48/50] batch [1760/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.0629 (1.2049) lr 1.2369e-04 eta 2:23:28
epoch [48/50] batch [1780/2000] time 2.035 (2.030) data 0.000 (0.001) loss 0.0598 (1.2012) lr 1.2369e-04 eta 2:22:48
epoch [48/50] batch [1800/2000] time 2.049 (2.031) data 0.000 (0.001) loss 1.6876 (1.2014) lr 1.2369e-04 eta 2:22:08
epoch [48/50] batch [1820/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.2505 (1.1992) lr 1.2369e-04 eta 2:21:27
epoch [48/50] batch [1840/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.5955 (1.1973) lr 1.2369e-04 eta 2:20:46
epoch [48/50] batch [1860/2000] time 2.027 (2.030) data 0.000 (0.000) loss 0.2878 (1.1979) lr 1.2369e-04 eta 2:20:06
epoch [48/50] batch [1880/2000] time 2.050 (2.031) data 0.000 (0.000) loss 0.6334 (1.1943) lr 1.2369e-04 eta 2:19:25
epoch [48/50] batch [1900/2000] time 2.054 (2.031) data 0.000 (0.000) loss 1.1015 (1.1927) lr 1.2369e-04 eta 2:18:45
epoch [48/50] batch [1920/2000] time 2.050 (2.031) data 0.000 (0.000) loss 2.2003 (1.1965) lr 1.2369e-04 eta 2:18:05
epoch [48/50] batch [1940/2000] time 2.025 (2.031) data 0.000 (0.000) loss 1.2087 (1.1956) lr 1.2369e-04 eta 2:17:24
epoch [48/50] batch [1960/2000] time 2.048 (2.031) data 0.000 (0.000) loss 0.2980 (1.1953) lr 1.2369e-04 eta 2:16:43
epoch [48/50] batch [1980/2000] time 1.973 (2.031) data 0.000 (0.000) loss 2.9606 (1.1923) lr 1.2369e-04 eta 2:16:02
epoch [48/50] batch [2000/2000] time 2.050 (2.031) data 0.000 (0.000) loss 0.7284 (1.1924) lr 9.5173e-05 eta 2:15:22
epoch [49/50] batch [20/2000] time 2.030 (2.056) data 0.000 (0.027) loss 1.7522 (1.0447) lr 9.5173e-05 eta 2:16:21
epoch [49/50] batch [40/2000] time 2.003 (2.044) data 0.000 (0.014) loss 1.6927 (0.9829) lr 9.5173e-05 eta 2:14:53
epoch [49/50] batch [60/2000] time 2.030 (2.038) data 0.000 (0.009) loss 0.7967 (1.0403) lr 9.5173e-05 eta 2:13:50
epoch [49/50] batch [80/2000] time 2.051 (2.037) data 0.000 (0.007) loss 2.0205 (1.1759) lr 9.5173e-05 eta 2:13:03
epoch [49/50] batch [100/2000] time 2.030 (2.035) data 0.000 (0.006) loss 1.5010 (1.1193) lr 9.5173e-05 eta 2:12:17
epoch [49/50] batch [120/2000] time 2.055 (2.035) data 0.000 (0.005) loss 1.7672 (1.1575) lr 9.5173e-05 eta 2:11:34
epoch [49/50] batch [140/2000] time 2.051 (2.035) data 0.000 (0.004) loss 0.2842 (1.1609) lr 9.5173e-05 eta 2:10:55
epoch [49/50] batch [160/2000] time 2.056 (2.035) data 0.000 (0.004) loss 0.7494 (1.1502) lr 9.5173e-05 eta 2:10:15
epoch [49/50] batch [180/2000] time 2.054 (2.034) data 0.000 (0.003) loss 1.3459 (1.1788) lr 9.5173e-05 eta 2:09:31
epoch [49/50] batch [200/2000] time 1.998 (2.033) data 0.000 (0.003) loss 0.7492 (1.1650) lr 9.5173e-05 eta 2:08:46
epoch [49/50] batch [220/2000] time 1.998 (2.032) data 0.000 (0.003) loss 1.3505 (1.1730) lr 9.5173e-05 eta 2:08:01
epoch [49/50] batch [240/2000] time 2.028 (2.033) data 0.000 (0.002) loss 1.4943 (1.1603) lr 9.5173e-05 eta 2:07:25
epoch [49/50] batch [260/2000] time 2.052 (2.033) data 0.000 (0.002) loss 0.7193 (1.1607) lr 9.5173e-05 eta 2:06:44
epoch [49/50] batch [280/2000] time 2.030 (2.033) data 0.000 (0.002) loss 3.2283 (1.1743) lr 9.5173e-05 eta 2:06:03
epoch [49/50] batch [300/2000] time 2.025 (2.033) data 0.000 (0.002) loss 4.7684 (1.1891) lr 9.5173e-05 eta 2:05:21
epoch [49/50] batch [320/2000] time 1.997 (2.032) data 0.000 (0.002) loss 2.6237 (1.1838) lr 9.5173e-05 eta 2:04:37
epoch [49/50] batch [340/2000] time 1.998 (2.032) data 0.000 (0.002) loss 0.6428 (1.1835) lr 9.5173e-05 eta 2:03:57
epoch [49/50] batch [360/2000] time 1.998 (2.032) data 0.000 (0.002) loss 0.8310 (1.1617) lr 9.5173e-05 eta 2:03:15
epoch [49/50] batch [380/2000] time 2.028 (2.032) data 0.000 (0.002) loss 2.0754 (1.1688) lr 9.5173e-05 eta 2:02:34
epoch [49/50] batch [400/2000] time 2.029 (2.031) data 0.000 (0.002) loss 0.6051 (1.1740) lr 9.5173e-05 eta 2:01:51
epoch [49/50] batch [420/2000] time 2.000 (2.031) data 0.000 (0.001) loss 0.1522 (1.1828) lr 9.5173e-05 eta 2:01:09
epoch [49/50] batch [440/2000] time 2.001 (2.030) data 0.000 (0.001) loss 0.4458 (1.1843) lr 9.5173e-05 eta 2:00:28
epoch [49/50] batch [460/2000] time 2.051 (2.030) data 0.000 (0.001) loss 2.5349 (1.1871) lr 9.5173e-05 eta 1:59:47
epoch [49/50] batch [480/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.1208 (1.1809) lr 9.5173e-05 eta 1:59:07
epoch [49/50] batch [500/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.0408 (1.1880) lr 9.5173e-05 eta 1:58:26
epoch [49/50] batch [520/2000] time 2.000 (2.030) data 0.000 (0.001) loss 3.8148 (1.1966) lr 9.5173e-05 eta 1:57:45
epoch [49/50] batch [540/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.4767 (1.2075) lr 9.5173e-05 eta 1:57:04
epoch [49/50] batch [560/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.9136 (1.2129) lr 9.5173e-05 eta 1:56:24
epoch [49/50] batch [580/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.4013 (1.2097) lr 9.5173e-05 eta 1:55:43
epoch [49/50] batch [600/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.3492 (1.1992) lr 9.5173e-05 eta 1:55:02
epoch [49/50] batch [620/2000] time 1.973 (2.030) data 0.000 (0.001) loss 0.9167 (1.1865) lr 9.5173e-05 eta 1:54:21
epoch [49/50] batch [640/2000] time 2.027 (2.030) data 0.000 (0.001) loss 4.5504 (1.1947) lr 9.5173e-05 eta 1:53:40
epoch [49/50] batch [660/2000] time 1.972 (2.030) data 0.000 (0.001) loss 0.4387 (1.1881) lr 9.5173e-05 eta 1:52:59
epoch [49/50] batch [680/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.0257 (1.1870) lr 9.5173e-05 eta 1:52:18
epoch [49/50] batch [700/2000] time 2.032 (2.030) data 0.000 (0.001) loss 1.5663 (1.1859) lr 9.5173e-05 eta 1:51:38
epoch [49/50] batch [720/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.4971 (1.1873) lr 9.5173e-05 eta 1:50:56
epoch [49/50] batch [740/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.1478 (1.1874) lr 9.5173e-05 eta 1:50:16
epoch [49/50] batch [760/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.5877 (1.1893) lr 9.5173e-05 eta 1:49:35
epoch [49/50] batch [780/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.9878 (1.1882) lr 9.5173e-05 eta 1:48:55
epoch [49/50] batch [800/2000] time 2.002 (2.029) data 0.000 (0.001) loss 0.1205 (1.1885) lr 9.5173e-05 eta 1:48:14
epoch [49/50] batch [820/2000] time 1.998 (2.030) data 0.000 (0.001) loss 2.8217 (1.1908) lr 9.5173e-05 eta 1:47:33
epoch [49/50] batch [840/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.4173 (1.1968) lr 9.5173e-05 eta 1:46:53
epoch [49/50] batch [860/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.8127 (1.2007) lr 9.5173e-05 eta 1:46:12
epoch [49/50] batch [880/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.2070 (1.1968) lr 9.5173e-05 eta 1:45:32
epoch [49/50] batch [900/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.1863 (1.1924) lr 9.5173e-05 eta 1:44:51
epoch [49/50] batch [920/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.2389 (1.1854) lr 9.5173e-05 eta 1:44:11
epoch [49/50] batch [940/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.6253 (1.1817) lr 9.5173e-05 eta 1:43:30
epoch [49/50] batch [960/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.7674 (1.1811) lr 9.5173e-05 eta 1:42:49
epoch [49/50] batch [980/2000] time 2.057 (2.029) data 0.000 (0.001) loss 1.2778 (1.1758) lr 9.5173e-05 eta 1:42:08
epoch [49/50] batch [1000/2000] time 2.033 (2.030) data 0.000 (0.001) loss 4.2054 (1.1751) lr 9.5173e-05 eta 1:41:28
epoch [49/50] batch [1020/2000] time 2.004 (2.030) data 0.000 (0.001) loss 2.9939 (1.1790) lr 9.5173e-05 eta 1:40:48
epoch [49/50] batch [1040/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.9382 (1.1785) lr 9.5173e-05 eta 1:40:07
epoch [49/50] batch [1060/2000] time 2.002 (2.030) data 0.000 (0.001) loss 2.5383 (1.1811) lr 9.5173e-05 eta 1:39:26
epoch [49/50] batch [1080/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.1187 (1.1794) lr 9.5173e-05 eta 1:38:46
epoch [49/50] batch [1100/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.1847 (1.1770) lr 9.5173e-05 eta 1:38:05
epoch [49/50] batch [1120/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.8704 (1.1772) lr 9.5173e-05 eta 1:37:24
epoch [49/50] batch [1140/2000] time 2.055 (2.029) data 0.001 (0.001) loss 0.0487 (1.1749) lr 9.5173e-05 eta 1:36:44
epoch [49/50] batch [1160/2000] time 2.033 (2.030) data 0.000 (0.001) loss 1.1599 (1.1786) lr 9.5173e-05 eta 1:36:03
epoch [49/50] batch [1180/2000] time 2.031 (2.030) data 0.000 (0.001) loss 2.0008 (1.1834) lr 9.5173e-05 eta 1:35:23
epoch [49/50] batch [1200/2000] time 2.053 (2.030) data 0.000 (0.001) loss 3.2207 (1.1832) lr 9.5173e-05 eta 1:34:43
epoch [49/50] batch [1220/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.1440 (1.1814) lr 9.5173e-05 eta 1:34:02
epoch [49/50] batch [1240/2000] time 1.976 (2.030) data 0.000 (0.001) loss 1.8488 (1.1809) lr 9.5173e-05 eta 1:33:22
epoch [49/50] batch [1260/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.9832 (1.1793) lr 9.5173e-05 eta 1:32:41
epoch [49/50] batch [1280/2000] time 2.033 (2.030) data 0.000 (0.001) loss 2.6015 (1.1816) lr 9.5173e-05 eta 1:32:00
epoch [49/50] batch [1300/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.2368 (1.1811) lr 9.5173e-05 eta 1:31:20
epoch [49/50] batch [1320/2000] time 1.997 (2.030) data 0.000 (0.001) loss 2.3097 (1.1838) lr 9.5173e-05 eta 1:30:39
epoch [49/50] batch [1340/2000] time 1.998 (2.030) data 0.000 (0.001) loss 2.4584 (1.1875) lr 9.5173e-05 eta 1:29:58
epoch [49/50] batch [1360/2000] time 1.975 (2.030) data 0.000 (0.001) loss 2.4533 (1.1822) lr 9.5173e-05 eta 1:29:18
epoch [49/50] batch [1380/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.7006 (1.1853) lr 9.5173e-05 eta 1:28:37
epoch [49/50] batch [1400/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.6370 (1.1874) lr 9.5173e-05 eta 1:27:57
epoch [49/50] batch [1420/2000] time 2.049 (2.030) data 0.000 (0.001) loss 3.0436 (1.1919) lr 9.5173e-05 eta 1:27:16
epoch [49/50] batch [1440/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.9633 (1.1910) lr 9.5173e-05 eta 1:26:35
epoch [49/50] batch [1460/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.3624 (1.1867) lr 9.5173e-05 eta 1:25:55
epoch [49/50] batch [1480/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.7355 (1.1803) lr 9.5173e-05 eta 1:25:14
epoch [49/50] batch [1500/2000] time 2.055 (2.029) data 0.000 (0.001) loss 2.2702 (1.1796) lr 9.5173e-05 eta 1:24:33
epoch [49/50] batch [1520/2000] time 2.025 (2.029) data 0.000 (0.001) loss 2.3420 (1.1792) lr 9.5173e-05 eta 1:23:53
epoch [49/50] batch [1540/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.7958 (1.1875) lr 9.5173e-05 eta 1:23:12
epoch [49/50] batch [1560/2000] time 1.977 (2.029) data 0.000 (0.001) loss 0.4949 (1.1869) lr 9.5173e-05 eta 1:22:31
epoch [49/50] batch [1580/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.7043 (1.1868) lr 9.5173e-05 eta 1:21:51
epoch [49/50] batch [1600/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.5595 (1.1836) lr 9.5173e-05 eta 1:21:10
epoch [49/50] batch [1620/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.2446 (1.1831) lr 9.5173e-05 eta 1:20:30
epoch [49/50] batch [1640/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.3644 (1.1833) lr 9.5173e-05 eta 1:19:49
epoch [49/50] batch [1660/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.4557 (1.1844) lr 9.5173e-05 eta 1:19:08
epoch [49/50] batch [1680/2000] time 1.997 (2.029) data 0.001 (0.001) loss 2.2981 (1.1822) lr 9.5173e-05 eta 1:18:28
epoch [49/50] batch [1700/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.5995 (1.1856) lr 9.5173e-05 eta 1:17:47
epoch [49/50] batch [1720/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.0058 (1.1842) lr 9.5173e-05 eta 1:17:06
epoch [49/50] batch [1740/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.3683 (1.1827) lr 9.5173e-05 eta 1:16:26
epoch [49/50] batch [1760/2000] time 1.998 (2.029) data 0.000 (0.001) loss 2.2909 (1.1828) lr 9.5173e-05 eta 1:15:45
epoch [49/50] batch [1780/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.8750 (1.1813) lr 9.5173e-05 eta 1:15:05
epoch [49/50] batch [1800/2000] time 2.056 (2.030) data 0.000 (0.001) loss 1.1059 (1.1801) lr 9.5173e-05 eta 1:14:25
epoch [49/50] batch [1820/2000] time 2.053 (2.030) data 0.000 (0.000) loss 0.6699 (1.1783) lr 9.5173e-05 eta 1:13:44
epoch [49/50] batch [1840/2000] time 1.998 (2.030) data 0.000 (0.000) loss 0.5268 (1.1762) lr 9.5173e-05 eta 1:13:03
epoch [49/50] batch [1860/2000] time 2.000 (2.030) data 0.000 (0.000) loss 0.4644 (1.1762) lr 9.5173e-05 eta 1:12:23
epoch [49/50] batch [1880/2000] time 1.973 (2.030) data 0.000 (0.000) loss 0.7902 (1.1771) lr 9.5173e-05 eta 1:11:42
epoch [49/50] batch [1900/2000] time 2.051 (2.030) data 0.000 (0.000) loss 0.4999 (1.1778) lr 9.5173e-05 eta 1:11:02
epoch [49/50] batch [1920/2000] time 1.996 (2.030) data 0.000 (0.000) loss 0.7278 (1.1774) lr 9.5173e-05 eta 1:10:21
epoch [49/50] batch [1940/2000] time 1.996 (2.029) data 0.000 (0.000) loss 4.0635 (1.1808) lr 9.5173e-05 eta 1:09:40
epoch [49/50] batch [1960/2000] time 1.997 (2.029) data 0.000 (0.000) loss 0.9279 (1.1788) lr 9.5173e-05 eta 1:09:00
epoch [49/50] batch [1980/2000] time 2.028 (2.029) data 0.000 (0.000) loss 1.9111 (1.1770) lr 9.5173e-05 eta 1:08:19
epoch [49/50] batch [2000/2000] time 2.002 (2.029) data 0.000 (0.000) loss 0.9796 (1.1769) lr 7.0224e-05 eta 1:07:38
epoch [50/50] batch [20/2000] time 1.998 (2.054) data 0.000 (0.026) loss 1.9757 (1.5790) lr 7.0224e-05 eta 1:07:46
epoch [50/50] batch [40/2000] time 2.052 (2.045) data 0.000 (0.013) loss 1.5390 (1.3538) lr 7.0224e-05 eta 1:06:47
epoch [50/50] batch [60/2000] time 2.052 (2.038) data 0.001 (0.009) loss 3.1245 (1.3999) lr 7.0224e-05 eta 1:05:53
epoch [50/50] batch [80/2000] time 1.998 (2.036) data 0.000 (0.007) loss 2.0681 (1.3124) lr 7.0224e-05 eta 1:05:08
epoch [50/50] batch [100/2000] time 2.030 (2.034) data 0.000 (0.005) loss 2.9103 (1.3133) lr 7.0224e-05 eta 1:04:25
epoch [50/50] batch [120/2000] time 2.054 (2.034) data 0.000 (0.005) loss 0.5666 (1.2911) lr 7.0224e-05 eta 1:03:43
epoch [50/50] batch [140/2000] time 2.052 (2.033) data 0.000 (0.004) loss 0.9730 (1.2425) lr 7.0224e-05 eta 1:03:01
epoch [50/50] batch [160/2000] time 2.052 (2.032) data 0.000 (0.003) loss 2.1122 (1.2649) lr 7.0224e-05 eta 1:02:19
epoch [50/50] batch [180/2000] time 2.028 (2.031) data 0.000 (0.003) loss 1.8069 (1.2440) lr 7.0224e-05 eta 1:01:36
epoch [50/50] batch [200/2000] time 2.025 (2.031) data 0.000 (0.003) loss 1.0107 (1.2459) lr 7.0224e-05 eta 1:00:55
epoch [50/50] batch [220/2000] time 1.999 (2.031) data 0.000 (0.003) loss 0.2529 (1.2676) lr 7.0224e-05 eta 1:00:14
epoch [50/50] batch [240/2000] time 2.034 (2.031) data 0.000 (0.002) loss 0.9366 (1.2456) lr 7.0224e-05 eta 0:59:34
epoch [50/50] batch [260/2000] time 2.002 (2.030) data 0.000 (0.002) loss 1.2213 (1.2189) lr 7.0224e-05 eta 0:58:52
epoch [50/50] batch [280/2000] time 2.035 (2.031) data 0.000 (0.002) loss 0.6378 (1.2124) lr 7.0224e-05 eta 0:58:13
epoch [50/50] batch [300/2000] time 2.057 (2.032) data 0.000 (0.002) loss 2.4275 (1.1984) lr 7.0224e-05 eta 0:57:34
epoch [50/50] batch [320/2000] time 1.979 (2.032) data 0.000 (0.002) loss 0.0729 (1.1889) lr 7.0224e-05 eta 0:56:54
epoch [50/50] batch [340/2000] time 1.977 (2.033) data 0.000 (0.002) loss 1.0089 (1.1798) lr 7.0224e-05 eta 0:56:14
epoch [50/50] batch [360/2000] time 2.052 (2.033) data 0.000 (0.002) loss 1.0930 (1.1667) lr 7.0224e-05 eta 0:55:33
epoch [50/50] batch [380/2000] time 2.054 (2.033) data 0.000 (0.002) loss 1.0004 (1.1624) lr 7.0224e-05 eta 0:54:53
epoch [50/50] batch [400/2000] time 1.999 (2.033) data 0.000 (0.002) loss 1.5833 (1.1678) lr 7.0224e-05 eta 0:54:12
epoch [50/50] batch [420/2000] time 2.050 (2.032) data 0.000 (0.001) loss 0.7002 (1.1737) lr 7.0224e-05 eta 0:53:31
epoch [50/50] batch [440/2000] time 2.054 (2.032) data 0.000 (0.001) loss 1.4693 (1.1622) lr 7.0224e-05 eta 0:52:50
epoch [50/50] batch [460/2000] time 2.050 (2.032) data 0.000 (0.001) loss 1.2839 (1.1608) lr 7.0224e-05 eta 0:52:09
epoch [50/50] batch [480/2000] time 2.030 (2.032) data 0.000 (0.001) loss 1.6566 (1.1669) lr 7.0224e-05 eta 0:51:29
epoch [50/50] batch [500/2000] time 1.999 (2.032) data 0.000 (0.001) loss 1.2357 (1.1722) lr 7.0224e-05 eta 0:50:48
epoch [50/50] batch [520/2000] time 1.999 (2.032) data 0.000 (0.001) loss 0.8977 (1.1692) lr 7.0224e-05 eta 0:50:08
epoch [50/50] batch [540/2000] time 2.055 (2.033) data 0.000 (0.001) loss 0.4091 (1.1692) lr 7.0224e-05 eta 0:49:27
epoch [50/50] batch [560/2000] time 2.054 (2.033) data 0.000 (0.001) loss 0.1554 (1.1808) lr 7.0224e-05 eta 0:48:46
epoch [50/50] batch [580/2000] time 2.055 (2.033) data 0.000 (0.001) loss 0.2084 (1.1859) lr 7.0224e-05 eta 0:48:06
epoch [50/50] batch [600/2000] time 2.049 (2.033) data 0.001 (0.001) loss 2.0683 (1.1779) lr 7.0224e-05 eta 0:47:25
epoch [50/50] batch [620/2000] time 1.994 (2.032) data 0.000 (0.001) loss 0.8460 (1.1804) lr 7.0224e-05 eta 0:46:44
epoch [50/50] batch [640/2000] time 1.997 (2.032) data 0.000 (0.001) loss 1.3884 (1.1703) lr 7.0224e-05 eta 0:46:03
epoch [50/50] batch [660/2000] time 2.045 (2.032) data 0.000 (0.001) loss 1.5320 (1.1774) lr 7.0224e-05 eta 0:45:22
epoch [50/50] batch [680/2000] time 2.031 (2.032) data 0.000 (0.001) loss 2.3087 (1.1776) lr 7.0224e-05 eta 0:44:41
epoch [50/50] batch [700/2000] time 2.047 (2.031) data 0.000 (0.001) loss 1.2233 (1.1851) lr 7.0224e-05 eta 0:44:00
epoch [50/50] batch [720/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.3564 (1.1770) lr 7.0224e-05 eta 0:43:20
epoch [50/50] batch [740/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.5974 (1.1801) lr 7.0224e-05 eta 0:42:39
epoch [50/50] batch [760/2000] time 2.028 (2.031) data 0.000 (0.001) loss 1.4444 (1.1838) lr 7.0224e-05 eta 0:41:58
epoch [50/50] batch [780/2000] time 1.999 (2.031) data 0.000 (0.001) loss 1.0246 (1.1791) lr 7.0224e-05 eta 0:41:17
epoch [50/50] batch [800/2000] time 2.030 (2.031) data 0.000 (0.001) loss 0.2062 (1.1740) lr 7.0224e-05 eta 0:40:37
epoch [50/50] batch [820/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.5360 (1.1683) lr 7.0224e-05 eta 0:39:56
epoch [50/50] batch [840/2000] time 2.000 (2.031) data 0.000 (0.001) loss 2.5154 (1.1743) lr 7.0224e-05 eta 0:39:15
epoch [50/50] batch [860/2000] time 2.056 (2.031) data 0.000 (0.001) loss 0.4366 (1.1812) lr 7.0224e-05 eta 0:38:35
epoch [50/50] batch [880/2000] time 2.052 (2.031) data 0.000 (0.001) loss 1.1161 (1.1784) lr 7.0224e-05 eta 0:37:54
epoch [50/50] batch [900/2000] time 2.001 (2.031) data 0.000 (0.001) loss 1.2080 (1.1851) lr 7.0224e-05 eta 0:37:13
epoch [50/50] batch [920/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.2578 (1.1796) lr 7.0224e-05 eta 0:36:33
epoch [50/50] batch [940/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.7158 (1.1808) lr 7.0224e-05 eta 0:35:52
epoch [50/50] batch [960/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.2706 (1.1856) lr 7.0224e-05 eta 0:35:11
epoch [50/50] batch [980/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.1575 (1.1830) lr 7.0224e-05 eta 0:34:31
epoch [50/50] batch [1000/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.5488 (1.1771) lr 7.0224e-05 eta 0:33:50
epoch [50/50] batch [1020/2000] time 2.052 (2.030) data 0.000 (0.001) loss 2.1401 (1.1793) lr 7.0224e-05 eta 0:33:09
epoch [50/50] batch [1040/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.8986 (1.1822) lr 7.0224e-05 eta 0:32:29
epoch [50/50] batch [1060/2000] time 2.054 (2.031) data 0.000 (0.001) loss 0.4701 (1.1780) lr 7.0224e-05 eta 0:31:48
epoch [50/50] batch [1080/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.7268 (1.1764) lr 7.0224e-05 eta 0:31:08
epoch [50/50] batch [1100/2000] time 2.030 (2.030) data 0.000 (0.001) loss 1.8153 (1.1771) lr 7.0224e-05 eta 0:30:27
epoch [50/50] batch [1120/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.5117 (1.1779) lr 7.0224e-05 eta 0:29:46
epoch [50/50] batch [1140/2000] time 2.052 (2.030) data 0.001 (0.001) loss 4.2649 (1.1811) lr 7.0224e-05 eta 0:29:06
epoch [50/50] batch [1160/2000] time 2.032 (2.030) data 0.000 (0.001) loss 1.3979 (1.1795) lr 7.0224e-05 eta 0:28:25
epoch [50/50] batch [1180/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.2716 (1.1778) lr 7.0224e-05 eta 0:27:44
epoch [50/50] batch [1200/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.4850 (1.1764) lr 7.0224e-05 eta 0:27:04
epoch [50/50] batch [1220/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.6727 (1.1755) lr 7.0224e-05 eta 0:26:23
epoch [50/50] batch [1240/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.0210 (1.1758) lr 7.0224e-05 eta 0:25:43
epoch [50/50] batch [1260/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.5975 (1.1750) lr 7.0224e-05 eta 0:25:02
epoch [50/50] batch [1280/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.1965 (1.1793) lr 7.0224e-05 eta 0:24:21
epoch [50/50] batch [1300/2000] time 1.974 (2.030) data 0.000 (0.001) loss 1.5607 (1.1774) lr 7.0224e-05 eta 0:23:41
epoch [50/50] batch [1320/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.6264 (1.1757) lr 7.0224e-05 eta 0:23:00
epoch [50/50] batch [1340/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.1892 (1.1782) lr 7.0224e-05 eta 0:22:19
epoch [50/50] batch [1360/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.7956 (1.1729) lr 7.0224e-05 eta 0:21:39
epoch [50/50] batch [1380/2000] time 2.030 (2.030) data 0.000 (0.001) loss 2.1583 (1.1739) lr 7.0224e-05 eta 0:20:58
epoch [50/50] batch [1400/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.3295 (1.1706) lr 7.0224e-05 eta 0:20:18
epoch [50/50] batch [1420/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.0525 (1.1696) lr 7.0224e-05 eta 0:19:37
epoch [50/50] batch [1440/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.6230 (1.1706) lr 7.0224e-05 eta 0:18:56
epoch [50/50] batch [1460/2000] time 2.000 (2.030) data 0.000 (0.001) loss 1.4458 (1.1674) lr 7.0224e-05 eta 0:18:16
epoch [50/50] batch [1480/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.1391 (1.1698) lr 7.0224e-05 eta 0:17:35
epoch [50/50] batch [1500/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.2455 (1.1736) lr 7.0224e-05 eta 0:16:55
epoch [50/50] batch [1520/2000] time 2.028 (2.030) data 0.000 (0.001) loss 1.1294 (1.1740) lr 7.0224e-05 eta 0:16:14
epoch [50/50] batch [1540/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.8457 (1.1735) lr 7.0224e-05 eta 0:15:33
epoch [50/50] batch [1560/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.2275 (1.1769) lr 7.0224e-05 eta 0:14:53
epoch [50/50] batch [1580/2000] time 1.976 (2.030) data 0.000 (0.001) loss 0.7428 (1.1788) lr 7.0224e-05 eta 0:14:12
epoch [50/50] batch [1600/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.3250 (1.1783) lr 7.0224e-05 eta 0:13:32
epoch [50/50] batch [1620/2000] time 2.026 (2.030) data 0.000 (0.001) loss 0.6505 (1.1752) lr 7.0224e-05 eta 0:12:51
epoch [50/50] batch [1640/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.4284 (1.1761) lr 7.0224e-05 eta 0:12:10
epoch [50/50] batch [1660/2000] time 2.031 (2.030) data 0.000 (0.001) loss 1.1780 (1.1775) lr 7.0224e-05 eta 0:11:30
epoch [50/50] batch [1680/2000] time 2.029 (2.030) data 0.001 (0.001) loss 0.1193 (1.1806) lr 7.0224e-05 eta 0:10:49
epoch [50/50] batch [1700/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.6732 (1.1817) lr 7.0224e-05 eta 0:10:09
epoch [50/50] batch [1720/2000] time 2.051 (2.030) data 0.000 (0.001) loss 3.1558 (1.1845) lr 7.0224e-05 eta 0:09:28
epoch [50/50] batch [1740/2000] time 2.055 (2.030) data 0.000 (0.001) loss 2.8113 (1.1814) lr 7.0224e-05 eta 0:08:47
epoch [50/50] batch [1760/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.8911 (1.1810) lr 7.0224e-05 eta 0:08:07
epoch [50/50] batch [1780/2000] time 1.999 (2.030) data 0.000 (0.000) loss 0.0991 (1.1845) lr 7.0224e-05 eta 0:07:26
epoch [50/50] batch [1800/2000] time 2.054 (2.030) data 0.000 (0.000) loss 1.9629 (1.1815) lr 7.0224e-05 eta 0:06:46
epoch [50/50] batch [1820/2000] time 2.034 (2.030) data 0.000 (0.000) loss 0.9684 (1.1768) lr 7.0224e-05 eta 0:06:05
epoch [50/50] batch [1840/2000] time 2.006 (2.030) data 0.000 (0.000) loss 1.0220 (1.1733) lr 7.0224e-05 eta 0:05:24
epoch [50/50] batch [1860/2000] time 2.009 (2.030) data 0.000 (0.000) loss 0.3914 (1.1747) lr 7.0224e-05 eta 0:04:44
epoch [50/50] batch [1880/2000] time 2.052 (2.030) data 0.000 (0.000) loss 1.4092 (1.1717) lr 7.0224e-05 eta 0:04:03
epoch [50/50] batch [1900/2000] time 1.979 (2.030) data 0.000 (0.000) loss 0.7564 (1.1719) lr 7.0224e-05 eta 0:03:23
epoch [50/50] batch [1920/2000] time 2.053 (2.030) data 0.000 (0.000) loss 0.0222 (1.1717) lr 7.0224e-05 eta 0:02:42
epoch [50/50] batch [1940/2000] time 2.053 (2.030) data 0.000 (0.000) loss 0.7575 (1.1728) lr 7.0224e-05 eta 0:02:01
epoch [50/50] batch [1960/2000] time 2.033 (2.030) data 0.000 (0.000) loss 0.6414 (1.1731) lr 7.0224e-05 eta 0:01:21
epoch [50/50] batch [1980/2000] time 2.050 (2.030) data 0.000 (0.000) loss 0.8755 (1.1766) lr 7.0224e-05 eta 0:00:40
epoch [50/50] batch [2000/2000] time 2.025 (2.030) data 0.000 (0.000) loss 0.7711 (1.1785) lr 4.8943e-05 eta 0:00:00
Checkpoint saved to output/base2new/train_base/imagenet/vit_b16_ep50_c4_BZ4_ProDA/seed3/prompt_learner/model.pth.tar-50
Finish training
Deploy the last-epoch model
Evaluate on the *test* set
=> result
* total: 25,000
* correct: 19,935
* accuracy: 79.74%
* error: 20.26%
* macro_f1: 79.35%
Elapsed: 2 days, 8:37:18
