***************
** Arguments **
***************
backbone: 
config_file: configs/trainers/ProDA/vit_b16_ep50_c4_BZ4_ProDA.yaml
dataset_config_file: configs/datasets/imagenet.yaml
eval_only: False
head: 
load_epoch: None
model_dir: 
no_train: False
opts: ['DATASET.NUM_SHOTS', '16', 'DATASET.SUBSAMPLE_CLASSES', 'base']
output_dir: output/base2new/train_base/imagenet/vit_b16_ep50_c4_BZ4_ProDA/seed2
resume: 
root: /mnt/hdd/DATA
seed: 2
source_domains: None
target_domains: None
trainer: ProDA
transforms: None
************
** Config **
************
DATALOADER:
  K_TRANSFORMS: 1
  NUM_WORKERS: 8
  RETURN_IMG0: False
  TEST:
    BATCH_SIZE: 100
    SAMPLER: SequentialSampler
  TRAIN_U:
    BATCH_SIZE: 32
    N_DOMAIN: 0
    N_INS: 16
    SAME_AS_X: True
    SAMPLER: RandomSampler
  TRAIN_X:
    BATCH_SIZE: 4
    N_DOMAIN: 0
    N_INS: 16
    SAMPLER: RandomSampler
DATASET:
  ALL_AS_UNLABELED: False
  CIFAR_C_LEVEL: 1
  CIFAR_C_TYPE: 
  NAME: ImageNet
  NUM_LABELED: -1
  NUM_SHOTS: 16
  ROOT: /mnt/hdd/DATA
  SOURCE_DOMAINS: ()
  STL10_FOLD: -1
  SUBSAMPLE_CLASSES: base
  TARGET_DOMAINS: ()
  VAL_PERCENT: 0.1
INPUT:
  COLORJITTER_B: 0.4
  COLORJITTER_C: 0.4
  COLORJITTER_H: 0.1
  COLORJITTER_S: 0.4
  CROP_PADDING: 4
  CUTOUT_LEN: 16
  CUTOUT_N: 1
  GB_K: 21
  GB_P: 0.5
  GN_MEAN: 0.0
  GN_STD: 0.15
  INTERPOLATION: bicubic
  NO_TRANSFORM: False
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  RANDAUGMENT_M: 10
  RANDAUGMENT_N: 2
  RGS_P: 0.2
  RRCROP_SCALE: (0.08, 1.0)
  SIZE: (224, 224)
  TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize')
MODEL:
  BACKBONE:
    NAME: ViT-B/16
    PRETRAINED: True
  HEAD:
    ACTIVATION: relu
    BN: True
    DROPOUT: 0.0
    HIDDEN_LAYERS: ()
    NAME: 
  INIT_WEIGHTS: 
OPTIM:
  ADAM_BETA1: 0.9
  ADAM_BETA2: 0.999
  BASE_LR_MULT: 0.1
  GAMMA: 0.1
  LR: 0.002
  LR_SCHEDULER: cosine
  MAX_EPOCH: 50
  MOMENTUM: 0.9
  NAME: sgd
  NEW_LAYERS: ()
  RMSPROP_ALPHA: 0.99
  SGD_DAMPNING: 0
  SGD_NESTEROV: False
  STAGED_LR: False
  STEPSIZE: (-1,)
  WARMUP_CONS_LR: 1e-05
  WARMUP_EPOCH: 5
  WARMUP_MIN_LR: 1e-05
  WARMUP_RECOUNT: True
  WARMUP_TYPE: constant
  WEIGHT_DECAY: 0.0005
OUTPUT_DIR: output/base2new/train_base/imagenet/vit_b16_ep50_c4_BZ4_ProDA/seed2
RESUME: 
SEED: 2
TEST:
  COMPUTE_CMAT: False
  EVALUATOR: Classification
  FINAL_MODEL: last_step
  NO_TEST: False
  PER_CLASS_RESULT: False
  SPLIT: test
TRAIN:
  CHECKPOINT_FREQ: 0
  COUNT_ITER: train_x
  PRINT_FREQ: 20
TRAINER:
  CDAC:
    CLASS_LR_MULTI: 10
    P_THRESH: 0.95
    RAMPUP_COEF: 30
    RAMPUP_ITRS: 1000
    STRONG_TRANSFORMS: ()
    TOPK_MATCH: 5
  COCOOP:
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  COOP:
    CLASS_TOKEN_POSITION: end
    CSC: False
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  CROSSGRAD:
    ALPHA_D: 0.5
    ALPHA_F: 0.5
    EPS_D: 1.0
    EPS_F: 1.0
  DAEL:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DAELDG:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DDAIG:
    ALPHA: 0.5
    CLAMP: False
    CLAMP_MAX: 1.0
    CLAMP_MIN: -1.0
    G_ARCH: 
    LMDA: 0.3
    WARMUP: 0
  DOMAINMIX:
    ALPHA: 1.0
    BETA: 1.0
    TYPE: crossdomain
  ENTMIN:
    LMDA: 0.001
  FIXMATCH:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 1.0
  IVLP:
    CTX_INIT: a photo of a
    N_CTX_TEXT: 2
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_TEXT: 9
    PROMPT_DEPTH_VISION: 9
  M3SDA:
    LMDA: 0.5
    N_STEP_F: 4
  MAPLE:
    CTX_INIT: a photo of a
    N_CTX: 4
    PREC: fp16
    PROMPT_DEPTH: 9
  MCD:
    N_STEP_F: 4
  MEANTEACHER:
    EMA_ALPHA: 0.999
    RAMPUP: 5
    WEIGHT_U: 1.0
  MIXMATCH:
    MIXUP_BETA: 0.75
    RAMPUP: 20000
    TEMP: 2.0
    WEIGHT_U: 100.0
  MME:
    LMDA: 0.1
  NAME: ProDA
  ProDA:
    N_CTX: 4
    N_PROMPT: 32
    PREC: fp16
  SE:
    CONF_THRE: 0.95
    EMA_ALPHA: 0.999
    RAMPUP: 300
  VPT:
    CTX_INIT: a photo of a
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_VISION: 1
USE_CUDA: True
VERBOSE: True
VERSION: 1
Collecting env info ...
** System info **
PyTorch version: 2.2.1+cu121
Is debug build: False
CUDA used to build PyTorch: 12.1
ROCM used to build PyTorch: N/A

OS: Debian GNU/Linux 12 (bookworm) (x86_64)
GCC version: (Debian 12.2.0-14) 12.2.0
Clang version: Could not collect
CMake version: Could not collect
Libc version: glibc-2.36

Python version: 3.11.2 (main, Mar 13 2023, 12:18:29) [GCC 12.2.0] (64-bit runtime)
Python platform: Linux-6.5.13-3-pve-x86_64-with-glibc2.36
Is CUDA available: True
CUDA runtime version: 11.8.89
CUDA_MODULE_LOADING set to: LAZY
GPU models and configuration: 
GPU 0: NVIDIA A800 80GB PCIe
GPU 1: NVIDIA A800 80GB PCIe

Nvidia driver version: 525.147.05
cuDNN version: Could not collect
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True

CPU:
Architecture:                       x86_64
CPU op-mode(s):                     32-bit, 64-bit
Address sizes:                      46 bits physical, 57 bits virtual
Byte Order:                         Little Endian
CPU(s):                             64
On-line CPU(s) list:                18,20,22,23,25-27,29,31,32,34,37,46-49
Off-line CPU(s) list:               0-17,19,21,24,28,30,33,35,36,38-45,50-63
Vendor ID:                          GenuineIntel
Model name:                         Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz
CPU family:                         6
Model:                              106
Thread(s) per core:                 2
Core(s) per socket:                 16
Socket(s):                          2
Stepping:                           6
CPU(s) scaling MHz:                 98%
CPU max MHz:                        3500.0000
CPU min MHz:                        800.0000
BogoMIPS:                           5800.00
Flags:                              fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect wbnoinvd dtherm ida arat pln pts vnmi avx512vbmi umip pku ospke avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid fsrm md_clear pconfig flush_l1d arch_capabilities
Virtualization:                     VT-x
L1d cache:                          1.5 MiB (32 instances)
L1i cache:                          1 MiB (32 instances)
L2 cache:                           40 MiB (32 instances)
L3 cache:                           48 MiB (2 instances)
NUMA node(s):                       2
NUMA node0 CPU(s):                  0-15,32-47
NUMA node1 CPU(s):                  16-31,48-63
Vulnerability Gather data sampling: Vulnerable: No microcode
Vulnerability Itlb multihit:        Not affected
Vulnerability L1tf:                 Not affected
Vulnerability Mds:                  Not affected
Vulnerability Meltdown:             Not affected
Vulnerability Mmio stale data:      Mitigation; Clear CPU buffers; SMT vulnerable
Vulnerability Retbleed:             Not affected
Vulnerability Spec rstack overflow: Not affected
Vulnerability Spec store bypass:    Mitigation; Speculative Store Bypass disabled via prctl
Vulnerability Spectre v1:           Mitigation; usercopy/swapgs barriers and __user pointer sanitization
Vulnerability Spectre v2:           Mitigation; Enhanced / Automatic IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
Vulnerability Srbds:                Not affected
Vulnerability Tsx async abort:      Not affected

Versions of relevant libraries:
[pip3] flake8==3.7.9
[pip3] flake8==3.7.9
[pip3] numpy==1.26.4
[pip3] torch==2.2.1
[pip3] torchaudio==2.2.1
[pip3] torchvision==0.17.1
[pip3] triton==2.2.0
[conda] Could not collect
        Pillow (10.2.0)

Loading trainer: ProDA
Loading dataset: ImageNet
Loading preprocessed few-shot data from /mnt/hdd/DATA/imagenet/split_fewshot/shot_16_shuffled-seed_2.pkl
SUBSAMPLE BASE CLASSES!
Building transform_train
+ random resized crop (size=(224, 224), scale=(0.08, 1.0))
+ random flip
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
Building transform_test
+ resize the smaller edge to 224
+ 224x224 center crop
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
---------  --------
Dataset    ImageNet
# classes  500
# train_x  8,000
# val      25,000
# test     25,000
---------  --------
Loading CLIP (backbone: ViT-B/16)
Building custom CLIP
Turning off gradients in both the image and the text encoder
Parameters to be updated: {'prompt_learner.ctx'}
Loading evaluator: Classification
No checkpoint found, train from scratch
Initialize tensorboard (log_dir=output/base2new/train_base/imagenet/vit_b16_ep50_c4_BZ4_ProDA/seed2/tensorboard)
epoch [1/50] batch [20/2000] time 2.023 (2.158) data 0.000 (0.030) loss 0.6529 (2.4562) lr 1.0000e-05 eta 2 days, 11:56:40
epoch [1/50] batch [40/2000] time 2.049 (2.093) data 0.000 (0.015) loss 3.2053 (2.4718) lr 1.0000e-05 eta 2 days, 10:07:12
epoch [1/50] batch [60/2000] time 2.005 (2.075) data 0.000 (0.010) loss 2.1863 (2.2759) lr 1.0000e-05 eta 2 days, 9:36:31
epoch [1/50] batch [80/2000] time 2.057 (2.065) data 0.000 (0.008) loss 1.6371 (2.1272) lr 1.0000e-05 eta 2 days, 9:19:17
epoch [1/50] batch [100/2000] time 2.054 (2.059) data 0.000 (0.006) loss 2.2995 (2.0928) lr 1.0000e-05 eta 2 days, 9:07:40
epoch [1/50] batch [120/2000] time 2.031 (2.053) data 0.000 (0.005) loss 1.8275 (2.0511) lr 1.0000e-05 eta 2 days, 8:57:55
epoch [1/50] batch [140/2000] time 2.052 (2.050) data 0.000 (0.004) loss 2.4632 (2.0524) lr 1.0000e-05 eta 2 days, 8:51:30
epoch [1/50] batch [160/2000] time 2.049 (2.047) data 0.000 (0.004) loss 0.2458 (2.0618) lr 1.0000e-05 eta 2 days, 8:46:33
epoch [1/50] batch [180/2000] time 1.997 (2.045) data 0.000 (0.004) loss 3.5027 (2.0434) lr 1.0000e-05 eta 2 days, 8:41:30
epoch [1/50] batch [200/2000] time 2.051 (2.043) data 0.000 (0.003) loss 4.1770 (2.0251) lr 1.0000e-05 eta 2 days, 8:38:08
epoch [1/50] batch [220/2000] time 1.976 (2.041) data 0.000 (0.003) loss 2.5055 (2.0092) lr 1.0000e-05 eta 2 days, 8:34:32
epoch [1/50] batch [240/2000] time 2.026 (2.040) data 0.000 (0.003) loss 1.4282 (1.9928) lr 1.0000e-05 eta 2 days, 8:32:15
epoch [1/50] batch [260/2000] time 1.998 (2.039) data 0.000 (0.003) loss 1.2297 (1.9858) lr 1.0000e-05 eta 2 days, 8:29:06
epoch [1/50] batch [280/2000] time 2.026 (2.038) data 0.000 (0.002) loss 3.1731 (1.9773) lr 1.0000e-05 eta 2 days, 8:27:18
epoch [1/50] batch [300/2000] time 1.980 (2.038) data 0.000 (0.002) loss 2.3839 (1.9617) lr 1.0000e-05 eta 2 days, 8:25:43
epoch [1/50] batch [320/2000] time 2.005 (2.038) data 0.000 (0.002) loss 3.7646 (1.9697) lr 1.0000e-05 eta 2 days, 8:25:12
epoch [1/50] batch [340/2000] time 2.053 (2.037) data 0.000 (0.002) loss 2.8732 (1.9699) lr 1.0000e-05 eta 2 days, 8:23:36
epoch [1/50] batch [360/2000] time 2.030 (2.037) data 0.000 (0.002) loss 2.1311 (1.9754) lr 1.0000e-05 eta 2 days, 8:22:22
epoch [1/50] batch [380/2000] time 2.027 (2.036) data 0.000 (0.002) loss 2.7274 (1.9729) lr 1.0000e-05 eta 2 days, 8:21:08
epoch [1/50] batch [400/2000] time 2.030 (2.036) data 0.000 (0.002) loss 0.7858 (1.9684) lr 1.0000e-05 eta 2 days, 8:20:04
epoch [1/50] batch [420/2000] time 2.051 (2.036) data 0.000 (0.002) loss 1.8373 (1.9906) lr 1.0000e-05 eta 2 days, 8:18:34
epoch [1/50] batch [440/2000] time 2.005 (2.036) data 0.000 (0.002) loss 1.5470 (1.9763) lr 1.0000e-05 eta 2 days, 8:17:42
epoch [1/50] batch [460/2000] time 2.034 (2.036) data 0.000 (0.002) loss 4.1796 (1.9825) lr 1.0000e-05 eta 2 days, 8:17:01
epoch [1/50] batch [480/2000] time 2.028 (2.035) data 0.000 (0.001) loss 1.5782 (1.9869) lr 1.0000e-05 eta 2 days, 8:16:10
epoch [1/50] batch [500/2000] time 2.000 (2.035) data 0.000 (0.001) loss 1.2801 (2.0043) lr 1.0000e-05 eta 2 days, 8:14:54
epoch [1/50] batch [520/2000] time 2.050 (2.035) data 0.000 (0.001) loss 0.8744 (1.9897) lr 1.0000e-05 eta 2 days, 8:13:48
epoch [1/50] batch [540/2000] time 2.002 (2.035) data 0.000 (0.001) loss 3.2413 (1.9680) lr 1.0000e-05 eta 2 days, 8:12:49
epoch [1/50] batch [560/2000] time 2.029 (2.035) data 0.000 (0.001) loss 1.9909 (1.9596) lr 1.0000e-05 eta 2 days, 8:11:53
epoch [1/50] batch [580/2000] time 2.055 (2.035) data 0.000 (0.001) loss 1.8531 (1.9475) lr 1.0000e-05 eta 2 days, 8:11:10
epoch [1/50] batch [600/2000] time 2.030 (2.034) data 0.001 (0.001) loss 1.8008 (1.9414) lr 1.0000e-05 eta 2 days, 8:10:11
epoch [1/50] batch [620/2000] time 2.056 (2.034) data 0.000 (0.001) loss 3.3829 (1.9400) lr 1.0000e-05 eta 2 days, 8:09:18
epoch [1/50] batch [640/2000] time 2.055 (2.034) data 0.000 (0.001) loss 3.7221 (1.9450) lr 1.0000e-05 eta 2 days, 8:08:33
epoch [1/50] batch [660/2000] time 2.052 (2.034) data 0.000 (0.001) loss 2.2737 (1.9317) lr 1.0000e-05 eta 2 days, 8:07:41
epoch [1/50] batch [680/2000] time 2.056 (2.034) data 0.000 (0.001) loss 1.7890 (1.9432) lr 1.0000e-05 eta 2 days, 8:07:20
epoch [1/50] batch [700/2000] time 2.050 (2.034) data 0.000 (0.001) loss 2.2331 (1.9354) lr 1.0000e-05 eta 2 days, 8:06:39
epoch [1/50] batch [720/2000] time 1.998 (2.034) data 0.000 (0.001) loss 2.5373 (1.9252) lr 1.0000e-05 eta 2 days, 8:05:48
epoch [1/50] batch [740/2000] time 1.999 (2.034) data 0.000 (0.001) loss 2.1669 (1.9306) lr 1.0000e-05 eta 2 days, 8:05:00
epoch [1/50] batch [760/2000] time 1.998 (2.034) data 0.000 (0.001) loss 1.3422 (1.9331) lr 1.0000e-05 eta 2 days, 8:04:13
epoch [1/50] batch [780/2000] time 2.004 (2.034) data 0.000 (0.001) loss 0.4962 (1.9380) lr 1.0000e-05 eta 2 days, 8:03:16
epoch [1/50] batch [800/2000] time 2.030 (2.034) data 0.000 (0.001) loss 3.5490 (1.9283) lr 1.0000e-05 eta 2 days, 8:02:30
epoch [1/50] batch [820/2000] time 2.000 (2.034) data 0.000 (0.001) loss 2.9605 (1.9308) lr 1.0000e-05 eta 2 days, 8:01:32
epoch [1/50] batch [840/2000] time 2.054 (2.034) data 0.000 (0.001) loss 1.5613 (1.9198) lr 1.0000e-05 eta 2 days, 8:00:54
epoch [1/50] batch [860/2000] time 2.057 (2.033) data 0.000 (0.001) loss 2.4617 (1.9115) lr 1.0000e-05 eta 2 days, 7:59:58
epoch [1/50] batch [880/2000] time 2.037 (2.033) data 0.000 (0.001) loss 1.3864 (1.9103) lr 1.0000e-05 eta 2 days, 7:59:19
epoch [1/50] batch [900/2000] time 2.003 (2.034) data 0.000 (0.001) loss 1.1238 (1.9061) lr 1.0000e-05 eta 2 days, 7:58:45
epoch [1/50] batch [920/2000] time 2.036 (2.034) data 0.000 (0.001) loss 2.1634 (1.9032) lr 1.0000e-05 eta 2 days, 7:58:04
epoch [1/50] batch [940/2000] time 2.004 (2.033) data 0.000 (0.001) loss 2.0791 (1.9073) lr 1.0000e-05 eta 2 days, 7:57:14
epoch [1/50] batch [960/2000] time 2.055 (2.034) data 0.000 (0.001) loss 1.4064 (1.9028) lr 1.0000e-05 eta 2 days, 7:56:46
epoch [1/50] batch [980/2000] time 2.052 (2.033) data 0.000 (0.001) loss 2.1337 (1.9025) lr 1.0000e-05 eta 2 days, 7:55:53
epoch [1/50] batch [1000/2000] time 2.000 (2.033) data 0.000 (0.001) loss 0.5697 (1.8905) lr 1.0000e-05 eta 2 days, 7:55:05
epoch [1/50] batch [1020/2000] time 1.995 (2.033) data 0.000 (0.001) loss 1.5448 (1.8884) lr 1.0000e-05 eta 2 days, 7:54:17
epoch [1/50] batch [1040/2000] time 2.051 (2.033) data 0.000 (0.001) loss 0.5881 (1.8844) lr 1.0000e-05 eta 2 days, 7:53:28
epoch [1/50] batch [1060/2000] time 1.978 (2.033) data 0.000 (0.001) loss 1.2059 (1.8818) lr 1.0000e-05 eta 2 days, 7:52:33
epoch [1/50] batch [1080/2000] time 2.030 (2.033) data 0.000 (0.001) loss 1.3316 (1.8782) lr 1.0000e-05 eta 2 days, 7:51:49
epoch [1/50] batch [1100/2000] time 2.050 (2.033) data 0.000 (0.001) loss 1.7048 (1.8710) lr 1.0000e-05 eta 2 days, 7:50:53
epoch [1/50] batch [1120/2000] time 2.052 (2.033) data 0.000 (0.001) loss 2.2397 (1.8648) lr 1.0000e-05 eta 2 days, 7:50:13
epoch [1/50] batch [1140/2000] time 2.050 (2.033) data 0.001 (0.001) loss 0.3275 (1.8653) lr 1.0000e-05 eta 2 days, 7:49:23
epoch [1/50] batch [1160/2000] time 2.031 (2.033) data 0.000 (0.001) loss 1.9123 (1.8639) lr 1.0000e-05 eta 2 days, 7:48:38
epoch [1/50] batch [1180/2000] time 2.053 (2.033) data 0.000 (0.001) loss 2.4229 (1.8647) lr 1.0000e-05 eta 2 days, 7:47:50
epoch [1/50] batch [1200/2000] time 2.053 (2.033) data 0.000 (0.001) loss 3.4163 (1.8632) lr 1.0000e-05 eta 2 days, 7:47:08
epoch [1/50] batch [1220/2000] time 2.054 (2.033) data 0.000 (0.001) loss 2.9483 (1.8584) lr 1.0000e-05 eta 2 days, 7:46:19
epoch [1/50] batch [1240/2000] time 2.030 (2.033) data 0.000 (0.001) loss 0.6475 (1.8532) lr 1.0000e-05 eta 2 days, 7:45:31
epoch [1/50] batch [1260/2000] time 2.052 (2.033) data 0.000 (0.001) loss 2.8218 (1.8486) lr 1.0000e-05 eta 2 days, 7:44:55
epoch [1/50] batch [1280/2000] time 2.030 (2.033) data 0.000 (0.001) loss 2.6710 (1.8451) lr 1.0000e-05 eta 2 days, 7:44:23
epoch [1/50] batch [1300/2000] time 2.053 (2.033) data 0.000 (0.001) loss 1.2674 (1.8394) lr 1.0000e-05 eta 2 days, 7:43:30
epoch [1/50] batch [1320/2000] time 1.998 (2.032) data 0.000 (0.001) loss 0.2122 (1.8378) lr 1.0000e-05 eta 2 days, 7:42:40
epoch [1/50] batch [1340/2000] time 2.054 (2.033) data 0.000 (0.001) loss 3.5133 (1.8387) lr 1.0000e-05 eta 2 days, 7:42:07
epoch [1/50] batch [1360/2000] time 2.060 (2.032) data 0.000 (0.001) loss 2.5643 (1.8375) lr 1.0000e-05 eta 2 days, 7:41:23
epoch [1/50] batch [1380/2000] time 2.051 (2.033) data 0.000 (0.001) loss 0.4514 (1.8308) lr 1.0000e-05 eta 2 days, 7:40:48
epoch [1/50] batch [1400/2000] time 2.054 (2.032) data 0.000 (0.001) loss 0.4521 (1.8231) lr 1.0000e-05 eta 2 days, 7:39:59
epoch [1/50] batch [1420/2000] time 2.052 (2.032) data 0.000 (0.001) loss 3.5148 (1.8262) lr 1.0000e-05 eta 2 days, 7:39:17
epoch [1/50] batch [1440/2000] time 2.031 (2.032) data 0.000 (0.001) loss 1.1714 (1.8240) lr 1.0000e-05 eta 2 days, 7:38:36
epoch [1/50] batch [1460/2000] time 1.998 (2.032) data 0.000 (0.001) loss 1.5675 (1.8215) lr 1.0000e-05 eta 2 days, 7:37:56
epoch [1/50] batch [1480/2000] time 2.001 (2.032) data 0.000 (0.001) loss 0.9456 (1.8188) lr 1.0000e-05 eta 2 days, 7:37:17
epoch [1/50] batch [1500/2000] time 2.053 (2.032) data 0.000 (0.001) loss 3.4914 (1.8195) lr 1.0000e-05 eta 2 days, 7:36:34
epoch [1/50] batch [1520/2000] time 1.999 (2.032) data 0.000 (0.001) loss 1.1043 (1.8174) lr 1.0000e-05 eta 2 days, 7:35:44
epoch [1/50] batch [1540/2000] time 2.054 (2.032) data 0.000 (0.001) loss 1.9451 (1.8168) lr 1.0000e-05 eta 2 days, 7:35:07
epoch [1/50] batch [1560/2000] time 2.027 (2.032) data 0.000 (0.001) loss 1.7275 (1.8186) lr 1.0000e-05 eta 2 days, 7:34:21
epoch [1/50] batch [1580/2000] time 2.031 (2.032) data 0.000 (0.001) loss 2.3324 (1.8217) lr 1.0000e-05 eta 2 days, 7:33:35
epoch [1/50] batch [1600/2000] time 2.053 (2.032) data 0.000 (0.001) loss 1.9045 (1.8252) lr 1.0000e-05 eta 2 days, 7:32:52
epoch [1/50] batch [1620/2000] time 2.028 (2.032) data 0.000 (0.001) loss 0.0124 (1.8247) lr 1.0000e-05 eta 2 days, 7:32:07
epoch [1/50] batch [1640/2000] time 2.052 (2.032) data 0.000 (0.001) loss 2.0592 (1.8242) lr 1.0000e-05 eta 2 days, 7:31:30
epoch [1/50] batch [1660/2000] time 2.055 (2.032) data 0.000 (0.001) loss 2.0161 (1.8211) lr 1.0000e-05 eta 2 days, 7:30:56
epoch [1/50] batch [1680/2000] time 2.051 (2.032) data 0.001 (0.001) loss 1.5156 (1.8174) lr 1.0000e-05 eta 2 days, 7:30:10
epoch [1/50] batch [1700/2000] time 2.029 (2.032) data 0.000 (0.001) loss 1.8218 (1.8131) lr 1.0000e-05 eta 2 days, 7:29:21
epoch [1/50] batch [1720/2000] time 2.049 (2.032) data 0.000 (0.001) loss 1.7053 (1.8167) lr 1.0000e-05 eta 2 days, 7:28:27
epoch [1/50] batch [1740/2000] time 2.048 (2.032) data 0.000 (0.001) loss 1.4516 (1.8123) lr 1.0000e-05 eta 2 days, 7:27:36
epoch [1/50] batch [1760/2000] time 2.001 (2.032) data 0.000 (0.001) loss 1.0309 (1.8108) lr 1.0000e-05 eta 2 days, 7:26:57
epoch [1/50] batch [1780/2000] time 2.051 (2.032) data 0.000 (0.001) loss 2.4185 (1.8109) lr 1.0000e-05 eta 2 days, 7:26:17
epoch [1/50] batch [1800/2000] time 2.053 (2.032) data 0.000 (0.001) loss 1.6617 (1.8085) lr 1.0000e-05 eta 2 days, 7:25:36
epoch [1/50] batch [1820/2000] time 2.056 (2.032) data 0.000 (0.001) loss 1.5420 (1.8076) lr 1.0000e-05 eta 2 days, 7:24:54
epoch [1/50] batch [1840/2000] time 2.058 (2.032) data 0.000 (0.001) loss 3.4306 (1.8064) lr 1.0000e-05 eta 2 days, 7:24:21
epoch [1/50] batch [1860/2000] time 2.033 (2.032) data 0.000 (0.001) loss 1.8165 (1.8016) lr 1.0000e-05 eta 2 days, 7:23:38
epoch [1/50] batch [1880/2000] time 2.056 (2.032) data 0.000 (0.001) loss 2.8989 (1.8000) lr 1.0000e-05 eta 2 days, 7:22:58
epoch [1/50] batch [1900/2000] time 2.032 (2.032) data 0.000 (0.001) loss 4.5996 (1.7975) lr 1.0000e-05 eta 2 days, 7:22:16
epoch [1/50] batch [1920/2000] time 2.031 (2.032) data 0.000 (0.001) loss 1.9176 (1.7957) lr 1.0000e-05 eta 2 days, 7:21:29
epoch [1/50] batch [1940/2000] time 2.030 (2.032) data 0.000 (0.001) loss 1.7035 (1.7952) lr 1.0000e-05 eta 2 days, 7:20:44
epoch [1/50] batch [1960/2000] time 1.999 (2.032) data 0.000 (0.001) loss 0.2131 (1.7894) lr 1.0000e-05 eta 2 days, 7:20:05
epoch [1/50] batch [1980/2000] time 2.000 (2.032) data 0.000 (0.001) loss 3.4629 (1.7887) lr 1.0000e-05 eta 2 days, 7:19:22
epoch [1/50] batch [2000/2000] time 2.027 (2.032) data 0.000 (0.001) loss 1.5910 (1.7874) lr 1.0000e-05 eta 2 days, 7:18:43
epoch [2/50] batch [20/2000] time 2.000 (2.057) data 0.000 (0.027) loss 3.0014 (1.4786) lr 1.0000e-05 eta 2 days, 7:58:48
epoch [2/50] batch [40/2000] time 2.054 (2.044) data 0.000 (0.014) loss 1.5961 (1.5698) lr 1.0000e-05 eta 2 days, 7:37:10
epoch [2/50] batch [60/2000] time 2.052 (2.039) data 0.001 (0.009) loss 1.7489 (1.4472) lr 1.0000e-05 eta 2 days, 7:27:49
epoch [2/50] batch [80/2000] time 2.053 (2.036) data 0.000 (0.007) loss 3.8009 (1.4501) lr 1.0000e-05 eta 2 days, 7:23:03
epoch [2/50] batch [100/2000] time 2.029 (2.035) data 0.000 (0.006) loss 1.9543 (1.5561) lr 1.0000e-05 eta 2 days, 7:20:31
epoch [2/50] batch [120/2000] time 1.996 (2.035) data 0.000 (0.005) loss 0.9298 (1.5187) lr 1.0000e-05 eta 2 days, 7:19:21
epoch [2/50] batch [140/2000] time 2.030 (2.034) data 0.000 (0.004) loss 1.9730 (1.4457) lr 1.0000e-05 eta 2 days, 7:17:38
epoch [2/50] batch [160/2000] time 2.053 (2.033) data 0.000 (0.004) loss 1.9274 (1.4614) lr 1.0000e-05 eta 2 days, 7:15:55
epoch [2/50] batch [180/2000] time 2.050 (2.034) data 0.000 (0.003) loss 1.7466 (1.4745) lr 1.0000e-05 eta 2 days, 7:15:23
epoch [2/50] batch [200/2000] time 2.051 (2.033) data 0.000 (0.003) loss 1.3169 (1.4904) lr 1.0000e-05 eta 2 days, 7:13:54
epoch [2/50] batch [220/2000] time 1.995 (2.032) data 0.000 (0.003) loss 1.9668 (1.5075) lr 1.0000e-05 eta 2 days, 7:12:08
epoch [2/50] batch [240/2000] time 2.053 (2.032) data 0.000 (0.003) loss 0.6119 (1.5230) lr 1.0000e-05 eta 2 days, 7:11:16
epoch [2/50] batch [260/2000] time 2.004 (2.032) data 0.000 (0.002) loss 2.0980 (1.5525) lr 1.0000e-05 eta 2 days, 7:10:42
epoch [2/50] batch [280/2000] time 2.027 (2.032) data 0.000 (0.002) loss 0.7579 (1.5502) lr 1.0000e-05 eta 2 days, 7:10:02
epoch [2/50] batch [300/2000] time 1.998 (2.032) data 0.000 (0.002) loss 0.3950 (1.5426) lr 1.0000e-05 eta 2 days, 7:08:58
epoch [2/50] batch [320/2000] time 2.051 (2.032) data 0.000 (0.002) loss 1.8423 (1.5384) lr 1.0000e-05 eta 2 days, 7:07:43
epoch [2/50] batch [340/2000] time 1.998 (2.031) data 0.000 (0.002) loss 0.5549 (1.5418) lr 1.0000e-05 eta 2 days, 7:06:31
epoch [2/50] batch [360/2000] time 2.002 (2.032) data 0.000 (0.002) loss 0.6654 (1.5415) lr 1.0000e-05 eta 2 days, 7:06:12
epoch [2/50] batch [380/2000] time 2.001 (2.032) data 0.000 (0.002) loss 0.6722 (1.5300) lr 1.0000e-05 eta 2 days, 7:05:47
epoch [2/50] batch [400/2000] time 2.052 (2.032) data 0.000 (0.002) loss 2.9681 (1.5412) lr 1.0000e-05 eta 2 days, 7:04:39
epoch [2/50] batch [420/2000] time 2.053 (2.032) data 0.000 (0.002) loss 2.4955 (1.5583) lr 1.0000e-05 eta 2 days, 7:03:57
epoch [2/50] batch [440/2000] time 2.040 (2.031) data 0.000 (0.001) loss 0.5694 (1.5398) lr 1.0000e-05 eta 2 days, 7:03:09
epoch [2/50] batch [460/2000] time 2.053 (2.032) data 0.000 (0.001) loss 2.6942 (1.5313) lr 1.0000e-05 eta 2 days, 7:02:38
epoch [2/50] batch [480/2000] time 2.005 (2.032) data 0.000 (0.001) loss 1.2271 (1.5496) lr 1.0000e-05 eta 2 days, 7:01:53
epoch [2/50] batch [500/2000] time 2.004 (2.031) data 0.000 (0.001) loss 3.0393 (1.5566) lr 1.0000e-05 eta 2 days, 7:00:57
epoch [2/50] batch [520/2000] time 2.057 (2.032) data 0.000 (0.001) loss 0.4734 (1.5520) lr 1.0000e-05 eta 2 days, 7:00:36
epoch [2/50] batch [540/2000] time 2.053 (2.032) data 0.000 (0.001) loss 1.2884 (1.5543) lr 1.0000e-05 eta 2 days, 6:59:55
epoch [2/50] batch [560/2000] time 2.054 (2.032) data 0.000 (0.001) loss 4.0588 (1.5518) lr 1.0000e-05 eta 2 days, 6:59:21
epoch [2/50] batch [580/2000] time 2.054 (2.032) data 0.000 (0.001) loss 2.3430 (1.5472) lr 1.0000e-05 eta 2 days, 6:58:58
epoch [2/50] batch [600/2000] time 2.055 (2.032) data 0.001 (0.001) loss 2.9526 (1.5512) lr 1.0000e-05 eta 2 days, 6:58:00
epoch [2/50] batch [620/2000] time 2.056 (2.032) data 0.000 (0.001) loss 1.7905 (1.5456) lr 1.0000e-05 eta 2 days, 6:57:23
epoch [2/50] batch [640/2000] time 1.976 (2.032) data 0.000 (0.001) loss 2.2178 (1.5489) lr 1.0000e-05 eta 2 days, 6:56:33
epoch [2/50] batch [660/2000] time 2.004 (2.031) data 0.000 (0.001) loss 3.9621 (1.5467) lr 1.0000e-05 eta 2 days, 6:55:44
epoch [2/50] batch [680/2000] time 2.030 (2.032) data 0.000 (0.001) loss 1.0124 (1.5433) lr 1.0000e-05 eta 2 days, 6:55:10
epoch [2/50] batch [700/2000] time 2.001 (2.031) data 0.000 (0.001) loss 3.7818 (1.5394) lr 1.0000e-05 eta 2 days, 6:54:17
epoch [2/50] batch [720/2000] time 2.030 (2.031) data 0.000 (0.001) loss 1.4108 (1.5476) lr 1.0000e-05 eta 2 days, 6:53:26
epoch [2/50] batch [740/2000] time 2.057 (2.031) data 0.000 (0.001) loss 0.0963 (1.5425) lr 1.0000e-05 eta 2 days, 6:52:46
epoch [2/50] batch [760/2000] time 2.056 (2.031) data 0.000 (0.001) loss 1.9724 (1.5487) lr 1.0000e-05 eta 2 days, 6:51:45
epoch [2/50] batch [780/2000] time 2.032 (2.031) data 0.000 (0.001) loss 0.8241 (1.5478) lr 1.0000e-05 eta 2 days, 6:51:02
epoch [2/50] batch [800/2000] time 2.034 (2.031) data 0.000 (0.001) loss 0.1977 (1.5446) lr 1.0000e-05 eta 2 days, 6:50:21
epoch [2/50] batch [820/2000] time 2.038 (2.031) data 0.000 (0.001) loss 0.1197 (1.5370) lr 1.0000e-05 eta 2 days, 6:50:07
epoch [2/50] batch [840/2000] time 2.003 (2.031) data 0.000 (0.001) loss 1.1529 (1.5348) lr 1.0000e-05 eta 2 days, 6:49:23
epoch [2/50] batch [860/2000] time 2.033 (2.031) data 0.000 (0.001) loss 1.6788 (1.5290) lr 1.0000e-05 eta 2 days, 6:48:58
epoch [2/50] batch [880/2000] time 2.056 (2.031) data 0.000 (0.001) loss 1.7439 (1.5368) lr 1.0000e-05 eta 2 days, 6:48:16
epoch [2/50] batch [900/2000] time 2.001 (2.031) data 0.000 (0.001) loss 0.9874 (1.5365) lr 1.0000e-05 eta 2 days, 6:47:33
epoch [2/50] batch [920/2000] time 2.034 (2.032) data 0.000 (0.001) loss 1.2583 (1.5341) lr 1.0000e-05 eta 2 days, 6:47:08
epoch [2/50] batch [940/2000] time 1.977 (2.032) data 0.000 (0.001) loss 3.5955 (1.5356) lr 1.0000e-05 eta 2 days, 6:46:35
epoch [2/50] batch [960/2000] time 2.002 (2.032) data 0.000 (0.001) loss 0.2604 (1.5366) lr 1.0000e-05 eta 2 days, 6:46:01
epoch [2/50] batch [980/2000] time 1.979 (2.032) data 0.000 (0.001) loss 1.8701 (1.5358) lr 1.0000e-05 eta 2 days, 6:45:15
epoch [2/50] batch [1000/2000] time 2.055 (2.032) data 0.000 (0.001) loss 1.6729 (1.5356) lr 1.0000e-05 eta 2 days, 6:44:46
epoch [2/50] batch [1020/2000] time 2.056 (2.032) data 0.000 (0.001) loss 1.2353 (1.5320) lr 1.0000e-05 eta 2 days, 6:44:06
epoch [2/50] batch [1040/2000] time 2.056 (2.032) data 0.000 (0.001) loss 1.2649 (1.5302) lr 1.0000e-05 eta 2 days, 6:43:27
epoch [2/50] batch [1060/2000] time 2.034 (2.032) data 0.000 (0.001) loss 0.1349 (1.5332) lr 1.0000e-05 eta 2 days, 6:42:52
epoch [2/50] batch [1080/2000] time 2.004 (2.032) data 0.000 (0.001) loss 0.1126 (1.5326) lr 1.0000e-05 eta 2 days, 6:42:08
epoch [2/50] batch [1100/2000] time 2.006 (2.032) data 0.000 (0.001) loss 2.2943 (1.5334) lr 1.0000e-05 eta 2 days, 6:41:30
epoch [2/50] batch [1120/2000] time 2.033 (2.032) data 0.000 (0.001) loss 0.6980 (1.5335) lr 1.0000e-05 eta 2 days, 6:40:42
epoch [2/50] batch [1140/2000] time 2.005 (2.032) data 0.001 (0.001) loss 0.5580 (1.5369) lr 1.0000e-05 eta 2 days, 6:40:03
epoch [2/50] batch [1160/2000] time 2.002 (2.032) data 0.000 (0.001) loss 1.3276 (1.5426) lr 1.0000e-05 eta 2 days, 6:39:30
epoch [2/50] batch [1180/2000] time 2.003 (2.032) data 0.000 (0.001) loss 1.7446 (1.5403) lr 1.0000e-05 eta 2 days, 6:38:48
epoch [2/50] batch [1200/2000] time 2.056 (2.032) data 0.000 (0.001) loss 1.6638 (1.5389) lr 1.0000e-05 eta 2 days, 6:38:10
epoch [2/50] batch [1220/2000] time 2.059 (2.032) data 0.000 (0.001) loss 1.2600 (1.5433) lr 1.0000e-05 eta 2 days, 6:37:29
epoch [2/50] batch [1240/2000] time 1.981 (2.032) data 0.000 (0.001) loss 2.6978 (1.5373) lr 1.0000e-05 eta 2 days, 6:36:53
epoch [2/50] batch [1260/2000] time 2.055 (2.032) data 0.000 (0.001) loss 1.9788 (1.5448) lr 1.0000e-05 eta 2 days, 6:36:25
epoch [2/50] batch [1280/2000] time 2.035 (2.032) data 0.000 (0.001) loss 3.8123 (1.5476) lr 1.0000e-05 eta 2 days, 6:35:51
epoch [2/50] batch [1300/2000] time 2.055 (2.032) data 0.000 (0.001) loss 0.2719 (1.5486) lr 1.0000e-05 eta 2 days, 6:35:16
epoch [2/50] batch [1320/2000] time 1.975 (2.032) data 0.000 (0.001) loss 0.1017 (1.5526) lr 1.0000e-05 eta 2 days, 6:34:38
epoch [2/50] batch [1340/2000] time 2.058 (2.032) data 0.000 (0.001) loss 1.2666 (1.5530) lr 1.0000e-05 eta 2 days, 6:34:07
epoch [2/50] batch [1360/2000] time 2.037 (2.032) data 0.000 (0.001) loss 1.1487 (1.5524) lr 1.0000e-05 eta 2 days, 6:33:20
epoch [2/50] batch [1380/2000] time 2.061 (2.032) data 0.000 (0.001) loss 1.1106 (1.5565) lr 1.0000e-05 eta 2 days, 6:32:42
epoch [2/50] batch [1400/2000] time 2.036 (2.032) data 0.000 (0.001) loss 0.6214 (1.5532) lr 1.0000e-05 eta 2 days, 6:31:58
epoch [2/50] batch [1420/2000] time 2.035 (2.032) data 0.000 (0.001) loss 2.2792 (1.5538) lr 1.0000e-05 eta 2 days, 6:31:29
epoch [2/50] batch [1440/2000] time 2.064 (2.032) data 0.000 (0.001) loss 0.5253 (1.5535) lr 1.0000e-05 eta 2 days, 6:30:47
epoch [2/50] batch [1460/2000] time 2.034 (2.032) data 0.000 (0.001) loss 1.2039 (1.5542) lr 1.0000e-05 eta 2 days, 6:30:12
epoch [2/50] batch [1480/2000] time 1.975 (2.032) data 0.000 (0.001) loss 0.1920 (1.5552) lr 1.0000e-05 eta 2 days, 6:29:36
epoch [2/50] batch [1500/2000] time 1.972 (2.032) data 0.000 (0.001) loss 0.5247 (1.5568) lr 1.0000e-05 eta 2 days, 6:28:43
epoch [2/50] batch [1520/2000] time 1.999 (2.032) data 0.000 (0.001) loss 1.2215 (1.5581) lr 1.0000e-05 eta 2 days, 6:27:54
epoch [2/50] batch [1540/2000] time 2.008 (2.032) data 0.000 (0.001) loss 1.4958 (1.5604) lr 1.0000e-05 eta 2 days, 6:27:18
epoch [2/50] batch [1560/2000] time 2.004 (2.032) data 0.000 (0.001) loss 1.6124 (1.5655) lr 1.0000e-05 eta 2 days, 6:26:44
epoch [2/50] batch [1580/2000] time 2.058 (2.032) data 0.000 (0.001) loss 1.6325 (1.5608) lr 1.0000e-05 eta 2 days, 6:26:02
epoch [2/50] batch [1600/2000] time 2.032 (2.032) data 0.000 (0.001) loss 2.1449 (1.5594) lr 1.0000e-05 eta 2 days, 6:25:18
epoch [2/50] batch [1620/2000] time 2.060 (2.032) data 0.000 (0.001) loss 2.0856 (1.5612) lr 1.0000e-05 eta 2 days, 6:24:33
epoch [2/50] batch [1640/2000] time 2.032 (2.032) data 0.000 (0.001) loss 0.3182 (1.5642) lr 1.0000e-05 eta 2 days, 6:23:49
epoch [2/50] batch [1660/2000] time 2.034 (2.032) data 0.000 (0.001) loss 0.6992 (1.5656) lr 1.0000e-05 eta 2 days, 6:23:16
epoch [2/50] batch [1680/2000] time 2.059 (2.032) data 0.001 (0.001) loss 3.0477 (1.5640) lr 1.0000e-05 eta 2 days, 6:22:45
epoch [2/50] batch [1700/2000] time 2.003 (2.032) data 0.000 (0.001) loss 0.7637 (1.5607) lr 1.0000e-05 eta 2 days, 6:22:09
epoch [2/50] batch [1720/2000] time 2.033 (2.033) data 0.000 (0.001) loss 1.6423 (1.5630) lr 1.0000e-05 eta 2 days, 6:21:34
epoch [2/50] batch [1740/2000] time 2.032 (2.033) data 0.000 (0.001) loss 0.9565 (1.5629) lr 1.0000e-05 eta 2 days, 6:20:56
epoch [2/50] batch [1760/2000] time 2.030 (2.033) data 0.000 (0.001) loss 3.4996 (1.5616) lr 1.0000e-05 eta 2 days, 6:20:11
epoch [2/50] batch [1780/2000] time 2.060 (2.033) data 0.000 (0.001) loss 2.5318 (1.5629) lr 1.0000e-05 eta 2 days, 6:19:37
epoch [2/50] batch [1800/2000] time 2.060 (2.033) data 0.000 (0.001) loss 4.2086 (1.5672) lr 1.0000e-05 eta 2 days, 6:18:56
epoch [2/50] batch [1820/2000] time 2.056 (2.033) data 0.000 (0.001) loss 2.5373 (1.5668) lr 1.0000e-05 eta 2 days, 6:18:17
epoch [2/50] batch [1840/2000] time 2.056 (2.033) data 0.000 (0.001) loss 1.9361 (1.5666) lr 1.0000e-05 eta 2 days, 6:17:39
epoch [2/50] batch [1860/2000] time 2.033 (2.033) data 0.000 (0.001) loss 1.7008 (1.5676) lr 1.0000e-05 eta 2 days, 6:17:01
epoch [2/50] batch [1880/2000] time 2.051 (2.033) data 0.000 (0.001) loss 0.9478 (1.5692) lr 1.0000e-05 eta 2 days, 6:16:20
epoch [2/50] batch [1900/2000] time 2.035 (2.033) data 0.000 (0.001) loss 1.6024 (1.5670) lr 1.0000e-05 eta 2 days, 6:15:54
epoch [2/50] batch [1920/2000] time 2.097 (2.033) data 0.000 (0.001) loss 0.5889 (1.5708) lr 1.0000e-05 eta 2 days, 6:15:19
epoch [2/50] batch [1940/2000] time 2.004 (2.033) data 0.000 (0.001) loss 0.5011 (1.5712) lr 1.0000e-05 eta 2 days, 6:14:50
epoch [2/50] batch [1960/2000] time 2.041 (2.033) data 0.000 (0.001) loss 0.4403 (1.5708) lr 1.0000e-05 eta 2 days, 6:14:11
epoch [2/50] batch [1980/2000] time 2.013 (2.033) data 0.000 (0.001) loss 0.5908 (1.5661) lr 1.0000e-05 eta 2 days, 6:13:40
epoch [2/50] batch [2000/2000] time 1.983 (2.033) data 0.000 (0.001) loss 1.8700 (1.5599) lr 1.0000e-05 eta 2 days, 6:13:06
epoch [3/50] batch [20/2000] time 2.019 (2.081) data 0.000 (0.036) loss 1.8518 (1.5997) lr 1.0000e-05 eta 2 days, 7:29:10
epoch [3/50] batch [40/2000] time 2.051 (2.071) data 0.000 (0.018) loss 0.7719 (1.5456) lr 1.0000e-05 eta 2 days, 7:11:36
epoch [3/50] batch [60/2000] time 2.040 (2.062) data 0.001 (0.012) loss 1.9982 (1.6115) lr 1.0000e-05 eta 2 days, 6:57:11
epoch [3/50] batch [80/2000] time 2.013 (2.058) data 0.000 (0.009) loss 1.7787 (1.6019) lr 1.0000e-05 eta 2 days, 6:50:10
epoch [3/50] batch [100/2000] time 2.010 (2.058) data 0.000 (0.008) loss 1.9572 (1.5541) lr 1.0000e-05 eta 2 days, 6:49:56
epoch [3/50] batch [120/2000] time 2.058 (2.059) data 0.000 (0.007) loss 1.5715 (1.5826) lr 1.0000e-05 eta 2 days, 6:49:29
epoch [3/50] batch [140/2000] time 2.065 (2.057) data 0.000 (0.006) loss 2.1759 (1.5398) lr 1.0000e-05 eta 2 days, 6:46:26
epoch [3/50] batch [160/2000] time 2.056 (2.055) data 0.000 (0.005) loss 0.8239 (1.4951) lr 1.0000e-05 eta 2 days, 6:42:49
epoch [3/50] batch [180/2000] time 2.042 (2.053) data 0.000 (0.005) loss 0.8387 (1.4839) lr 1.0000e-05 eta 2 days, 6:38:59
epoch [3/50] batch [200/2000] time 1.999 (2.052) data 0.000 (0.004) loss 2.2066 (1.5010) lr 1.0000e-05 eta 2 days, 6:36:40
epoch [3/50] batch [220/2000] time 2.006 (2.051) data 0.000 (0.004) loss 1.9051 (1.4979) lr 1.0000e-05 eta 2 days, 6:33:54
epoch [3/50] batch [240/2000] time 2.018 (2.050) data 0.001 (0.004) loss 3.3970 (1.4941) lr 1.0000e-05 eta 2 days, 6:32:27
epoch [3/50] batch [260/2000] time 2.071 (2.050) data 0.000 (0.004) loss 0.2187 (1.5045) lr 1.0000e-05 eta 2 days, 6:31:09
epoch [3/50] batch [280/2000] time 2.052 (2.050) data 0.000 (0.003) loss 0.7349 (1.4788) lr 1.0000e-05 eta 2 days, 6:30:26
epoch [3/50] batch [300/2000] time 1.993 (2.050) data 0.015 (0.003) loss 0.5591 (1.4568) lr 1.0000e-05 eta 2 days, 6:29:07
epoch [3/50] batch [320/2000] time 2.059 (2.049) data 0.000 (0.003) loss 1.0011 (1.4666) lr 1.0000e-05 eta 2 days, 6:27:31
epoch [3/50] batch [340/2000] time 2.040 (2.049) data 0.001 (0.003) loss 3.2323 (1.4766) lr 1.0000e-05 eta 2 days, 6:26:50
epoch [3/50] batch [360/2000] time 2.105 (2.049) data 0.000 (0.003) loss 2.0114 (1.4796) lr 1.0000e-05 eta 2 days, 6:26:48
epoch [3/50] batch [380/2000] time 2.057 (2.050) data 0.000 (0.003) loss 1.4083 (1.4933) lr 1.0000e-05 eta 2 days, 6:27:06
epoch [3/50] batch [400/2000] time 2.069 (2.049) data 0.000 (0.003) loss 0.6405 (1.4963) lr 1.0000e-05 eta 2 days, 6:25:21
epoch [3/50] batch [420/2000] time 2.005 (2.049) data 0.000 (0.002) loss 4.0367 (1.4756) lr 1.0000e-05 eta 2 days, 6:23:47
epoch [3/50] batch [440/2000] time 2.041 (2.049) data 0.000 (0.002) loss 0.1914 (1.4735) lr 1.0000e-05 eta 2 days, 6:22:57
epoch [3/50] batch [460/2000] time 2.042 (2.049) data 0.000 (0.002) loss 1.2261 (1.4812) lr 1.0000e-05 eta 2 days, 6:22:07
epoch [3/50] batch [480/2000] time 2.066 (2.049) data 0.000 (0.002) loss 1.5131 (1.4889) lr 1.0000e-05 eta 2 days, 6:21:40
epoch [3/50] batch [500/2000] time 2.045 (2.048) data 0.000 (0.002) loss 0.2869 (1.4871) lr 1.0000e-05 eta 2 days, 6:20:22
epoch [3/50] batch [520/2000] time 2.071 (2.049) data 0.000 (0.002) loss 0.4390 (1.4797) lr 1.0000e-05 eta 2 days, 6:20:19
epoch [3/50] batch [540/2000] time 2.013 (2.049) data 0.001 (0.002) loss 1.2915 (1.4877) lr 1.0000e-05 eta 2 days, 6:19:44
epoch [3/50] batch [560/2000] time 2.021 (2.049) data 0.001 (0.002) loss 1.1215 (1.4774) lr 1.0000e-05 eta 2 days, 6:19:16
epoch [3/50] batch [580/2000] time 2.012 (2.050) data 0.000 (0.002) loss 2.2760 (1.4748) lr 1.0000e-05 eta 2 days, 6:19:31
epoch [3/50] batch [600/2000] time 2.063 (2.050) data 0.001 (0.002) loss 3.0198 (1.4865) lr 1.0000e-05 eta 2 days, 6:19:03
epoch [3/50] batch [620/2000] time 2.033 (2.050) data 0.000 (0.002) loss 0.8818 (1.4838) lr 1.0000e-05 eta 2 days, 6:18:08
epoch [3/50] batch [640/2000] time 2.034 (2.049) data 0.000 (0.002) loss 1.1025 (1.4763) lr 1.0000e-05 eta 2 days, 6:17:00
epoch [3/50] batch [660/2000] time 2.032 (2.049) data 0.000 (0.002) loss 0.3147 (1.4715) lr 1.0000e-05 eta 2 days, 6:15:38
epoch [3/50] batch [680/2000] time 2.053 (2.049) data 0.000 (0.002) loss 2.7043 (1.4714) lr 1.0000e-05 eta 2 days, 6:14:29
epoch [3/50] batch [700/2000] time 2.058 (2.048) data 0.000 (0.002) loss 1.3238 (1.4661) lr 1.0000e-05 eta 2 days, 6:12:24
epoch [3/50] batch [720/2000] time 2.033 (2.047) data 0.000 (0.002) loss 1.4943 (1.4678) lr 1.0000e-05 eta 2 days, 6:11:12
epoch [3/50] batch [740/2000] time 2.056 (2.047) data 0.000 (0.002) loss 0.5207 (1.4688) lr 1.0000e-05 eta 2 days, 6:09:54
epoch [3/50] batch [760/2000] time 1.973 (2.047) data 0.000 (0.002) loss 1.2053 (1.4689) lr 1.0000e-05 eta 2 days, 6:08:51
epoch [3/50] batch [780/2000] time 2.042 (2.046) data 0.000 (0.002) loss 1.1968 (1.4672) lr 1.0000e-05 eta 2 days, 6:07:34
epoch [3/50] batch [800/2000] time 1.999 (2.046) data 0.000 (0.002) loss 0.4817 (1.4664) lr 1.0000e-05 eta 2 days, 6:06:23
epoch [3/50] batch [820/2000] time 2.032 (2.046) data 0.000 (0.001) loss 2.4466 (1.4681) lr 1.0000e-05 eta 2 days, 6:05:05
epoch [3/50] batch [840/2000] time 2.031 (2.045) data 0.000 (0.001) loss 0.6712 (1.4732) lr 1.0000e-05 eta 2 days, 6:04:03
epoch [3/50] batch [860/2000] time 2.002 (2.045) data 0.000 (0.001) loss 0.8456 (1.4733) lr 1.0000e-05 eta 2 days, 6:03:01
epoch [3/50] batch [880/2000] time 2.000 (2.045) data 0.000 (0.001) loss 1.1981 (1.4665) lr 1.0000e-05 eta 2 days, 6:01:54
epoch [3/50] batch [900/2000] time 2.058 (2.045) data 0.000 (0.001) loss 0.8785 (1.4693) lr 1.0000e-05 eta 2 days, 6:00:54
epoch [3/50] batch [920/2000] time 2.058 (2.045) data 0.000 (0.001) loss 3.0746 (1.4767) lr 1.0000e-05 eta 2 days, 5:59:54
epoch [3/50] batch [940/2000] time 2.055 (2.044) data 0.000 (0.001) loss 2.5170 (1.4798) lr 1.0000e-05 eta 2 days, 5:58:48
epoch [3/50] batch [960/2000] time 1.996 (2.044) data 0.000 (0.001) loss 1.0936 (1.4788) lr 1.0000e-05 eta 2 days, 5:57:47
epoch [3/50] batch [980/2000] time 2.034 (2.044) data 0.000 (0.001) loss 1.8921 (1.4730) lr 1.0000e-05 eta 2 days, 5:56:45
epoch [3/50] batch [1000/2000] time 2.056 (2.044) data 0.000 (0.001) loss 0.9469 (1.4690) lr 1.0000e-05 eta 2 days, 5:55:39
epoch [3/50] batch [1020/2000] time 2.057 (2.043) data 0.000 (0.001) loss 1.8538 (1.4653) lr 1.0000e-05 eta 2 days, 5:54:51
epoch [3/50] batch [1040/2000] time 2.058 (2.043) data 0.000 (0.001) loss 2.6629 (1.4606) lr 1.0000e-05 eta 2 days, 5:53:59
epoch [3/50] batch [1060/2000] time 1.999 (2.043) data 0.000 (0.001) loss 0.6319 (1.4601) lr 1.0000e-05 eta 2 days, 5:52:55
epoch [3/50] batch [1080/2000] time 2.045 (2.043) data 0.000 (0.001) loss 2.9277 (1.4637) lr 1.0000e-05 eta 2 days, 5:51:59
epoch [3/50] batch [1100/2000] time 2.000 (2.043) data 0.000 (0.001) loss 2.3308 (1.4682) lr 1.0000e-05 eta 2 days, 5:50:59
epoch [3/50] batch [1120/2000] time 2.001 (2.043) data 0.000 (0.001) loss 0.2194 (1.4613) lr 1.0000e-05 eta 2 days, 5:50:02
epoch [3/50] batch [1140/2000] time 1.996 (2.042) data 0.001 (0.001) loss 1.2500 (1.4573) lr 1.0000e-05 eta 2 days, 5:49:08
epoch [3/50] batch [1160/2000] time 2.053 (2.042) data 0.000 (0.001) loss 0.2834 (1.4564) lr 1.0000e-05 eta 2 days, 5:48:05
epoch [3/50] batch [1180/2000] time 2.030 (2.042) data 0.000 (0.001) loss 3.1038 (1.4571) lr 1.0000e-05 eta 2 days, 5:47:07
epoch [3/50] batch [1200/2000] time 1.997 (2.042) data 0.000 (0.001) loss 0.9999 (1.4602) lr 1.0000e-05 eta 2 days, 5:46:03
epoch [3/50] batch [1220/2000] time 2.057 (2.042) data 0.000 (0.001) loss 1.6867 (1.4671) lr 1.0000e-05 eta 2 days, 5:45:01
epoch [3/50] batch [1240/2000] time 1.998 (2.041) data 0.000 (0.001) loss 0.8536 (1.4716) lr 1.0000e-05 eta 2 days, 5:44:08
epoch [3/50] batch [1260/2000] time 2.056 (2.041) data 0.000 (0.001) loss 1.8929 (1.4751) lr 1.0000e-05 eta 2 days, 5:43:13
epoch [3/50] batch [1280/2000] time 2.037 (2.041) data 0.000 (0.001) loss 0.9876 (1.4745) lr 1.0000e-05 eta 2 days, 5:42:28
epoch [3/50] batch [1300/2000] time 2.032 (2.041) data 0.000 (0.001) loss 0.7488 (1.4776) lr 1.0000e-05 eta 2 days, 5:41:38
epoch [3/50] batch [1320/2000] time 2.000 (2.041) data 0.000 (0.001) loss 2.0316 (1.4739) lr 1.0000e-05 eta 2 days, 5:40:48
epoch [3/50] batch [1340/2000] time 2.036 (2.041) data 0.000 (0.001) loss 1.8695 (1.4750) lr 1.0000e-05 eta 2 days, 5:39:59
epoch [3/50] batch [1360/2000] time 2.036 (2.041) data 0.000 (0.001) loss 2.7094 (1.4747) lr 1.0000e-05 eta 2 days, 5:39:05
epoch [3/50] batch [1380/2000] time 2.058 (2.041) data 0.000 (0.001) loss 0.5230 (1.4722) lr 1.0000e-05 eta 2 days, 5:38:21
epoch [3/50] batch [1400/2000] time 2.055 (2.041) data 0.000 (0.001) loss 0.6614 (1.4735) lr 1.0000e-05 eta 2 days, 5:37:28
epoch [3/50] batch [1420/2000] time 2.058 (2.041) data 0.000 (0.001) loss 1.0326 (1.4798) lr 1.0000e-05 eta 2 days, 5:36:45
epoch [3/50] batch [1440/2000] time 2.056 (2.041) data 0.000 (0.001) loss 0.7705 (1.4886) lr 1.0000e-05 eta 2 days, 5:36:04
epoch [3/50] batch [1460/2000] time 2.055 (2.041) data 0.000 (0.001) loss 2.5287 (1.4854) lr 1.0000e-05 eta 2 days, 5:35:20
epoch [3/50] batch [1480/2000] time 2.003 (2.040) data 0.000 (0.001) loss 2.6869 (1.4846) lr 1.0000e-05 eta 2 days, 5:34:25
epoch [3/50] batch [1500/2000] time 2.058 (2.041) data 0.000 (0.001) loss 0.3824 (1.4870) lr 1.0000e-05 eta 2 days, 5:33:47
epoch [3/50] batch [1520/2000] time 1.973 (2.040) data 0.000 (0.001) loss 2.2219 (1.4856) lr 1.0000e-05 eta 2 days, 5:32:56
epoch [3/50] batch [1540/2000] time 1.999 (2.040) data 0.000 (0.001) loss 2.1184 (1.4873) lr 1.0000e-05 eta 2 days, 5:32:02
epoch [3/50] batch [1560/2000] time 2.033 (2.040) data 0.000 (0.001) loss 2.9459 (1.4909) lr 1.0000e-05 eta 2 days, 5:31:14
epoch [3/50] batch [1580/2000] time 2.054 (2.040) data 0.000 (0.001) loss 3.1633 (1.4941) lr 1.0000e-05 eta 2 days, 5:30:27
epoch [3/50] batch [1600/2000] time 2.052 (2.040) data 0.000 (0.001) loss 3.1219 (1.4929) lr 1.0000e-05 eta 2 days, 5:29:29
epoch [3/50] batch [1620/2000] time 2.000 (2.040) data 0.000 (0.001) loss 1.3671 (1.4961) lr 1.0000e-05 eta 2 days, 5:28:28
epoch [3/50] batch [1640/2000] time 2.041 (2.040) data 0.000 (0.001) loss 0.7978 (1.4932) lr 1.0000e-05 eta 2 days, 5:27:44
epoch [3/50] batch [1660/2000] time 2.004 (2.040) data 0.000 (0.001) loss 2.4101 (1.4934) lr 1.0000e-05 eta 2 days, 5:27:04
epoch [3/50] batch [1680/2000] time 2.035 (2.040) data 0.000 (0.001) loss 1.6591 (1.4935) lr 1.0000e-05 eta 2 days, 5:26:17
epoch [3/50] batch [1700/2000] time 1.999 (2.040) data 0.000 (0.001) loss 0.1032 (1.4897) lr 1.0000e-05 eta 2 days, 5:25:29
epoch [3/50] batch [1720/2000] time 2.000 (2.039) data 0.000 (0.001) loss 1.1877 (1.4889) lr 1.0000e-05 eta 2 days, 5:24:36
epoch [3/50] batch [1740/2000] time 2.034 (2.039) data 0.000 (0.001) loss 2.1424 (1.4867) lr 1.0000e-05 eta 2 days, 5:24:00
epoch [3/50] batch [1760/2000] time 2.064 (2.039) data 0.000 (0.001) loss 1.6790 (1.4900) lr 1.0000e-05 eta 2 days, 5:23:14
epoch [3/50] batch [1780/2000] time 2.003 (2.039) data 0.000 (0.001) loss 0.7562 (1.4854) lr 1.0000e-05 eta 2 days, 5:22:26
epoch [3/50] batch [1800/2000] time 2.057 (2.039) data 0.000 (0.001) loss 1.3864 (1.4833) lr 1.0000e-05 eta 2 days, 5:21:35
epoch [3/50] batch [1820/2000] time 2.059 (2.039) data 0.000 (0.001) loss 0.9663 (1.4849) lr 1.0000e-05 eta 2 days, 5:20:49
epoch [3/50] batch [1840/2000] time 2.037 (2.039) data 0.000 (0.001) loss 1.1341 (1.4859) lr 1.0000e-05 eta 2 days, 5:20:05
epoch [3/50] batch [1860/2000] time 2.054 (2.039) data 0.000 (0.001) loss 4.5119 (1.4848) lr 1.0000e-05 eta 2 days, 5:19:25
epoch [3/50] batch [1880/2000] time 2.058 (2.039) data 0.000 (0.001) loss 0.6169 (1.4857) lr 1.0000e-05 eta 2 days, 5:18:42
epoch [3/50] batch [1900/2000] time 2.003 (2.039) data 0.000 (0.001) loss 0.8019 (1.4852) lr 1.0000e-05 eta 2 days, 5:18:05
epoch [3/50] batch [1920/2000] time 2.059 (2.039) data 0.000 (0.001) loss 0.7225 (1.4885) lr 1.0000e-05 eta 2 days, 5:17:26
epoch [3/50] batch [1940/2000] time 1.976 (2.039) data 0.000 (0.001) loss 1.1641 (1.4899) lr 1.0000e-05 eta 2 days, 5:16:35
epoch [3/50] batch [1960/2000] time 2.030 (2.039) data 0.000 (0.001) loss 4.0899 (1.4904) lr 1.0000e-05 eta 2 days, 5:15:47
epoch [3/50] batch [1980/2000] time 2.056 (2.039) data 0.000 (0.001) loss 2.2939 (1.4919) lr 1.0000e-05 eta 2 days, 5:15:00
epoch [3/50] batch [2000/2000] time 2.054 (2.039) data 0.000 (0.001) loss 4.3059 (1.4936) lr 1.0000e-05 eta 2 days, 5:14:11
epoch [4/50] batch [20/2000] time 2.001 (2.078) data 0.000 (0.040) loss 1.2603 (1.5457) lr 1.0000e-05 eta 2 days, 6:14:31
epoch [4/50] batch [40/2000] time 2.061 (2.056) data 0.000 (0.020) loss 1.7162 (1.4508) lr 1.0000e-05 eta 2 days, 5:39:27
epoch [4/50] batch [60/2000] time 2.068 (2.050) data 0.001 (0.014) loss 1.9042 (1.4324) lr 1.0000e-05 eta 2 days, 5:29:47
epoch [4/50] batch [80/2000] time 2.004 (2.047) data 0.000 (0.010) loss 1.7098 (1.4470) lr 1.0000e-05 eta 2 days, 5:23:55
epoch [4/50] batch [100/2000] time 2.062 (2.046) data 0.002 (0.008) loss 0.5690 (1.4834) lr 1.0000e-05 eta 2 days, 5:22:09
epoch [4/50] batch [120/2000] time 2.063 (2.045) data 0.000 (0.007) loss 1.3024 (1.5204) lr 1.0000e-05 eta 2 days, 5:20:23
epoch [4/50] batch [140/2000] time 2.003 (2.043) data 0.000 (0.006) loss 1.2023 (1.4996) lr 1.0000e-05 eta 2 days, 5:16:36
epoch [4/50] batch [160/2000] time 2.058 (2.042) data 0.000 (0.005) loss 0.9167 (1.5482) lr 1.0000e-05 eta 2 days, 5:14:18
epoch [4/50] batch [180/2000] time 2.054 (2.042) data 0.000 (0.005) loss 0.5488 (1.5742) lr 1.0000e-05 eta 2 days, 5:12:24
epoch [4/50] batch [200/2000] time 2.002 (2.041) data 0.000 (0.004) loss 2.2292 (1.5843) lr 1.0000e-05 eta 2 days, 5:10:22
epoch [4/50] batch [220/2000] time 1.976 (2.040) data 0.000 (0.004) loss 1.4211 (1.5908) lr 1.0000e-05 eta 2 days, 5:09:09
epoch [4/50] batch [240/2000] time 2.056 (2.040) data 0.000 (0.004) loss 2.5745 (1.5736) lr 1.0000e-05 eta 2 days, 5:07:36
epoch [4/50] batch [260/2000] time 2.062 (2.040) data 0.000 (0.003) loss 1.7715 (1.5899) lr 1.0000e-05 eta 2 days, 5:06:24
epoch [4/50] batch [280/2000] time 2.006 (2.039) data 0.000 (0.003) loss 0.8074 (1.5747) lr 1.0000e-05 eta 2 days, 5:05:22
epoch [4/50] batch [300/2000] time 2.000 (2.039) data 0.000 (0.003) loss 1.8131 (1.5688) lr 1.0000e-05 eta 2 days, 5:03:53
epoch [4/50] batch [320/2000] time 2.005 (2.039) data 0.000 (0.003) loss 0.5069 (1.5630) lr 1.0000e-05 eta 2 days, 5:03:19
epoch [4/50] batch [340/2000] time 2.004 (2.038) data 0.000 (0.003) loss 3.5358 (1.5616) lr 1.0000e-05 eta 2 days, 5:01:54
epoch [4/50] batch [360/2000] time 2.062 (2.038) data 0.000 (0.002) loss 3.4206 (1.5523) lr 1.0000e-05 eta 2 days, 5:01:13
epoch [4/50] batch [380/2000] time 2.060 (2.038) data 0.000 (0.002) loss 0.1789 (1.5402) lr 1.0000e-05 eta 2 days, 5:00:10
epoch [4/50] batch [400/2000] time 2.056 (2.038) data 0.000 (0.002) loss 0.6465 (1.5412) lr 1.0000e-05 eta 2 days, 4:59:01
epoch [4/50] batch [420/2000] time 2.003 (2.038) data 0.000 (0.002) loss 2.5744 (1.5296) lr 1.0000e-05 eta 2 days, 4:58:22
epoch [4/50] batch [440/2000] time 2.005 (2.038) data 0.000 (0.002) loss 3.0273 (1.5310) lr 1.0000e-05 eta 2 days, 4:57:54
epoch [4/50] batch [460/2000] time 2.036 (2.038) data 0.000 (0.002) loss 1.0065 (1.5298) lr 1.0000e-05 eta 2 days, 4:57:28
epoch [4/50] batch [480/2000] time 2.005 (2.038) data 0.000 (0.002) loss 1.2416 (1.5300) lr 1.0000e-05 eta 2 days, 4:56:14
epoch [4/50] batch [500/2000] time 2.060 (2.038) data 0.000 (0.002) loss 0.2716 (1.5315) lr 1.0000e-05 eta 2 days, 4:55:11
epoch [4/50] batch [520/2000] time 2.002 (2.037) data 0.000 (0.002) loss 2.9210 (1.5296) lr 1.0000e-05 eta 2 days, 4:54:05
epoch [4/50] batch [540/2000] time 2.037 (2.037) data 0.000 (0.002) loss 1.2209 (1.5312) lr 1.0000e-05 eta 2 days, 4:53:20
epoch [4/50] batch [560/2000] time 2.004 (2.037) data 0.000 (0.002) loss 1.4608 (1.5548) lr 1.0000e-05 eta 2 days, 4:52:05
epoch [4/50] batch [580/2000] time 2.059 (2.037) data 0.000 (0.002) loss 1.3622 (1.5539) lr 1.0000e-05 eta 2 days, 4:50:59
epoch [4/50] batch [600/2000] time 2.055 (2.036) data 0.001 (0.002) loss 0.9059 (1.5333) lr 1.0000e-05 eta 2 days, 4:49:59
epoch [4/50] batch [620/2000] time 2.057 (2.036) data 0.000 (0.002) loss 3.3985 (1.5296) lr 1.0000e-05 eta 2 days, 4:49:22
epoch [4/50] batch [640/2000] time 2.055 (2.036) data 0.000 (0.002) loss 0.4411 (1.5279) lr 1.0000e-05 eta 2 days, 4:48:25
epoch [4/50] batch [660/2000] time 2.000 (2.036) data 0.000 (0.001) loss 1.3570 (1.5171) lr 1.0000e-05 eta 2 days, 4:47:31
epoch [4/50] batch [680/2000] time 2.002 (2.036) data 0.000 (0.001) loss 0.3974 (1.5093) lr 1.0000e-05 eta 2 days, 4:46:46
epoch [4/50] batch [700/2000] time 2.058 (2.036) data 0.000 (0.001) loss 1.2263 (1.5023) lr 1.0000e-05 eta 2 days, 4:46:01
epoch [4/50] batch [720/2000] time 2.055 (2.036) data 0.000 (0.001) loss 3.3293 (1.5020) lr 1.0000e-05 eta 2 days, 4:45:19
epoch [4/50] batch [740/2000] time 2.056 (2.036) data 0.000 (0.001) loss 0.6766 (1.5091) lr 1.0000e-05 eta 2 days, 4:44:40
epoch [4/50] batch [760/2000] time 2.036 (2.036) data 0.000 (0.001) loss 1.7746 (1.5130) lr 1.0000e-05 eta 2 days, 4:43:49
epoch [4/50] batch [780/2000] time 2.059 (2.036) data 0.001 (0.001) loss 2.2500 (1.5121) lr 1.0000e-05 eta 2 days, 4:43:08
epoch [4/50] batch [800/2000] time 2.059 (2.036) data 0.000 (0.001) loss 0.2681 (1.5140) lr 1.0000e-05 eta 2 days, 4:42:24
epoch [4/50] batch [820/2000] time 2.003 (2.036) data 0.000 (0.001) loss 0.6188 (1.5075) lr 1.0000e-05 eta 2 days, 4:41:33
epoch [4/50] batch [840/2000] time 2.067 (2.036) data 0.000 (0.001) loss 1.3297 (1.5043) lr 1.0000e-05 eta 2 days, 4:41:05
epoch [4/50] batch [860/2000] time 2.062 (2.036) data 0.000 (0.001) loss 0.4629 (1.4965) lr 1.0000e-05 eta 2 days, 4:40:33
epoch [4/50] batch [880/2000] time 2.001 (2.036) data 0.000 (0.001) loss 2.6404 (1.4978) lr 1.0000e-05 eta 2 days, 4:39:42
epoch [4/50] batch [900/2000] time 2.004 (2.036) data 0.000 (0.001) loss 2.0163 (1.4957) lr 1.0000e-05 eta 2 days, 4:39:04
epoch [4/50] batch [920/2000] time 2.037 (2.036) data 0.000 (0.001) loss 0.1083 (1.4945) lr 1.0000e-05 eta 2 days, 4:38:14
epoch [4/50] batch [940/2000] time 2.060 (2.036) data 0.000 (0.001) loss 1.1721 (1.4944) lr 1.0000e-05 eta 2 days, 4:37:27
epoch [4/50] batch [960/2000] time 2.056 (2.036) data 0.000 (0.001) loss 0.6696 (1.4945) lr 1.0000e-05 eta 2 days, 4:37:08
epoch [4/50] batch [980/2000] time 2.001 (2.036) data 0.000 (0.001) loss 0.4903 (1.4915) lr 1.0000e-05 eta 2 days, 4:36:22
epoch [4/50] batch [1000/2000] time 2.036 (2.036) data 0.000 (0.001) loss 3.1071 (1.4959) lr 1.0000e-05 eta 2 days, 4:35:33
epoch [4/50] batch [1020/2000] time 2.034 (2.036) data 0.000 (0.001) loss 1.8034 (1.4939) lr 1.0000e-05 eta 2 days, 4:34:36
epoch [4/50] batch [1040/2000] time 2.037 (2.036) data 0.000 (0.001) loss 1.4620 (1.4909) lr 1.0000e-05 eta 2 days, 4:33:57
epoch [4/50] batch [1060/2000] time 2.001 (2.036) data 0.000 (0.001) loss 1.7487 (1.4985) lr 1.0000e-05 eta 2 days, 4:33:19
epoch [4/50] batch [1080/2000] time 2.037 (2.036) data 0.000 (0.001) loss 2.4268 (1.4929) lr 1.0000e-05 eta 2 days, 4:32:41
epoch [4/50] batch [1100/2000] time 2.002 (2.036) data 0.000 (0.001) loss 1.7547 (1.4881) lr 1.0000e-05 eta 2 days, 4:32:07
epoch [4/50] batch [1120/2000] time 2.035 (2.036) data 0.000 (0.001) loss 0.8790 (1.4843) lr 1.0000e-05 eta 2 days, 4:31:32
epoch [4/50] batch [1140/2000] time 2.036 (2.036) data 0.001 (0.001) loss 1.2788 (1.4881) lr 1.0000e-05 eta 2 days, 4:30:46
epoch [4/50] batch [1160/2000] time 2.055 (2.036) data 0.000 (0.001) loss 1.7514 (1.4917) lr 1.0000e-05 eta 2 days, 4:30:12
epoch [4/50] batch [1180/2000] time 2.036 (2.036) data 0.001 (0.001) loss 2.8420 (1.4854) lr 1.0000e-05 eta 2 days, 4:29:32
epoch [4/50] batch [1200/2000] time 2.004 (2.036) data 0.000 (0.001) loss 0.6544 (1.4896) lr 1.0000e-05 eta 2 days, 4:28:47
epoch [4/50] batch [1220/2000] time 2.036 (2.036) data 0.000 (0.001) loss 0.5246 (1.4880) lr 1.0000e-05 eta 2 days, 4:28:09
epoch [4/50] batch [1240/2000] time 2.004 (2.036) data 0.000 (0.001) loss 2.7657 (1.4864) lr 1.0000e-05 eta 2 days, 4:27:24
epoch [4/50] batch [1260/2000] time 2.037 (2.036) data 0.000 (0.001) loss 0.9858 (1.4828) lr 1.0000e-05 eta 2 days, 4:26:55
epoch [4/50] batch [1280/2000] time 2.001 (2.036) data 0.000 (0.001) loss 2.0323 (1.4893) lr 1.0000e-05 eta 2 days, 4:26:10
epoch [4/50] batch [1300/2000] time 2.057 (2.036) data 0.000 (0.001) loss 0.3861 (1.4880) lr 1.0000e-05 eta 2 days, 4:25:33
epoch [4/50] batch [1320/2000] time 2.060 (2.036) data 0.000 (0.001) loss 1.1407 (1.4925) lr 1.0000e-05 eta 2 days, 4:24:54
epoch [4/50] batch [1340/2000] time 2.001 (2.036) data 0.000 (0.001) loss 0.3858 (1.4887) lr 1.0000e-05 eta 2 days, 4:24:12
epoch [4/50] batch [1360/2000] time 2.065 (2.036) data 0.000 (0.001) loss 1.7255 (1.4866) lr 1.0000e-05 eta 2 days, 4:23:39
epoch [4/50] batch [1380/2000] time 2.035 (2.036) data 0.000 (0.001) loss 0.9433 (1.4850) lr 1.0000e-05 eta 2 days, 4:22:58
epoch [4/50] batch [1400/2000] time 2.060 (2.036) data 0.000 (0.001) loss 2.0390 (1.4843) lr 1.0000e-05 eta 2 days, 4:22:06
epoch [4/50] batch [1420/2000] time 1.999 (2.036) data 0.000 (0.001) loss 0.8030 (1.4807) lr 1.0000e-05 eta 2 days, 4:21:20
epoch [4/50] batch [1440/2000] time 2.066 (2.036) data 0.000 (0.001) loss 0.5353 (1.4827) lr 1.0000e-05 eta 2 days, 4:20:28
epoch [4/50] batch [1460/2000] time 2.060 (2.036) data 0.000 (0.001) loss 0.7900 (1.4835) lr 1.0000e-05 eta 2 days, 4:19:51
epoch [4/50] batch [1480/2000] time 2.060 (2.036) data 0.000 (0.001) loss 0.8187 (1.4820) lr 1.0000e-05 eta 2 days, 4:19:16
epoch [4/50] batch [1500/2000] time 2.001 (2.036) data 0.000 (0.001) loss 2.5193 (1.4828) lr 1.0000e-05 eta 2 days, 4:18:36
epoch [4/50] batch [1520/2000] time 2.058 (2.036) data 0.000 (0.001) loss 0.7994 (1.4811) lr 1.0000e-05 eta 2 days, 4:17:58
epoch [4/50] batch [1540/2000] time 2.005 (2.036) data 0.000 (0.001) loss 0.8600 (1.4768) lr 1.0000e-05 eta 2 days, 4:17:27
epoch [4/50] batch [1560/2000] time 2.058 (2.036) data 0.000 (0.001) loss 0.3745 (1.4713) lr 1.0000e-05 eta 2 days, 4:16:53
epoch [4/50] batch [1580/2000] time 2.034 (2.036) data 0.000 (0.001) loss 1.4109 (1.4703) lr 1.0000e-05 eta 2 days, 4:16:12
epoch [4/50] batch [1600/2000] time 2.037 (2.036) data 0.000 (0.001) loss 3.7264 (1.4715) lr 1.0000e-05 eta 2 days, 4:15:42
epoch [4/50] batch [1620/2000] time 2.063 (2.036) data 0.000 (0.001) loss 3.2004 (1.4693) lr 1.0000e-05 eta 2 days, 4:15:03
epoch [4/50] batch [1640/2000] time 2.057 (2.036) data 0.000 (0.001) loss 1.3586 (1.4685) lr 1.0000e-05 eta 2 days, 4:14:18
epoch [4/50] batch [1660/2000] time 2.037 (2.036) data 0.000 (0.001) loss 0.7657 (1.4648) lr 1.0000e-05 eta 2 days, 4:13:40
epoch [4/50] batch [1680/2000] time 1.977 (2.036) data 0.001 (0.001) loss 1.6451 (1.4660) lr 1.0000e-05 eta 2 days, 4:12:54
epoch [4/50] batch [1700/2000] time 2.058 (2.036) data 0.000 (0.001) loss 1.0175 (1.4652) lr 1.0000e-05 eta 2 days, 4:12:13
epoch [4/50] batch [1720/2000] time 2.039 (2.036) data 0.000 (0.001) loss 2.1289 (1.4658) lr 1.0000e-05 eta 2 days, 4:11:28
epoch [4/50] batch [1740/2000] time 2.058 (2.036) data 0.001 (0.001) loss 0.9674 (1.4650) lr 1.0000e-05 eta 2 days, 4:10:49
epoch [4/50] batch [1760/2000] time 2.006 (2.036) data 0.000 (0.001) loss 1.5322 (1.4667) lr 1.0000e-05 eta 2 days, 4:10:10
epoch [4/50] batch [1780/2000] time 2.061 (2.036) data 0.000 (0.001) loss 0.7712 (1.4662) lr 1.0000e-05 eta 2 days, 4:09:34
epoch [4/50] batch [1800/2000] time 2.061 (2.036) data 0.000 (0.001) loss 1.1642 (1.4627) lr 1.0000e-05 eta 2 days, 4:09:00
epoch [4/50] batch [1820/2000] time 1.980 (2.036) data 0.000 (0.001) loss 1.7741 (1.4612) lr 1.0000e-05 eta 2 days, 4:08:21
epoch [4/50] batch [1840/2000] time 2.058 (2.036) data 0.000 (0.001) loss 1.9429 (1.4582) lr 1.0000e-05 eta 2 days, 4:07:36
epoch [4/50] batch [1860/2000] time 1.977 (2.036) data 0.000 (0.001) loss 4.2379 (1.4653) lr 1.0000e-05 eta 2 days, 4:06:51
epoch [4/50] batch [1880/2000] time 2.057 (2.036) data 0.000 (0.001) loss 2.5974 (1.4661) lr 1.0000e-05 eta 2 days, 4:06:11
epoch [4/50] batch [1900/2000] time 2.056 (2.036) data 0.000 (0.001) loss 2.1847 (1.4670) lr 1.0000e-05 eta 2 days, 4:05:21
epoch [4/50] batch [1920/2000] time 2.039 (2.036) data 0.000 (0.001) loss 0.6380 (1.4623) lr 1.0000e-05 eta 2 days, 4:04:40
epoch [4/50] batch [1940/2000] time 2.001 (2.036) data 0.000 (0.001) loss 1.2563 (1.4603) lr 1.0000e-05 eta 2 days, 4:04:04
epoch [4/50] batch [1960/2000] time 2.001 (2.036) data 0.000 (0.001) loss 1.2226 (1.4584) lr 1.0000e-05 eta 2 days, 4:03:21
epoch [4/50] batch [1980/2000] time 2.000 (2.036) data 0.000 (0.001) loss 1.5224 (1.4606) lr 1.0000e-05 eta 2 days, 4:02:34
epoch [4/50] batch [2000/2000] time 2.058 (2.036) data 0.000 (0.001) loss 2.3341 (1.4635) lr 1.0000e-05 eta 2 days, 4:01:53
epoch [5/50] batch [20/2000] time 2.059 (2.067) data 0.000 (0.028) loss 2.3417 (1.8400) lr 1.0000e-05 eta 2 days, 4:48:00
epoch [5/50] batch [40/2000] time 2.000 (2.049) data 0.000 (0.014) loss 0.6453 (1.6439) lr 1.0000e-05 eta 2 days, 4:20:35
epoch [5/50] batch [60/2000] time 2.056 (2.046) data 0.000 (0.009) loss 1.4861 (1.4869) lr 1.0000e-05 eta 2 days, 4:14:33
epoch [5/50] batch [80/2000] time 2.001 (2.043) data 0.000 (0.007) loss 0.8621 (1.5492) lr 1.0000e-05 eta 2 days, 4:09:39
epoch [5/50] batch [100/2000] time 1.980 (2.040) data 0.000 (0.006) loss 2.0270 (1.5403) lr 1.0000e-05 eta 2 days, 4:05:09
epoch [5/50] batch [120/2000] time 1.978 (2.039) data 0.000 (0.005) loss 0.4205 (1.5403) lr 1.0000e-05 eta 2 days, 4:01:43
epoch [5/50] batch [140/2000] time 2.058 (2.038) data 0.000 (0.004) loss 2.0923 (1.5410) lr 1.0000e-05 eta 2 days, 4:00:11
epoch [5/50] batch [160/2000] time 2.003 (2.037) data 0.000 (0.004) loss 0.8190 (1.5184) lr 1.0000e-05 eta 2 days, 3:58:40
epoch [5/50] batch [180/2000] time 2.036 (2.038) data 0.000 (0.003) loss 1.7202 (1.5226) lr 1.0000e-05 eta 2 days, 3:58:07
epoch [5/50] batch [200/2000] time 1.982 (2.037) data 0.000 (0.003) loss 2.2680 (1.5248) lr 1.0000e-05 eta 2 days, 3:56:26
epoch [5/50] batch [220/2000] time 2.059 (2.037) data 0.000 (0.003) loss 1.6062 (1.5349) lr 1.0000e-05 eta 2 days, 3:55:53
epoch [5/50] batch [240/2000] time 2.002 (2.037) data 0.000 (0.003) loss 1.8114 (1.5129) lr 1.0000e-05 eta 2 days, 3:55:13
epoch [5/50] batch [260/2000] time 2.058 (2.038) data 0.000 (0.002) loss 2.0667 (1.5078) lr 1.0000e-05 eta 2 days, 3:55:23
epoch [5/50] batch [280/2000] time 2.035 (2.037) data 0.000 (0.002) loss 3.7960 (1.5152) lr 1.0000e-05 eta 2 days, 3:54:06
epoch [5/50] batch [300/2000] time 2.055 (2.037) data 0.000 (0.002) loss 1.7268 (1.5101) lr 1.0000e-05 eta 2 days, 3:53:19
epoch [5/50] batch [320/2000] time 2.057 (2.036) data 0.000 (0.002) loss 2.3896 (1.5034) lr 1.0000e-05 eta 2 days, 3:51:30
epoch [5/50] batch [340/2000] time 2.035 (2.036) data 0.000 (0.002) loss 1.6440 (1.4917) lr 1.0000e-05 eta 2 days, 3:49:52
epoch [5/50] batch [360/2000] time 1.996 (2.036) data 0.000 (0.002) loss 0.7881 (1.4615) lr 1.0000e-05 eta 2 days, 3:49:04
epoch [5/50] batch [380/2000] time 1.999 (2.036) data 0.000 (0.002) loss 1.7276 (1.4569) lr 1.0000e-05 eta 2 days, 3:48:15
epoch [5/50] batch [400/2000] time 2.032 (2.035) data 0.000 (0.002) loss 1.2043 (1.4556) lr 1.0000e-05 eta 2 days, 3:47:05
epoch [5/50] batch [420/2000] time 2.058 (2.035) data 0.000 (0.002) loss 1.9097 (1.4630) lr 1.0000e-05 eta 2 days, 3:46:07
epoch [5/50] batch [440/2000] time 2.002 (2.035) data 0.000 (0.002) loss 2.0881 (1.4732) lr 1.0000e-05 eta 2 days, 3:45:44
epoch [5/50] batch [460/2000] time 2.060 (2.036) data 0.000 (0.001) loss 0.2621 (1.4710) lr 1.0000e-05 eta 2 days, 3:45:31
epoch [5/50] batch [480/2000] time 2.001 (2.036) data 0.000 (0.001) loss 1.3610 (1.4592) lr 1.0000e-05 eta 2 days, 3:44:49
epoch [5/50] batch [500/2000] time 2.064 (2.036) data 0.000 (0.001) loss 0.2100 (1.4533) lr 1.0000e-05 eta 2 days, 3:44:37
epoch [5/50] batch [520/2000] time 2.033 (2.036) data 0.000 (0.001) loss 2.6484 (1.4457) lr 1.0000e-05 eta 2 days, 3:43:36
epoch [5/50] batch [540/2000] time 2.057 (2.035) data 0.000 (0.001) loss 3.6420 (1.4390) lr 1.0000e-05 eta 2 days, 3:42:38
epoch [5/50] batch [560/2000] time 1.977 (2.036) data 0.000 (0.001) loss 1.0407 (1.4588) lr 1.0000e-05 eta 2 days, 3:42:20
epoch [5/50] batch [580/2000] time 2.037 (2.035) data 0.000 (0.001) loss 4.1813 (1.4693) lr 1.0000e-05 eta 2 days, 3:41:14
epoch [5/50] batch [600/2000] time 2.057 (2.036) data 0.001 (0.001) loss 1.7626 (1.4804) lr 1.0000e-05 eta 2 days, 3:40:50
epoch [5/50] batch [620/2000] time 2.057 (2.036) data 0.000 (0.001) loss 2.9766 (1.4777) lr 1.0000e-05 eta 2 days, 3:40:04
epoch [5/50] batch [640/2000] time 2.004 (2.036) data 0.000 (0.001) loss 1.9043 (1.4852) lr 1.0000e-05 eta 2 days, 3:39:25
epoch [5/50] batch [660/2000] time 2.033 (2.036) data 0.000 (0.001) loss 0.1622 (1.4842) lr 1.0000e-05 eta 2 days, 3:38:51
epoch [5/50] batch [680/2000] time 2.001 (2.035) data 0.000 (0.001) loss 0.3452 (1.4691) lr 1.0000e-05 eta 2 days, 3:37:57
epoch [5/50] batch [700/2000] time 2.056 (2.035) data 0.000 (0.001) loss 0.9213 (1.4696) lr 1.0000e-05 eta 2 days, 3:37:18
epoch [5/50] batch [720/2000] time 1.999 (2.035) data 0.000 (0.001) loss 0.2759 (1.4624) lr 1.0000e-05 eta 2 days, 3:36:31
epoch [5/50] batch [740/2000] time 2.002 (2.035) data 0.000 (0.001) loss 0.7259 (1.4599) lr 1.0000e-05 eta 2 days, 3:35:34
epoch [5/50] batch [760/2000] time 2.057 (2.035) data 0.000 (0.001) loss 0.6713 (1.4567) lr 1.0000e-05 eta 2 days, 3:35:04
epoch [5/50] batch [780/2000] time 2.038 (2.035) data 0.000 (0.001) loss 1.8796 (1.4612) lr 1.0000e-05 eta 2 days, 3:34:23
epoch [5/50] batch [800/2000] time 2.055 (2.035) data 0.000 (0.001) loss 2.2954 (1.4644) lr 1.0000e-05 eta 2 days, 3:33:56
epoch [5/50] batch [820/2000] time 2.060 (2.036) data 0.000 (0.001) loss 0.3364 (1.4591) lr 1.0000e-05 eta 2 days, 3:33:21
epoch [5/50] batch [840/2000] time 2.055 (2.035) data 0.000 (0.001) loss 0.6195 (1.4562) lr 1.0000e-05 eta 2 days, 3:32:36
epoch [5/50] batch [860/2000] time 2.001 (2.035) data 0.000 (0.001) loss 5.1854 (1.4543) lr 1.0000e-05 eta 2 days, 3:31:50
epoch [5/50] batch [880/2000] time 2.037 (2.035) data 0.000 (0.001) loss 0.1889 (1.4482) lr 1.0000e-05 eta 2 days, 3:30:58
epoch [5/50] batch [900/2000] time 2.034 (2.035) data 0.000 (0.001) loss 2.1654 (1.4447) lr 1.0000e-05 eta 2 days, 3:30:22
epoch [5/50] batch [920/2000] time 2.058 (2.035) data 0.000 (0.001) loss 0.6944 (1.4397) lr 1.0000e-05 eta 2 days, 3:29:29
epoch [5/50] batch [940/2000] time 2.057 (2.035) data 0.000 (0.001) loss 3.3863 (1.4417) lr 1.0000e-05 eta 2 days, 3:28:49
epoch [5/50] batch [960/2000] time 2.001 (2.035) data 0.000 (0.001) loss 3.4969 (1.4495) lr 1.0000e-05 eta 2 days, 3:28:07
epoch [5/50] batch [980/2000] time 2.059 (2.035) data 0.000 (0.001) loss 0.8613 (1.4484) lr 1.0000e-05 eta 2 days, 3:27:23
epoch [5/50] batch [1000/2000] time 2.036 (2.035) data 0.000 (0.001) loss 1.4796 (1.4475) lr 1.0000e-05 eta 2 days, 3:26:31
epoch [5/50] batch [1020/2000] time 1.999 (2.035) data 0.000 (0.001) loss 0.4303 (1.4492) lr 1.0000e-05 eta 2 days, 3:25:57
epoch [5/50] batch [1040/2000] time 2.059 (2.035) data 0.000 (0.001) loss 0.4307 (1.4452) lr 1.0000e-05 eta 2 days, 3:25:21
epoch [5/50] batch [1060/2000] time 2.001 (2.035) data 0.000 (0.001) loss 0.6459 (1.4465) lr 1.0000e-05 eta 2 days, 3:24:32
epoch [5/50] batch [1080/2000] time 2.057 (2.035) data 0.000 (0.001) loss 2.0969 (1.4428) lr 1.0000e-05 eta 2 days, 3:23:44
epoch [5/50] batch [1100/2000] time 2.008 (2.035) data 0.000 (0.001) loss 1.7332 (1.4444) lr 1.0000e-05 eta 2 days, 3:23:09
epoch [5/50] batch [1120/2000] time 2.037 (2.035) data 0.000 (0.001) loss 1.5978 (1.4426) lr 1.0000e-05 eta 2 days, 3:22:29
epoch [5/50] batch [1140/2000] time 2.064 (2.035) data 0.001 (0.001) loss 1.2949 (1.4443) lr 1.0000e-05 eta 2 days, 3:21:45
epoch [5/50] batch [1160/2000] time 2.035 (2.035) data 0.000 (0.001) loss 1.2061 (1.4405) lr 1.0000e-05 eta 2 days, 3:21:07
epoch [5/50] batch [1180/2000] time 2.060 (2.035) data 0.000 (0.001) loss 1.8958 (1.4392) lr 1.0000e-05 eta 2 days, 3:20:21
epoch [5/50] batch [1200/2000] time 1.980 (2.035) data 0.000 (0.001) loss 0.9331 (1.4403) lr 1.0000e-05 eta 2 days, 3:19:46
epoch [5/50] batch [1220/2000] time 2.007 (2.035) data 0.000 (0.001) loss 0.3721 (1.4383) lr 1.0000e-05 eta 2 days, 3:19:09
epoch [5/50] batch [1240/2000] time 2.064 (2.035) data 0.000 (0.001) loss 0.6573 (1.4377) lr 1.0000e-05 eta 2 days, 3:18:45
epoch [5/50] batch [1260/2000] time 2.060 (2.035) data 0.000 (0.001) loss 1.2032 (1.4407) lr 1.0000e-05 eta 2 days, 3:18:13
epoch [5/50] batch [1280/2000] time 2.062 (2.035) data 0.000 (0.001) loss 1.2757 (1.4412) lr 1.0000e-05 eta 2 days, 3:17:31
epoch [5/50] batch [1300/2000] time 2.059 (2.035) data 0.000 (0.001) loss 0.1546 (1.4375) lr 1.0000e-05 eta 2 days, 3:16:44
epoch [5/50] batch [1320/2000] time 2.061 (2.035) data 0.000 (0.001) loss 0.4702 (1.4381) lr 1.0000e-05 eta 2 days, 3:16:04
epoch [5/50] batch [1340/2000] time 2.043 (2.035) data 0.000 (0.001) loss 0.4870 (1.4349) lr 1.0000e-05 eta 2 days, 3:15:17
epoch [5/50] batch [1360/2000] time 2.058 (2.035) data 0.000 (0.001) loss 4.7912 (1.4388) lr 1.0000e-05 eta 2 days, 3:14:38
epoch [5/50] batch [1380/2000] time 2.065 (2.035) data 0.000 (0.001) loss 1.1048 (1.4409) lr 1.0000e-05 eta 2 days, 3:13:56
epoch [5/50] batch [1400/2000] time 2.035 (2.035) data 0.000 (0.001) loss 0.2936 (1.4376) lr 1.0000e-05 eta 2 days, 3:13:13
epoch [5/50] batch [1420/2000] time 2.057 (2.035) data 0.000 (0.001) loss 1.5842 (1.4341) lr 1.0000e-05 eta 2 days, 3:12:32
epoch [5/50] batch [1440/2000] time 2.060 (2.035) data 0.000 (0.001) loss 2.1955 (1.4334) lr 1.0000e-05 eta 2 days, 3:11:52
epoch [5/50] batch [1460/2000] time 2.058 (2.035) data 0.000 (0.001) loss 2.8493 (1.4357) lr 1.0000e-05 eta 2 days, 3:11:23
epoch [5/50] batch [1480/2000] time 2.034 (2.035) data 0.000 (0.001) loss 0.3158 (1.4339) lr 1.0000e-05 eta 2 days, 3:10:38
epoch [5/50] batch [1500/2000] time 2.058 (2.035) data 0.000 (0.001) loss 0.4023 (1.4311) lr 1.0000e-05 eta 2 days, 3:10:03
epoch [5/50] batch [1520/2000] time 2.036 (2.035) data 0.000 (0.001) loss 0.4034 (1.4300) lr 1.0000e-05 eta 2 days, 3:09:26
epoch [5/50] batch [1540/2000] time 2.040 (2.035) data 0.000 (0.001) loss 0.4975 (1.4279) lr 1.0000e-05 eta 2 days, 3:08:45
epoch [5/50] batch [1560/2000] time 2.059 (2.035) data 0.000 (0.001) loss 2.7796 (1.4264) lr 1.0000e-05 eta 2 days, 3:07:57
epoch [5/50] batch [1580/2000] time 1.977 (2.035) data 0.000 (0.001) loss 2.4117 (1.4240) lr 1.0000e-05 eta 2 days, 3:07:13
epoch [5/50] batch [1600/2000] time 2.038 (2.035) data 0.000 (0.001) loss 0.2675 (1.4262) lr 1.0000e-05 eta 2 days, 3:06:31
epoch [5/50] batch [1620/2000] time 2.035 (2.035) data 0.000 (0.001) loss 1.5522 (1.4273) lr 1.0000e-05 eta 2 days, 3:05:51
epoch [5/50] batch [1640/2000] time 2.057 (2.035) data 0.000 (0.001) loss 0.4736 (1.4263) lr 1.0000e-05 eta 2 days, 3:05:04
epoch [5/50] batch [1660/2000] time 2.035 (2.035) data 0.000 (0.001) loss 1.2203 (1.4297) lr 1.0000e-05 eta 2 days, 3:04:22
epoch [5/50] batch [1680/2000] time 2.003 (2.035) data 0.001 (0.001) loss 0.7619 (1.4296) lr 1.0000e-05 eta 2 days, 3:03:43
epoch [5/50] batch [1700/2000] time 2.059 (2.035) data 0.000 (0.001) loss 0.3004 (1.4268) lr 1.0000e-05 eta 2 days, 3:03:05
epoch [5/50] batch [1720/2000] time 2.058 (2.035) data 0.000 (0.001) loss 2.0211 (1.4289) lr 1.0000e-05 eta 2 days, 3:02:15
epoch [5/50] batch [1740/2000] time 2.057 (2.035) data 0.000 (0.001) loss 3.9555 (1.4307) lr 1.0000e-05 eta 2 days, 3:01:33
epoch [5/50] batch [1760/2000] time 2.059 (2.035) data 0.000 (0.001) loss 1.1861 (1.4334) lr 1.0000e-05 eta 2 days, 3:00:56
epoch [5/50] batch [1780/2000] time 1.979 (2.035) data 0.000 (0.001) loss 3.3724 (1.4335) lr 1.0000e-05 eta 2 days, 3:00:14
epoch [5/50] batch [1800/2000] time 2.054 (2.035) data 0.000 (0.001) loss 2.1426 (1.4280) lr 1.0000e-05 eta 2 days, 2:59:33
epoch [5/50] batch [1820/2000] time 2.034 (2.035) data 0.000 (0.001) loss 0.0692 (1.4272) lr 1.0000e-05 eta 2 days, 2:58:49
epoch [5/50] batch [1840/2000] time 2.034 (2.035) data 0.000 (0.001) loss 1.2282 (1.4262) lr 1.0000e-05 eta 2 days, 2:58:07
epoch [5/50] batch [1860/2000] time 2.057 (2.035) data 0.000 (0.001) loss 1.4877 (1.4224) lr 1.0000e-05 eta 2 days, 2:57:28
epoch [5/50] batch [1880/2000] time 2.007 (2.035) data 0.000 (0.001) loss 2.1192 (1.4221) lr 1.0000e-05 eta 2 days, 2:56:46
epoch [5/50] batch [1900/2000] time 2.036 (2.035) data 0.000 (0.001) loss 0.0173 (1.4184) lr 1.0000e-05 eta 2 days, 2:56:07
epoch [5/50] batch [1920/2000] time 2.059 (2.035) data 0.000 (0.001) loss 1.3389 (1.4157) lr 1.0000e-05 eta 2 days, 2:55:30
epoch [5/50] batch [1940/2000] time 2.057 (2.035) data 0.000 (0.001) loss 0.2402 (1.4115) lr 1.0000e-05 eta 2 days, 2:54:50
epoch [5/50] batch [1960/2000] time 1.977 (2.035) data 0.000 (0.001) loss 3.8441 (1.4125) lr 1.0000e-05 eta 2 days, 2:54:07
epoch [5/50] batch [1980/2000] time 2.033 (2.035) data 0.000 (0.001) loss 0.3418 (1.4123) lr 1.0000e-05 eta 2 days, 2:53:29
epoch [5/50] batch [2000/2000] time 2.037 (2.035) data 0.000 (0.001) loss 2.0398 (1.4139) lr 2.0000e-03 eta 2 days, 2:52:42
epoch [6/50] batch [20/2000] time 2.059 (2.066) data 0.000 (0.028) loss 1.8732 (1.8180) lr 2.0000e-03 eta 2 days, 3:38:53
epoch [6/50] batch [40/2000] time 2.058 (2.052) data 0.000 (0.014) loss 2.1222 (1.6707) lr 2.0000e-03 eta 2 days, 3:16:41
epoch [6/50] batch [60/2000] time 2.060 (2.048) data 0.001 (0.009) loss 1.2903 (1.6395) lr 2.0000e-03 eta 2 days, 3:09:32
epoch [6/50] batch [80/2000] time 2.035 (2.047) data 0.000 (0.007) loss 0.7810 (1.6105) lr 2.0000e-03 eta 2 days, 3:07:41
epoch [6/50] batch [100/2000] time 2.058 (2.046) data 0.000 (0.006) loss 0.6801 (1.6028) lr 2.0000e-03 eta 2 days, 3:05:27
epoch [6/50] batch [120/2000] time 2.056 (2.044) data 0.000 (0.005) loss 0.9855 (1.6466) lr 2.0000e-03 eta 2 days, 3:01:58
epoch [6/50] batch [140/2000] time 2.064 (2.043) data 0.000 (0.004) loss 2.7282 (1.6839) lr 2.0000e-03 eta 2 days, 3:00:05
epoch [6/50] batch [160/2000] time 2.061 (2.042) data 0.000 (0.004) loss 2.0728 (1.7631) lr 2.0000e-03 eta 2 days, 2:57:11
epoch [6/50] batch [180/2000] time 2.059 (2.041) data 0.000 (0.003) loss 1.3367 (1.7342) lr 2.0000e-03 eta 2 days, 2:56:02
epoch [6/50] batch [200/2000] time 2.057 (2.041) data 0.000 (0.003) loss 0.5592 (1.6722) lr 2.0000e-03 eta 2 days, 2:54:33
epoch [6/50] batch [220/2000] time 2.061 (2.040) data 0.000 (0.003) loss 2.2014 (1.6242) lr 2.0000e-03 eta 2 days, 2:52:57
epoch [6/50] batch [240/2000] time 2.007 (2.039) data 0.000 (0.003) loss 0.2116 (1.5985) lr 2.0000e-03 eta 2 days, 2:51:01
epoch [6/50] batch [260/2000] time 2.033 (2.039) data 0.000 (0.002) loss 0.9056 (1.5673) lr 2.0000e-03 eta 2 days, 2:49:55
epoch [6/50] batch [280/2000] time 2.058 (2.039) data 0.000 (0.002) loss 0.3177 (1.5523) lr 2.0000e-03 eta 2 days, 2:48:23
epoch [6/50] batch [300/2000] time 2.035 (2.038) data 0.000 (0.002) loss 0.7186 (1.5336) lr 2.0000e-03 eta 2 days, 2:47:12
epoch [6/50] batch [320/2000] time 2.002 (2.038) data 0.000 (0.002) loss 3.3021 (1.5109) lr 2.0000e-03 eta 2 days, 2:46:11
epoch [6/50] batch [340/2000] time 2.035 (2.038) data 0.000 (0.002) loss 1.4693 (1.5231) lr 2.0000e-03 eta 2 days, 2:45:40
epoch [6/50] batch [360/2000] time 2.058 (2.038) data 0.000 (0.002) loss 0.2201 (1.5010) lr 2.0000e-03 eta 2 days, 2:44:28
epoch [6/50] batch [380/2000] time 2.031 (2.038) data 0.000 (0.002) loss 0.4355 (1.4953) lr 2.0000e-03 eta 2 days, 2:43:45
epoch [6/50] batch [400/2000] time 2.056 (2.038) data 0.000 (0.002) loss 0.2821 (1.4781) lr 2.0000e-03 eta 2 days, 2:43:03
epoch [6/50] batch [420/2000] time 2.055 (2.038) data 0.000 (0.002) loss 0.4547 (1.4591) lr 2.0000e-03 eta 2 days, 2:42:21
epoch [6/50] batch [440/2000] time 2.002 (2.037) data 0.000 (0.001) loss 1.5540 (1.4633) lr 2.0000e-03 eta 2 days, 2:41:09
epoch [6/50] batch [460/2000] time 2.006 (2.037) data 0.000 (0.001) loss 1.2347 (1.4626) lr 2.0000e-03 eta 2 days, 2:40:13
epoch [6/50] batch [480/2000] time 2.056 (2.037) data 0.000 (0.001) loss 3.7467 (1.4696) lr 2.0000e-03 eta 2 days, 2:39:40
epoch [6/50] batch [500/2000] time 2.004 (2.037) data 0.000 (0.001) loss 1.0564 (1.4627) lr 2.0000e-03 eta 2 days, 2:38:56
epoch [6/50] batch [520/2000] time 2.058 (2.037) data 0.000 (0.001) loss 1.7424 (1.4476) lr 2.0000e-03 eta 2 days, 2:38:14
epoch [6/50] batch [540/2000] time 2.058 (2.037) data 0.000 (0.001) loss 1.9667 (1.4491) lr 2.0000e-03 eta 2 days, 2:37:41
epoch [6/50] batch [560/2000] time 2.035 (2.037) data 0.000 (0.001) loss 1.2343 (1.4419) lr 2.0000e-03 eta 2 days, 2:36:51
epoch [6/50] batch [580/2000] time 2.065 (2.037) data 0.000 (0.001) loss 1.1171 (1.4282) lr 2.0000e-03 eta 2 days, 2:35:50
epoch [6/50] batch [600/2000] time 2.033 (2.037) data 0.001 (0.001) loss 1.6255 (1.4259) lr 2.0000e-03 eta 2 days, 2:35:09
epoch [6/50] batch [620/2000] time 2.059 (2.037) data 0.000 (0.001) loss 0.7314 (1.4181) lr 2.0000e-03 eta 2 days, 2:34:41
epoch [6/50] batch [640/2000] time 2.033 (2.037) data 0.000 (0.001) loss 1.1221 (1.4233) lr 2.0000e-03 eta 2 days, 2:33:48
epoch [6/50] batch [660/2000] time 2.003 (2.037) data 0.000 (0.001) loss 0.2058 (1.4189) lr 2.0000e-03 eta 2 days, 2:32:54
epoch [6/50] batch [680/2000] time 2.035 (2.037) data 0.000 (0.001) loss 0.3543 (1.4253) lr 2.0000e-03 eta 2 days, 2:32:14
epoch [6/50] batch [700/2000] time 2.002 (2.037) data 0.000 (0.001) loss 1.9138 (1.4273) lr 2.0000e-03 eta 2 days, 2:31:45
epoch [6/50] batch [720/2000] time 2.060 (2.037) data 0.000 (0.001) loss 0.8717 (1.4167) lr 2.0000e-03 eta 2 days, 2:31:26
epoch [6/50] batch [740/2000] time 2.007 (2.037) data 0.000 (0.001) loss 0.9211 (1.4187) lr 2.0000e-03 eta 2 days, 2:30:39
epoch [6/50] batch [760/2000] time 2.033 (2.037) data 0.000 (0.001) loss 0.6627 (1.4131) lr 2.0000e-03 eta 2 days, 2:29:51
epoch [6/50] batch [780/2000] time 2.037 (2.037) data 0.000 (0.001) loss 0.0682 (1.4074) lr 2.0000e-03 eta 2 days, 2:29:13
epoch [6/50] batch [800/2000] time 2.034 (2.037) data 0.000 (0.001) loss 0.4818 (1.4002) lr 2.0000e-03 eta 2 days, 2:28:19
epoch [6/50] batch [820/2000] time 2.057 (2.037) data 0.000 (0.001) loss 2.2926 (1.4128) lr 2.0000e-03 eta 2 days, 2:27:51
epoch [6/50] batch [840/2000] time 2.059 (2.037) data 0.000 (0.001) loss 1.2245 (1.4077) lr 2.0000e-03 eta 2 days, 2:27:05
epoch [6/50] batch [860/2000] time 2.035 (2.037) data 0.000 (0.001) loss 0.9923 (1.4135) lr 2.0000e-03 eta 2 days, 2:26:07
epoch [6/50] batch [880/2000] time 2.032 (2.037) data 0.000 (0.001) loss 0.1153 (1.4101) lr 2.0000e-03 eta 2 days, 2:25:31
epoch [6/50] batch [900/2000] time 1.999 (2.037) data 0.000 (0.001) loss 1.6910 (1.4093) lr 2.0000e-03 eta 2 days, 2:24:42
epoch [6/50] batch [920/2000] time 1.976 (2.037) data 0.000 (0.001) loss 4.9095 (1.4184) lr 2.0000e-03 eta 2 days, 2:23:49
epoch [6/50] batch [940/2000] time 2.001 (2.037) data 0.000 (0.001) loss 1.7182 (1.4102) lr 2.0000e-03 eta 2 days, 2:23:03
epoch [6/50] batch [960/2000] time 2.056 (2.037) data 0.000 (0.001) loss 0.4055 (1.4159) lr 2.0000e-03 eta 2 days, 2:22:12
epoch [6/50] batch [980/2000] time 2.060 (2.037) data 0.000 (0.001) loss 1.2286 (1.4182) lr 2.0000e-03 eta 2 days, 2:21:32
epoch [6/50] batch [1000/2000] time 2.002 (2.037) data 0.000 (0.001) loss 0.5701 (1.4165) lr 2.0000e-03 eta 2 days, 2:20:49
epoch [6/50] batch [1020/2000] time 1.978 (2.037) data 0.000 (0.001) loss 0.8798 (1.4104) lr 2.0000e-03 eta 2 days, 2:20:12
epoch [6/50] batch [1040/2000] time 2.038 (2.037) data 0.000 (0.001) loss 0.4252 (1.4080) lr 2.0000e-03 eta 2 days, 2:19:45
epoch [6/50] batch [1060/2000] time 2.059 (2.037) data 0.000 (0.001) loss 1.5602 (1.4065) lr 2.0000e-03 eta 2 days, 2:19:18
epoch [6/50] batch [1080/2000] time 2.002 (2.037) data 0.000 (0.001) loss 0.2873 (1.4009) lr 2.0000e-03 eta 2 days, 2:18:27
epoch [6/50] batch [1100/2000] time 1.983 (2.037) data 0.000 (0.001) loss 1.2146 (1.4036) lr 2.0000e-03 eta 2 days, 2:17:36
epoch [6/50] batch [1120/2000] time 2.059 (2.037) data 0.000 (0.001) loss 1.9720 (1.4035) lr 2.0000e-03 eta 2 days, 2:17:04
epoch [6/50] batch [1140/2000] time 2.001 (2.037) data 0.001 (0.001) loss 0.3298 (1.4023) lr 2.0000e-03 eta 2 days, 2:16:24
epoch [6/50] batch [1160/2000] time 2.059 (2.037) data 0.000 (0.001) loss 1.0676 (1.3952) lr 2.0000e-03 eta 2 days, 2:15:38
epoch [6/50] batch [1180/2000] time 2.009 (2.037) data 0.000 (0.001) loss 0.3500 (1.3875) lr 2.0000e-03 eta 2 days, 2:14:52
epoch [6/50] batch [1200/2000] time 2.005 (2.037) data 0.000 (0.001) loss 0.4792 (1.3878) lr 2.0000e-03 eta 2 days, 2:14:16
epoch [6/50] batch [1220/2000] time 2.039 (2.037) data 0.000 (0.001) loss 1.9102 (1.3828) lr 2.0000e-03 eta 2 days, 2:13:37
epoch [6/50] batch [1240/2000] time 2.064 (2.037) data 0.000 (0.001) loss 0.4818 (1.3804) lr 2.0000e-03 eta 2 days, 2:12:57
epoch [6/50] batch [1260/2000] time 2.004 (2.037) data 0.000 (0.001) loss 2.0776 (1.3794) lr 2.0000e-03 eta 2 days, 2:12:21
epoch [6/50] batch [1280/2000] time 2.008 (2.037) data 0.000 (0.001) loss 1.8737 (1.3819) lr 2.0000e-03 eta 2 days, 2:11:43
epoch [6/50] batch [1300/2000] time 2.039 (2.037) data 0.000 (0.001) loss 3.3505 (1.3829) lr 2.0000e-03 eta 2 days, 2:11:03
epoch [6/50] batch [1320/2000] time 2.047 (2.037) data 0.000 (0.001) loss 1.9410 (1.3840) lr 2.0000e-03 eta 2 days, 2:10:32
epoch [6/50] batch [1340/2000] time 2.063 (2.037) data 0.000 (0.001) loss 2.6035 (1.3862) lr 2.0000e-03 eta 2 days, 2:09:55
epoch [6/50] batch [1360/2000] time 2.002 (2.037) data 0.000 (0.001) loss 3.2758 (1.3878) lr 2.0000e-03 eta 2 days, 2:09:05
epoch [6/50] batch [1380/2000] time 2.036 (2.037) data 0.000 (0.001) loss 1.8017 (1.3904) lr 2.0000e-03 eta 2 days, 2:08:18
epoch [6/50] batch [1400/2000] time 2.038 (2.037) data 0.000 (0.001) loss 1.3131 (1.3841) lr 2.0000e-03 eta 2 days, 2:07:37
epoch [6/50] batch [1420/2000] time 2.042 (2.037) data 0.000 (0.001) loss 1.0495 (1.3857) lr 2.0000e-03 eta 2 days, 2:07:00
epoch [6/50] batch [1440/2000] time 2.000 (2.037) data 0.000 (0.001) loss 0.6931 (1.3838) lr 2.0000e-03 eta 2 days, 2:06:19
epoch [6/50] batch [1460/2000] time 2.058 (2.037) data 0.000 (0.001) loss 0.2505 (1.3851) lr 2.0000e-03 eta 2 days, 2:05:46
epoch [6/50] batch [1480/2000] time 2.005 (2.037) data 0.000 (0.001) loss 0.1868 (1.3806) lr 2.0000e-03 eta 2 days, 2:04:59
epoch [6/50] batch [1500/2000] time 2.034 (2.037) data 0.000 (0.001) loss 0.8820 (1.3766) lr 2.0000e-03 eta 2 days, 2:04:18
epoch [6/50] batch [1520/2000] time 2.034 (2.037) data 0.000 (0.001) loss 1.7015 (1.3727) lr 2.0000e-03 eta 2 days, 2:03:43
epoch [6/50] batch [1540/2000] time 2.003 (2.037) data 0.000 (0.001) loss 2.9297 (1.3699) lr 2.0000e-03 eta 2 days, 2:03:04
epoch [6/50] batch [1560/2000] time 2.058 (2.037) data 0.000 (0.001) loss 1.9126 (1.3708) lr 2.0000e-03 eta 2 days, 2:02:24
epoch [6/50] batch [1580/2000] time 2.059 (2.037) data 0.000 (0.001) loss 0.6603 (1.3673) lr 2.0000e-03 eta 2 days, 2:01:51
epoch [6/50] batch [1600/2000] time 2.061 (2.037) data 0.000 (0.001) loss 0.0747 (1.3651) lr 2.0000e-03 eta 2 days, 2:01:14
epoch [6/50] batch [1620/2000] time 2.010 (2.037) data 0.003 (0.001) loss 0.6353 (1.3678) lr 2.0000e-03 eta 2 days, 2:00:29
epoch [6/50] batch [1640/2000] time 2.005 (2.037) data 0.000 (0.001) loss 0.8661 (1.3642) lr 2.0000e-03 eta 2 days, 1:59:45
epoch [6/50] batch [1660/2000] time 2.038 (2.037) data 0.000 (0.001) loss 2.9223 (1.3640) lr 2.0000e-03 eta 2 days, 1:58:59
epoch [6/50] batch [1680/2000] time 2.060 (2.037) data 0.001 (0.001) loss 0.7881 (1.3649) lr 2.0000e-03 eta 2 days, 1:58:16
epoch [6/50] batch [1700/2000] time 2.002 (2.037) data 0.000 (0.001) loss 0.2057 (1.3613) lr 2.0000e-03 eta 2 days, 1:57:30
epoch [6/50] batch [1720/2000] time 1.980 (2.037) data 0.000 (0.001) loss 1.1122 (1.3613) lr 2.0000e-03 eta 2 days, 1:56:52
epoch [6/50] batch [1740/2000] time 1.976 (2.037) data 0.000 (0.001) loss 2.1621 (1.3607) lr 2.0000e-03 eta 2 days, 1:56:16
epoch [6/50] batch [1760/2000] time 2.057 (2.037) data 0.000 (0.001) loss 3.0826 (1.3600) lr 2.0000e-03 eta 2 days, 1:55:35
epoch [6/50] batch [1780/2000] time 2.059 (2.037) data 0.000 (0.001) loss 0.7254 (1.3584) lr 2.0000e-03 eta 2 days, 1:54:52
epoch [6/50] batch [1800/2000] time 2.065 (2.037) data 0.000 (0.001) loss 2.2591 (1.3597) lr 2.0000e-03 eta 2 days, 1:54:11
epoch [6/50] batch [1820/2000] time 2.057 (2.037) data 0.000 (0.001) loss 1.6526 (1.3597) lr 2.0000e-03 eta 2 days, 1:53:32
epoch [6/50] batch [1840/2000] time 2.058 (2.037) data 0.000 (0.001) loss 0.6963 (1.3568) lr 2.0000e-03 eta 2 days, 1:52:57
epoch [6/50] batch [1860/2000] time 2.001 (2.037) data 0.000 (0.001) loss 1.1619 (1.3574) lr 2.0000e-03 eta 2 days, 1:52:14
epoch [6/50] batch [1880/2000] time 2.039 (2.037) data 0.000 (0.001) loss 2.6458 (1.3567) lr 2.0000e-03 eta 2 days, 1:51:29
epoch [6/50] batch [1900/2000] time 2.061 (2.037) data 0.000 (0.001) loss 0.8259 (1.3571) lr 2.0000e-03 eta 2 days, 1:50:50
epoch [6/50] batch [1920/2000] time 2.040 (2.037) data 0.000 (0.001) loss 3.9950 (1.3583) lr 2.0000e-03 eta 2 days, 1:50:08
epoch [6/50] batch [1940/2000] time 2.057 (2.037) data 0.000 (0.001) loss 2.3795 (1.3562) lr 2.0000e-03 eta 2 days, 1:49:16
epoch [6/50] batch [1960/2000] time 2.036 (2.037) data 0.000 (0.001) loss 1.1209 (1.3566) lr 2.0000e-03 eta 2 days, 1:48:41
epoch [6/50] batch [1980/2000] time 2.035 (2.037) data 0.000 (0.001) loss 1.2338 (1.3572) lr 2.0000e-03 eta 2 days, 1:48:01
epoch [6/50] batch [2000/2000] time 2.002 (2.037) data 0.000 (0.001) loss 0.5574 (1.3578) lr 1.9980e-03 eta 2 days, 1:47:11
epoch [7/50] batch [20/2000] time 2.064 (2.071) data 0.000 (0.027) loss 0.8879 (1.5657) lr 1.9980e-03 eta 2 days, 2:36:50
epoch [7/50] batch [40/2000] time 2.058 (2.050) data 0.000 (0.014) loss 1.1088 (1.2728) lr 1.9980e-03 eta 2 days, 2:05:28
epoch [7/50] batch [60/2000] time 2.033 (2.045) data 0.001 (0.009) loss 1.0509 (1.2405) lr 1.9980e-03 eta 2 days, 1:57:27
epoch [7/50] batch [80/2000] time 2.039 (2.041) data 0.000 (0.007) loss 0.6926 (1.3049) lr 1.9980e-03 eta 2 days, 1:51:04
epoch [7/50] batch [100/2000] time 2.059 (2.041) data 0.000 (0.006) loss 0.2772 (1.2976) lr 1.9980e-03 eta 2 days, 1:49:21
epoch [7/50] batch [120/2000] time 2.035 (2.040) data 0.000 (0.005) loss 1.0954 (1.2676) lr 1.9980e-03 eta 2 days, 1:48:32
epoch [7/50] batch [140/2000] time 2.058 (2.040) data 0.000 (0.004) loss 0.8865 (1.2542) lr 1.9980e-03 eta 2 days, 1:46:41
epoch [7/50] batch [160/2000] time 1.978 (2.039) data 0.000 (0.004) loss 0.8482 (1.2327) lr 1.9980e-03 eta 2 days, 1:44:54
epoch [7/50] batch [180/2000] time 2.059 (2.039) data 0.000 (0.003) loss 3.2344 (1.2722) lr 1.9980e-03 eta 2 days, 1:45:01
epoch [7/50] batch [200/2000] time 2.058 (2.039) data 0.000 (0.003) loss 2.4527 (1.2592) lr 1.9980e-03 eta 2 days, 1:43:22
epoch [7/50] batch [220/2000] time 2.003 (2.038) data 0.000 (0.003) loss 1.0524 (1.2258) lr 1.9980e-03 eta 2 days, 1:41:26
epoch [7/50] batch [240/2000] time 2.059 (2.037) data 0.000 (0.002) loss 0.0462 (1.2326) lr 1.9980e-03 eta 2 days, 1:39:52
epoch [7/50] batch [260/2000] time 2.065 (2.037) data 0.000 (0.002) loss 1.0958 (1.2389) lr 1.9980e-03 eta 2 days, 1:39:19
epoch [7/50] batch [280/2000] time 2.064 (2.038) data 0.000 (0.002) loss 2.5297 (1.2488) lr 1.9980e-03 eta 2 days, 1:39:00
epoch [7/50] batch [300/2000] time 2.059 (2.038) data 0.000 (0.002) loss 1.7049 (1.2612) lr 1.9980e-03 eta 2 days, 1:38:16
epoch [7/50] batch [320/2000] time 2.057 (2.038) data 0.000 (0.002) loss 0.8620 (1.2624) lr 1.9980e-03 eta 2 days, 1:37:34
epoch [7/50] batch [340/2000] time 2.007 (2.038) data 0.003 (0.002) loss 1.2523 (1.2580) lr 1.9980e-03 eta 2 days, 1:37:32
epoch [7/50] batch [360/2000] time 2.059 (2.038) data 0.000 (0.002) loss 0.5474 (1.2691) lr 1.9980e-03 eta 2 days, 1:36:50
epoch [7/50] batch [380/2000] time 2.058 (2.038) data 0.000 (0.002) loss 2.0723 (1.2539) lr 1.9980e-03 eta 2 days, 1:35:42
epoch [7/50] batch [400/2000] time 2.057 (2.038) data 0.000 (0.002) loss 2.2014 (1.2658) lr 1.9980e-03 eta 2 days, 1:34:45
epoch [7/50] batch [420/2000] time 2.007 (2.037) data 0.000 (0.002) loss 1.4450 (1.2770) lr 1.9980e-03 eta 2 days, 1:33:13
epoch [7/50] batch [440/2000] time 2.043 (2.037) data 0.000 (0.001) loss 0.4540 (1.2742) lr 1.9980e-03 eta 2 days, 1:32:56
epoch [7/50] batch [460/2000] time 2.035 (2.037) data 0.000 (0.001) loss 2.0439 (1.2891) lr 1.9980e-03 eta 2 days, 1:32:06
epoch [7/50] batch [480/2000] time 2.036 (2.037) data 0.000 (0.001) loss 1.0536 (1.2864) lr 1.9980e-03 eta 2 days, 1:31:35
epoch [7/50] batch [500/2000] time 2.000 (2.037) data 0.000 (0.001) loss 1.9153 (1.2824) lr 1.9980e-03 eta 2 days, 1:30:46
epoch [7/50] batch [520/2000] time 2.002 (2.037) data 0.000 (0.001) loss 0.6541 (1.2774) lr 1.9980e-03 eta 2 days, 1:30:14
epoch [7/50] batch [540/2000] time 2.000 (2.037) data 0.000 (0.001) loss 1.3242 (1.2816) lr 1.9980e-03 eta 2 days, 1:29:26
epoch [7/50] batch [560/2000] time 2.003 (2.037) data 0.000 (0.001) loss 1.0895 (1.2871) lr 1.9980e-03 eta 2 days, 1:28:27
epoch [7/50] batch [580/2000] time 2.060 (2.037) data 0.000 (0.001) loss 1.0787 (1.2888) lr 1.9980e-03 eta 2 days, 1:27:42
epoch [7/50] batch [600/2000] time 2.033 (2.037) data 0.001 (0.001) loss 0.1384 (1.2880) lr 1.9980e-03 eta 2 days, 1:27:02
epoch [7/50] batch [620/2000] time 1.998 (2.037) data 0.000 (0.001) loss 2.5942 (1.2872) lr 1.9980e-03 eta 2 days, 1:26:06
epoch [7/50] batch [640/2000] time 2.056 (2.037) data 0.000 (0.001) loss 0.2209 (1.2800) lr 1.9980e-03 eta 2 days, 1:25:48
epoch [7/50] batch [660/2000] time 2.058 (2.037) data 0.000 (0.001) loss 1.5902 (1.2746) lr 1.9980e-03 eta 2 days, 1:25:02
epoch [7/50] batch [680/2000] time 1.999 (2.037) data 0.000 (0.001) loss 0.7401 (1.2784) lr 1.9980e-03 eta 2 days, 1:24:05
epoch [7/50] batch [700/2000] time 1.995 (2.037) data 0.000 (0.001) loss 1.0737 (1.2747) lr 1.9980e-03 eta 2 days, 1:23:13
epoch [7/50] batch [720/2000] time 2.031 (2.037) data 0.000 (0.001) loss 3.0383 (1.2810) lr 1.9980e-03 eta 2 days, 1:22:30
epoch [7/50] batch [740/2000] time 2.054 (2.037) data 0.000 (0.001) loss 0.1283 (1.2919) lr 1.9980e-03 eta 2 days, 1:21:50
epoch [7/50] batch [760/2000] time 2.056 (2.037) data 0.000 (0.001) loss 1.8776 (1.2894) lr 1.9980e-03 eta 2 days, 1:21:12
epoch [7/50] batch [780/2000] time 2.031 (2.037) data 0.000 (0.001) loss 1.5870 (1.2908) lr 1.9980e-03 eta 2 days, 1:20:26
epoch [7/50] batch [800/2000] time 2.033 (2.036) data 0.000 (0.001) loss 0.9842 (1.2902) lr 1.9980e-03 eta 2 days, 1:19:26
epoch [7/50] batch [820/2000] time 2.060 (2.036) data 0.000 (0.001) loss 0.0394 (1.2932) lr 1.9980e-03 eta 2 days, 1:18:49
epoch [7/50] batch [840/2000] time 2.005 (2.036) data 0.000 (0.001) loss 3.7046 (1.2919) lr 1.9980e-03 eta 2 days, 1:18:04
epoch [7/50] batch [860/2000] time 2.003 (2.036) data 0.000 (0.001) loss 1.8325 (1.2917) lr 1.9980e-03 eta 2 days, 1:17:10
epoch [7/50] batch [880/2000] time 2.055 (2.036) data 0.000 (0.001) loss 2.3968 (1.2856) lr 1.9980e-03 eta 2 days, 1:16:36
epoch [7/50] batch [900/2000] time 2.057 (2.036) data 0.000 (0.001) loss 0.6400 (1.2869) lr 1.9980e-03 eta 2 days, 1:15:44
epoch [7/50] batch [920/2000] time 2.000 (2.036) data 0.000 (0.001) loss 0.5474 (1.2851) lr 1.9980e-03 eta 2 days, 1:15:04
epoch [7/50] batch [940/2000] time 2.055 (2.036) data 0.000 (0.001) loss 2.0046 (1.2827) lr 1.9980e-03 eta 2 days, 1:14:28
epoch [7/50] batch [960/2000] time 2.057 (2.036) data 0.000 (0.001) loss 0.7191 (1.2838) lr 1.9980e-03 eta 2 days, 1:13:45
epoch [7/50] batch [980/2000] time 2.031 (2.036) data 0.000 (0.001) loss 0.9517 (1.2854) lr 1.9980e-03 eta 2 days, 1:12:52
epoch [7/50] batch [1000/2000] time 2.057 (2.036) data 0.000 (0.001) loss 2.2506 (1.2861) lr 1.9980e-03 eta 2 days, 1:12:01
epoch [7/50] batch [1020/2000] time 2.055 (2.036) data 0.000 (0.001) loss 0.5743 (1.2939) lr 1.9980e-03 eta 2 days, 1:11:22
epoch [7/50] batch [1040/2000] time 2.056 (2.036) data 0.000 (0.001) loss 1.4551 (1.2946) lr 1.9980e-03 eta 2 days, 1:10:37
epoch [7/50] batch [1060/2000] time 2.033 (2.036) data 0.000 (0.001) loss 1.5003 (1.2909) lr 1.9980e-03 eta 2 days, 1:09:58
epoch [7/50] batch [1080/2000] time 1.998 (2.036) data 0.000 (0.001) loss 0.8290 (1.2951) lr 1.9980e-03 eta 2 days, 1:09:06
epoch [7/50] batch [1100/2000] time 2.059 (2.036) data 0.000 (0.001) loss 0.3530 (1.2930) lr 1.9980e-03 eta 2 days, 1:08:25
epoch [7/50] batch [1120/2000] time 2.064 (2.036) data 0.000 (0.001) loss 0.9140 (1.2886) lr 1.9980e-03 eta 2 days, 1:07:44
epoch [7/50] batch [1140/2000] time 2.060 (2.036) data 0.001 (0.001) loss 0.4849 (1.2911) lr 1.9980e-03 eta 2 days, 1:06:50
epoch [7/50] batch [1160/2000] time 2.001 (2.036) data 0.000 (0.001) loss 0.7144 (1.2953) lr 1.9980e-03 eta 2 days, 1:06:05
epoch [7/50] batch [1180/2000] time 2.031 (2.035) data 0.000 (0.001) loss 0.8397 (1.2954) lr 1.9980e-03 eta 2 days, 1:05:12
epoch [7/50] batch [1200/2000] time 2.035 (2.035) data 0.000 (0.001) loss 1.3556 (1.2919) lr 1.9980e-03 eta 2 days, 1:04:27
epoch [7/50] batch [1220/2000] time 2.058 (2.035) data 0.000 (0.001) loss 2.7416 (1.2950) lr 1.9980e-03 eta 2 days, 1:03:53
epoch [7/50] batch [1240/2000] time 2.032 (2.036) data 0.000 (0.001) loss 0.1195 (1.2945) lr 1.9980e-03 eta 2 days, 1:03:23
epoch [7/50] batch [1260/2000] time 2.036 (2.036) data 0.000 (0.001) loss 3.1300 (1.2971) lr 1.9980e-03 eta 2 days, 1:02:42
epoch [7/50] batch [1280/2000] time 2.035 (2.036) data 0.000 (0.001) loss 0.3758 (1.2981) lr 1.9980e-03 eta 2 days, 1:02:02
epoch [7/50] batch [1300/2000] time 2.060 (2.036) data 0.000 (0.001) loss 1.3672 (1.2980) lr 1.9980e-03 eta 2 days, 1:01:27
epoch [7/50] batch [1320/2000] time 1.974 (2.036) data 0.000 (0.001) loss 2.8335 (1.2989) lr 1.9980e-03 eta 2 days, 1:00:42
epoch [7/50] batch [1340/2000] time 2.059 (2.036) data 0.000 (0.001) loss 3.8661 (1.2985) lr 1.9980e-03 eta 2 days, 1:00:02
epoch [7/50] batch [1360/2000] time 2.058 (2.036) data 0.000 (0.001) loss 1.8279 (1.2995) lr 1.9980e-03 eta 2 days, 0:59:20
epoch [7/50] batch [1380/2000] time 2.034 (2.035) data 0.000 (0.001) loss 0.7140 (1.2974) lr 1.9980e-03 eta 2 days, 0:58:29
epoch [7/50] batch [1400/2000] time 1.999 (2.035) data 0.000 (0.001) loss 0.6007 (1.3008) lr 1.9980e-03 eta 2 days, 0:57:48
epoch [7/50] batch [1420/2000] time 2.000 (2.035) data 0.000 (0.001) loss 0.6479 (1.2976) lr 1.9980e-03 eta 2 days, 0:57:12
epoch [7/50] batch [1440/2000] time 1.974 (2.036) data 0.000 (0.001) loss 1.2036 (1.2947) lr 1.9980e-03 eta 2 days, 0:56:36
epoch [7/50] batch [1460/2000] time 2.057 (2.035) data 0.000 (0.001) loss 1.4679 (1.2939) lr 1.9980e-03 eta 2 days, 0:55:48
epoch [7/50] batch [1480/2000] time 2.055 (2.035) data 0.000 (0.001) loss 0.0439 (1.2887) lr 1.9980e-03 eta 2 days, 0:55:05
epoch [7/50] batch [1500/2000] time 2.056 (2.035) data 0.000 (0.001) loss 2.2072 (1.2849) lr 1.9980e-03 eta 2 days, 0:54:21
epoch [7/50] batch [1520/2000] time 2.061 (2.035) data 0.000 (0.001) loss 0.3361 (1.2813) lr 1.9980e-03 eta 2 days, 0:53:40
epoch [7/50] batch [1540/2000] time 2.059 (2.035) data 0.000 (0.001) loss 1.6345 (1.2858) lr 1.9980e-03 eta 2 days, 0:53:01
epoch [7/50] batch [1560/2000] time 2.063 (2.035) data 0.000 (0.001) loss 4.8959 (1.2905) lr 1.9980e-03 eta 2 days, 0:52:18
epoch [7/50] batch [1580/2000] time 2.033 (2.035) data 0.000 (0.001) loss 0.7247 (1.2885) lr 1.9980e-03 eta 2 days, 0:51:37
epoch [7/50] batch [1600/2000] time 1.978 (2.035) data 0.000 (0.001) loss 1.1692 (1.2881) lr 1.9980e-03 eta 2 days, 0:51:04
epoch [7/50] batch [1620/2000] time 2.036 (2.035) data 0.000 (0.001) loss 1.5577 (1.2844) lr 1.9980e-03 eta 2 days, 0:50:26
epoch [7/50] batch [1640/2000] time 2.060 (2.035) data 0.000 (0.001) loss 0.2913 (1.2822) lr 1.9980e-03 eta 2 days, 0:49:45
epoch [7/50] batch [1660/2000] time 2.062 (2.035) data 0.000 (0.001) loss 1.5730 (1.2798) lr 1.9980e-03 eta 2 days, 0:49:03
epoch [7/50] batch [1680/2000] time 2.057 (2.035) data 0.001 (0.001) loss 0.2970 (1.2765) lr 1.9980e-03 eta 2 days, 0:48:20
epoch [7/50] batch [1700/2000] time 2.062 (2.035) data 0.000 (0.001) loss 1.7422 (1.2828) lr 1.9980e-03 eta 2 days, 0:47:39
epoch [7/50] batch [1720/2000] time 2.017 (2.035) data 0.000 (0.001) loss 0.6012 (1.2836) lr 1.9980e-03 eta 2 days, 0:46:58
epoch [7/50] batch [1740/2000] time 2.040 (2.035) data 0.000 (0.001) loss 0.5662 (1.2805) lr 1.9980e-03 eta 2 days, 0:46:10
epoch [7/50] batch [1760/2000] time 2.058 (2.035) data 0.000 (0.001) loss 0.1324 (1.2799) lr 1.9980e-03 eta 2 days, 0:45:35
epoch [7/50] batch [1780/2000] time 2.060 (2.035) data 0.000 (0.001) loss 1.8672 (1.2821) lr 1.9980e-03 eta 2 days, 0:44:48
epoch [7/50] batch [1800/2000] time 2.062 (2.036) data 0.000 (0.001) loss 0.2850 (1.2802) lr 1.9980e-03 eta 2 days, 0:44:21
epoch [7/50] batch [1820/2000] time 2.059 (2.036) data 0.000 (0.001) loss 2.0144 (1.2800) lr 1.9980e-03 eta 2 days, 0:43:48
epoch [7/50] batch [1840/2000] time 2.056 (2.036) data 0.000 (0.001) loss 1.7370 (1.2802) lr 1.9980e-03 eta 2 days, 0:43:14
epoch [7/50] batch [1860/2000] time 2.059 (2.036) data 0.000 (0.001) loss 0.0729 (1.2786) lr 1.9980e-03 eta 2 days, 0:42:35
epoch [7/50] batch [1880/2000] time 2.056 (2.036) data 0.000 (0.001) loss 0.9017 (1.2778) lr 1.9980e-03 eta 2 days, 0:42:00
epoch [7/50] batch [1900/2000] time 2.060 (2.036) data 0.000 (0.001) loss 3.8940 (1.2784) lr 1.9980e-03 eta 2 days, 0:41:22
epoch [7/50] batch [1920/2000] time 2.055 (2.036) data 0.000 (0.001) loss 1.1756 (1.2746) lr 1.9980e-03 eta 2 days, 0:40:40
epoch [7/50] batch [1940/2000] time 1.979 (2.036) data 0.000 (0.001) loss 1.4880 (1.2740) lr 1.9980e-03 eta 2 days, 0:39:53
epoch [7/50] batch [1960/2000] time 2.054 (2.036) data 0.000 (0.001) loss 2.0725 (1.2711) lr 1.9980e-03 eta 2 days, 0:39:17
epoch [7/50] batch [1980/2000] time 2.001 (2.036) data 0.000 (0.001) loss 1.0784 (1.2722) lr 1.9980e-03 eta 2 days, 0:38:34
epoch [7/50] batch [2000/2000] time 2.061 (2.036) data 0.000 (0.001) loss 0.8834 (1.2703) lr 1.9921e-03 eta 2 days, 0:37:58
epoch [8/50] batch [20/2000] time 2.043 (2.064) data 0.000 (0.027) loss 0.7383 (1.0900) lr 1.9921e-03 eta 2 days, 1:17:13
epoch [8/50] batch [40/2000] time 2.060 (2.054) data 0.000 (0.014) loss 0.8581 (1.2104) lr 1.9921e-03 eta 2 days, 1:02:06
epoch [8/50] batch [60/2000] time 2.057 (2.047) data 0.001 (0.009) loss 1.2146 (1.3188) lr 1.9921e-03 eta 2 days, 0:52:20
epoch [8/50] batch [80/2000] time 2.032 (2.044) data 0.000 (0.007) loss 0.3655 (1.2610) lr 1.9921e-03 eta 2 days, 0:47:16
epoch [8/50] batch [100/2000] time 2.004 (2.041) data 0.000 (0.006) loss 2.2324 (1.2317) lr 1.9921e-03 eta 2 days, 0:41:30
epoch [8/50] batch [120/2000] time 2.004 (2.040) data 0.000 (0.005) loss 1.6751 (1.2564) lr 1.9921e-03 eta 2 days, 0:39:36
epoch [8/50] batch [140/2000] time 2.003 (2.040) data 0.000 (0.004) loss 0.8910 (1.1884) lr 1.9921e-03 eta 2 days, 0:38:46
epoch [8/50] batch [160/2000] time 2.057 (2.039) data 0.000 (0.004) loss 2.4919 (1.1949) lr 1.9921e-03 eta 2 days, 0:37:10
epoch [8/50] batch [180/2000] time 2.041 (2.039) data 0.000 (0.003) loss 0.1335 (1.1843) lr 1.9921e-03 eta 2 days, 0:36:27
epoch [8/50] batch [200/2000] time 2.006 (2.039) data 0.000 (0.003) loss 1.0893 (1.2095) lr 1.9921e-03 eta 2 days, 0:36:09
epoch [8/50] batch [220/2000] time 2.059 (2.039) data 0.000 (0.003) loss 2.5489 (1.2170) lr 1.9921e-03 eta 2 days, 0:35:24
epoch [8/50] batch [240/2000] time 2.001 (2.039) data 0.000 (0.002) loss 0.8060 (1.2209) lr 1.9921e-03 eta 2 days, 0:34:54
epoch [8/50] batch [260/2000] time 2.036 (2.039) data 0.000 (0.002) loss 2.6386 (1.2258) lr 1.9921e-03 eta 2 days, 0:33:14
epoch [8/50] batch [280/2000] time 2.003 (2.038) data 0.000 (0.002) loss 0.7452 (1.2094) lr 1.9921e-03 eta 2 days, 0:31:52
epoch [8/50] batch [300/2000] time 2.064 (2.038) data 0.000 (0.002) loss 0.3586 (1.2009) lr 1.9921e-03 eta 2 days, 0:31:17
epoch [8/50] batch [320/2000] time 2.056 (2.038) data 0.000 (0.002) loss 2.9774 (1.2195) lr 1.9921e-03 eta 2 days, 0:29:57
epoch [8/50] batch [340/2000] time 2.056 (2.038) data 0.000 (0.002) loss 0.3982 (1.1947) lr 1.9921e-03 eta 2 days, 0:29:16
epoch [8/50] batch [360/2000] time 2.002 (2.037) data 0.000 (0.002) loss 3.2689 (1.1937) lr 1.9921e-03 eta 2 days, 0:28:03
epoch [8/50] batch [380/2000] time 2.062 (2.037) data 0.000 (0.002) loss 2.5685 (1.1870) lr 1.9921e-03 eta 2 days, 0:26:59
epoch [8/50] batch [400/2000] time 2.069 (2.038) data 0.000 (0.002) loss 0.2588 (1.1851) lr 1.9921e-03 eta 2 days, 0:26:52
epoch [8/50] batch [420/2000] time 2.064 (2.038) data 0.000 (0.002) loss 3.5435 (1.2097) lr 1.9921e-03 eta 2 days, 0:26:09
epoch [8/50] batch [440/2000] time 2.059 (2.037) data 0.000 (0.001) loss 0.3635 (1.2187) lr 1.9921e-03 eta 2 days, 0:25:08
epoch [8/50] batch [460/2000] time 2.001 (2.037) data 0.000 (0.001) loss 2.1550 (1.2251) lr 1.9921e-03 eta 2 days, 0:24:31
epoch [8/50] batch [480/2000] time 2.003 (2.037) data 0.000 (0.001) loss 3.0435 (1.2354) lr 1.9921e-03 eta 2 days, 0:23:50
epoch [8/50] batch [500/2000] time 2.056 (2.037) data 0.000 (0.001) loss 0.7731 (1.2230) lr 1.9921e-03 eta 2 days, 0:23:00
epoch [8/50] batch [520/2000] time 2.062 (2.037) data 0.000 (0.001) loss 0.4544 (1.2273) lr 1.9921e-03 eta 2 days, 0:22:04
epoch [8/50] batch [540/2000] time 2.062 (2.037) data 0.000 (0.001) loss 0.6200 (1.2196) lr 1.9921e-03 eta 2 days, 0:21:23
epoch [8/50] batch [560/2000] time 2.035 (2.037) data 0.000 (0.001) loss 0.3708 (1.2106) lr 1.9921e-03 eta 2 days, 0:20:09
epoch [8/50] batch [580/2000] time 2.058 (2.037) data 0.000 (0.001) loss 1.4595 (1.2084) lr 1.9921e-03 eta 2 days, 0:19:22
epoch [8/50] batch [600/2000] time 2.003 (2.036) data 0.001 (0.001) loss 1.0987 (1.2104) lr 1.9921e-03 eta 2 days, 0:18:31
epoch [8/50] batch [620/2000] time 2.033 (2.036) data 0.000 (0.001) loss 0.2828 (1.2023) lr 1.9921e-03 eta 2 days, 0:17:42
epoch [8/50] batch [640/2000] time 2.056 (2.036) data 0.000 (0.001) loss 0.8105 (1.2083) lr 1.9921e-03 eta 2 days, 0:17:02
epoch [8/50] batch [660/2000] time 2.033 (2.036) data 0.000 (0.001) loss 1.7305 (1.2176) lr 1.9921e-03 eta 2 days, 0:16:16
epoch [8/50] batch [680/2000] time 2.060 (2.036) data 0.000 (0.001) loss 2.0824 (1.2148) lr 1.9921e-03 eta 2 days, 0:15:26
epoch [8/50] batch [700/2000] time 2.058 (2.036) data 0.000 (0.001) loss 0.2895 (1.2162) lr 1.9921e-03 eta 2 days, 0:14:43
epoch [8/50] batch [720/2000] time 2.057 (2.036) data 0.000 (0.001) loss 0.3148 (1.2201) lr 1.9921e-03 eta 2 days, 0:13:41
epoch [8/50] batch [740/2000] time 2.035 (2.036) data 0.000 (0.001) loss 1.8020 (1.2317) lr 1.9921e-03 eta 2 days, 0:12:56
epoch [8/50] batch [760/2000] time 2.002 (2.036) data 0.000 (0.001) loss 1.6094 (1.2314) lr 1.9921e-03 eta 2 days, 0:12:10
epoch [8/50] batch [780/2000] time 2.057 (2.036) data 0.000 (0.001) loss 2.7085 (1.2342) lr 1.9921e-03 eta 2 days, 0:11:27
epoch [8/50] batch [800/2000] time 2.059 (2.036) data 0.000 (0.001) loss 0.0834 (1.2323) lr 1.9921e-03 eta 2 days, 0:10:39
epoch [8/50] batch [820/2000] time 2.038 (2.036) data 0.000 (0.001) loss 0.0448 (1.2303) lr 1.9921e-03 eta 2 days, 0:09:43
epoch [8/50] batch [840/2000] time 2.004 (2.035) data 0.000 (0.001) loss 1.3715 (1.2412) lr 1.9921e-03 eta 2 days, 0:08:50
epoch [8/50] batch [860/2000] time 1.999 (2.035) data 0.000 (0.001) loss 1.2781 (1.2482) lr 1.9921e-03 eta 2 days, 0:08:19
epoch [8/50] batch [880/2000] time 2.061 (2.035) data 0.000 (0.001) loss 0.1145 (1.2516) lr 1.9921e-03 eta 2 days, 0:07:32
epoch [8/50] batch [900/2000] time 2.060 (2.035) data 0.000 (0.001) loss 3.2714 (1.2559) lr 1.9921e-03 eta 2 days, 0:06:55
epoch [8/50] batch [920/2000] time 2.009 (2.035) data 0.000 (0.001) loss 0.8706 (1.2550) lr 1.9921e-03 eta 2 days, 0:06:16
epoch [8/50] batch [940/2000] time 2.041 (2.036) data 0.000 (0.001) loss 1.0496 (1.2605) lr 1.9921e-03 eta 2 days, 0:05:47
epoch [8/50] batch [960/2000] time 2.004 (2.036) data 0.000 (0.001) loss 1.1093 (1.2613) lr 1.9921e-03 eta 2 days, 0:05:11
epoch [8/50] batch [980/2000] time 2.004 (2.036) data 0.000 (0.001) loss 1.3543 (1.2584) lr 1.9921e-03 eta 2 days, 0:04:24
epoch [8/50] batch [1000/2000] time 2.002 (2.036) data 0.000 (0.001) loss 0.1140 (1.2520) lr 1.9921e-03 eta 2 days, 0:03:54
epoch [8/50] batch [1020/2000] time 2.058 (2.036) data 0.000 (0.001) loss 1.0797 (1.2523) lr 1.9921e-03 eta 2 days, 0:03:18
epoch [8/50] batch [1040/2000] time 2.062 (2.036) data 0.000 (0.001) loss 0.7706 (1.2499) lr 1.9921e-03 eta 2 days, 0:02:43
epoch [8/50] batch [1060/2000] time 2.004 (2.036) data 0.000 (0.001) loss 2.5935 (1.2534) lr 1.9921e-03 eta 2 days, 0:02:06
epoch [8/50] batch [1080/2000] time 2.058 (2.036) data 0.000 (0.001) loss 0.6348 (1.2569) lr 1.9921e-03 eta 2 days, 0:01:34
epoch [8/50] batch [1100/2000] time 2.063 (2.036) data 0.000 (0.001) loss 3.3294 (1.2575) lr 1.9921e-03 eta 2 days, 0:00:46
epoch [8/50] batch [1120/2000] time 2.059 (2.036) data 0.000 (0.001) loss 0.8680 (1.2534) lr 1.9921e-03 eta 2 days, 0:00:14
epoch [8/50] batch [1140/2000] time 2.060 (2.036) data 0.000 (0.001) loss 0.2412 (1.2584) lr 1.9921e-03 eta 1 day, 23:59:46
epoch [8/50] batch [1160/2000] time 2.037 (2.036) data 0.000 (0.001) loss 0.7231 (1.2554) lr 1.9921e-03 eta 1 day, 23:59:06
epoch [8/50] batch [1180/2000] time 2.035 (2.036) data 0.000 (0.001) loss 0.4479 (1.2526) lr 1.9921e-03 eta 1 day, 23:58:21
epoch [8/50] batch [1200/2000] time 2.059 (2.036) data 0.000 (0.001) loss 0.6354 (1.2542) lr 1.9921e-03 eta 1 day, 23:57:31
epoch [8/50] batch [1220/2000] time 2.028 (2.036) data 0.000 (0.001) loss 0.1195 (1.2531) lr 1.9921e-03 eta 1 day, 23:56:43
epoch [8/50] batch [1240/2000] time 1.995 (2.036) data 0.000 (0.001) loss 0.3334 (1.2505) lr 1.9921e-03 eta 1 day, 23:55:47
epoch [8/50] batch [1260/2000] time 1.996 (2.036) data 0.000 (0.001) loss 1.7000 (1.2516) lr 1.9921e-03 eta 1 day, 23:55:05
epoch [8/50] batch [1280/2000] time 2.004 (2.036) data 0.000 (0.001) loss 0.6597 (1.2465) lr 1.9921e-03 eta 1 day, 23:54:20
epoch [8/50] batch [1300/2000] time 2.055 (2.036) data 0.000 (0.001) loss 0.8518 (1.2486) lr 1.9921e-03 eta 1 day, 23:53:27
epoch [8/50] batch [1320/2000] time 1.998 (2.035) data 0.000 (0.001) loss 0.3721 (1.2423) lr 1.9921e-03 eta 1 day, 23:52:39
epoch [8/50] batch [1340/2000] time 2.030 (2.035) data 0.000 (0.001) loss 1.2053 (1.2414) lr 1.9921e-03 eta 1 day, 23:51:53
epoch [8/50] batch [1360/2000] time 2.052 (2.035) data 0.000 (0.001) loss 0.5270 (1.2444) lr 1.9921e-03 eta 1 day, 23:51:10
epoch [8/50] batch [1380/2000] time 2.059 (2.035) data 0.000 (0.001) loss 1.4846 (1.2425) lr 1.9921e-03 eta 1 day, 23:50:21
epoch [8/50] batch [1400/2000] time 2.033 (2.035) data 0.000 (0.001) loss 0.2967 (1.2390) lr 1.9921e-03 eta 1 day, 23:49:47
epoch [8/50] batch [1420/2000] time 2.056 (2.035) data 0.000 (0.001) loss 1.4587 (1.2426) lr 1.9921e-03 eta 1 day, 23:49:09
epoch [8/50] batch [1440/2000] time 2.057 (2.035) data 0.000 (0.001) loss 0.8154 (1.2393) lr 1.9921e-03 eta 1 day, 23:48:23
epoch [8/50] batch [1460/2000] time 2.050 (2.035) data 0.000 (0.001) loss 0.5083 (1.2387) lr 1.9921e-03 eta 1 day, 23:47:35
epoch [8/50] batch [1480/2000] time 2.051 (2.035) data 0.000 (0.001) loss 0.2164 (1.2366) lr 1.9921e-03 eta 1 day, 23:46:48
epoch [8/50] batch [1500/2000] time 2.052 (2.035) data 0.000 (0.001) loss 0.2393 (1.2336) lr 1.9921e-03 eta 1 day, 23:46:06
epoch [8/50] batch [1520/2000] time 2.001 (2.035) data 0.000 (0.001) loss 0.0587 (1.2383) lr 1.9921e-03 eta 1 day, 23:45:28
epoch [8/50] batch [1540/2000] time 1.973 (2.035) data 0.000 (0.001) loss 0.0552 (1.2358) lr 1.9921e-03 eta 1 day, 23:44:33
epoch [8/50] batch [1560/2000] time 2.054 (2.035) data 0.000 (0.001) loss 0.7841 (1.2406) lr 1.9921e-03 eta 1 day, 23:43:49
epoch [8/50] batch [1580/2000] time 1.980 (2.035) data 0.000 (0.001) loss 1.6016 (1.2437) lr 1.9921e-03 eta 1 day, 23:43:04
epoch [8/50] batch [1600/2000] time 2.032 (2.035) data 0.000 (0.001) loss 2.4731 (1.2455) lr 1.9921e-03 eta 1 day, 23:42:26
epoch [8/50] batch [1620/2000] time 2.033 (2.035) data 0.000 (0.001) loss 0.6131 (1.2425) lr 1.9921e-03 eta 1 day, 23:41:45
epoch [8/50] batch [1640/2000] time 2.062 (2.035) data 0.000 (0.001) loss 0.5434 (1.2408) lr 1.9921e-03 eta 1 day, 23:41:04
epoch [8/50] batch [1660/2000] time 2.005 (2.035) data 0.000 (0.001) loss 1.0145 (1.2390) lr 1.9921e-03 eta 1 day, 23:40:19
epoch [8/50] batch [1680/2000] time 2.008 (2.035) data 0.000 (0.001) loss 0.6383 (1.2376) lr 1.9921e-03 eta 1 day, 23:39:50
epoch [8/50] batch [1700/2000] time 2.066 (2.035) data 0.000 (0.001) loss 2.0787 (1.2357) lr 1.9921e-03 eta 1 day, 23:39:11
epoch [8/50] batch [1720/2000] time 2.037 (2.035) data 0.000 (0.001) loss 0.3867 (1.2347) lr 1.9921e-03 eta 1 day, 23:38:35
epoch [8/50] batch [1740/2000] time 2.010 (2.042) data 0.000 (0.001) loss 1.8523 (1.2327) lr 1.9921e-03 eta 1 day, 23:48:16
epoch [8/50] batch [1760/2000] time 2.063 (2.042) data 0.000 (0.001) loss 2.7105 (1.2344) lr 1.9921e-03 eta 1 day, 23:47:34
epoch [8/50] batch [1780/2000] time 2.064 (2.042) data 0.000 (0.001) loss 0.5794 (1.2367) lr 1.9921e-03 eta 1 day, 23:46:51
epoch [8/50] batch [1800/2000] time 2.011 (2.042) data 0.000 (0.001) loss 1.7163 (1.2345) lr 1.9921e-03 eta 1 day, 23:46:10
epoch [8/50] batch [1820/2000] time 2.062 (2.042) data 0.000 (0.001) loss 2.4304 (1.2361) lr 1.9921e-03 eta 1 day, 23:45:19
epoch [8/50] batch [1840/2000] time 2.062 (2.042) data 0.000 (0.001) loss 0.6964 (1.2307) lr 1.9921e-03 eta 1 day, 23:44:33
epoch [8/50] batch [1860/2000] time 2.060 (2.042) data 0.000 (0.001) loss 0.3754 (1.2332) lr 1.9921e-03 eta 1 day, 23:43:54
epoch [8/50] batch [1880/2000] time 2.006 (2.042) data 0.000 (0.001) loss 3.0555 (1.2311) lr 1.9921e-03 eta 1 day, 23:42:58
epoch [8/50] batch [1900/2000] time 2.035 (2.042) data 0.000 (0.001) loss 0.6448 (1.2296) lr 1.9921e-03 eta 1 day, 23:42:17
epoch [8/50] batch [1920/2000] time 2.006 (2.042) data 0.000 (0.001) loss 1.3626 (1.2315) lr 1.9921e-03 eta 1 day, 23:41:37
epoch [8/50] batch [1940/2000] time 2.004 (2.042) data 0.000 (0.001) loss 2.6125 (1.2294) lr 1.9921e-03 eta 1 day, 23:40:54
epoch [8/50] batch [1960/2000] time 2.037 (2.042) data 0.000 (0.001) loss 1.2750 (1.2295) lr 1.9921e-03 eta 1 day, 23:40:04
epoch [8/50] batch [1980/2000] time 2.061 (2.042) data 0.000 (0.001) loss 1.2713 (1.2366) lr 1.9921e-03 eta 1 day, 23:39:23
epoch [8/50] batch [2000/2000] time 2.060 (2.042) data 0.000 (0.001) loss 0.5530 (1.2377) lr 1.9823e-03 eta 1 day, 23:38:44
epoch [9/50] batch [20/2000] time 2.019 (2.082) data 0.000 (0.036) loss 0.3760 (1.1385) lr 1.9823e-03 eta 2 days, 0:34:22
epoch [9/50] batch [40/2000] time 2.037 (2.060) data 0.000 (0.018) loss 0.5619 (1.1682) lr 1.9823e-03 eta 2 days, 0:03:15
epoch [9/50] batch [60/2000] time 2.057 (2.052) data 0.001 (0.012) loss 0.0646 (1.0797) lr 1.9823e-03 eta 1 day, 23:51:02
epoch [9/50] batch [80/2000] time 2.009 (2.049) data 0.000 (0.009) loss 2.8517 (1.1505) lr 1.9823e-03 eta 1 day, 23:45:58
epoch [9/50] batch [100/2000] time 2.060 (2.050) data 0.000 (0.007) loss 0.8385 (1.1207) lr 1.9823e-03 eta 1 day, 23:46:01
epoch [9/50] batch [120/2000] time 2.035 (2.046) data 0.000 (0.006) loss 0.3306 (1.1319) lr 1.9823e-03 eta 1 day, 23:40:23
epoch [9/50] batch [140/2000] time 2.034 (2.045) data 0.000 (0.005) loss 1.6491 (1.1645) lr 1.9823e-03 eta 1 day, 23:37:55
epoch [9/50] batch [160/2000] time 1.978 (2.044) data 0.000 (0.005) loss 0.5990 (1.1781) lr 1.9823e-03 eta 1 day, 23:35:54
epoch [9/50] batch [180/2000] time 2.001 (2.042) data 0.000 (0.004) loss 2.1952 (1.1406) lr 1.9823e-03 eta 1 day, 23:32:58
epoch [9/50] batch [200/2000] time 2.005 (2.041) data 0.000 (0.004) loss 3.4555 (1.1651) lr 1.9823e-03 eta 1 day, 23:31:13
epoch [9/50] batch [220/2000] time 2.011 (2.040) data 0.000 (0.004) loss 1.4904 (1.1567) lr 1.9823e-03 eta 1 day, 23:29:10
epoch [9/50] batch [240/2000] time 2.003 (2.040) data 0.000 (0.003) loss 2.2902 (1.1717) lr 1.9823e-03 eta 1 day, 23:27:26
epoch [9/50] batch [260/2000] time 1.977 (2.039) data 0.000 (0.003) loss 1.3183 (1.1644) lr 1.9823e-03 eta 1 day, 23:25:32
epoch [9/50] batch [280/2000] time 2.062 (2.039) data 0.000 (0.003) loss 1.6945 (1.1683) lr 1.9823e-03 eta 1 day, 23:25:00
epoch [9/50] batch [300/2000] time 2.058 (2.039) data 0.000 (0.003) loss 0.4542 (1.1626) lr 1.9823e-03 eta 1 day, 23:24:45
epoch [9/50] batch [320/2000] time 2.060 (2.039) data 0.000 (0.003) loss 0.0341 (1.1768) lr 1.9823e-03 eta 1 day, 23:24:02
epoch [9/50] batch [340/2000] time 2.057 (2.039) data 0.000 (0.002) loss 1.0976 (1.1770) lr 1.9823e-03 eta 1 day, 23:22:54
epoch [9/50] batch [360/2000] time 2.060 (2.039) data 0.000 (0.002) loss 0.0588 (1.1558) lr 1.9823e-03 eta 1 day, 23:22:22
epoch [9/50] batch [380/2000] time 2.058 (2.039) data 0.000 (0.002) loss 0.3316 (1.1731) lr 1.9823e-03 eta 1 day, 23:21:09
epoch [9/50] batch [400/2000] time 2.003 (2.039) data 0.000 (0.002) loss 0.5621 (1.1641) lr 1.9823e-03 eta 1 day, 23:20:29
epoch [9/50] batch [420/2000] time 1.978 (2.038) data 0.000 (0.002) loss 1.5840 (1.1749) lr 1.9823e-03 eta 1 day, 23:19:21
epoch [9/50] batch [440/2000] time 2.036 (2.038) data 0.000 (0.002) loss 1.6319 (1.1981) lr 1.9823e-03 eta 1 day, 23:18:04
epoch [9/50] batch [460/2000] time 2.035 (2.038) data 0.000 (0.002) loss 2.4823 (1.2083) lr 1.9823e-03 eta 1 day, 23:17:37
epoch [9/50] batch [480/2000] time 2.008 (2.038) data 0.000 (0.002) loss 0.3093 (1.2193) lr 1.9823e-03 eta 1 day, 23:16:49
epoch [9/50] batch [500/2000] time 2.058 (2.038) data 0.000 (0.002) loss 2.4257 (1.2318) lr 1.9823e-03 eta 1 day, 23:16:21
epoch [9/50] batch [520/2000] time 2.004 (2.038) data 0.000 (0.002) loss 0.9412 (1.2240) lr 1.9823e-03 eta 1 day, 23:15:22
epoch [9/50] batch [540/2000] time 1.979 (2.038) data 0.000 (0.002) loss 2.0816 (1.2185) lr 1.9823e-03 eta 1 day, 23:14:23
epoch [9/50] batch [560/2000] time 2.062 (2.038) data 0.005 (0.002) loss 3.7361 (1.2261) lr 1.9823e-03 eta 1 day, 23:13:40
epoch [9/50] batch [580/2000] time 1.983 (2.038) data 0.000 (0.002) loss 0.5908 (1.2216) lr 1.9823e-03 eta 1 day, 23:12:48
epoch [9/50] batch [600/2000] time 2.035 (2.037) data 0.001 (0.002) loss 0.6645 (1.2315) lr 1.9823e-03 eta 1 day, 23:11:50
epoch [9/50] batch [620/2000] time 2.053 (2.037) data 0.000 (0.002) loss 0.5317 (1.2293) lr 1.9823e-03 eta 1 day, 23:11:14
epoch [9/50] batch [640/2000] time 2.007 (2.037) data 0.000 (0.002) loss 0.3094 (1.2315) lr 1.9823e-03 eta 1 day, 23:10:35
epoch [9/50] batch [660/2000] time 2.010 (2.037) data 0.000 (0.001) loss 0.9103 (1.2314) lr 1.9823e-03 eta 1 day, 23:09:59
epoch [9/50] batch [680/2000] time 2.060 (2.037) data 0.000 (0.001) loss 0.5382 (1.2328) lr 1.9823e-03 eta 1 day, 23:09:20
epoch [9/50] batch [700/2000] time 2.006 (2.037) data 0.000 (0.001) loss 1.3886 (1.2371) lr 1.9823e-03 eta 1 day, 23:08:34
epoch [9/50] batch [720/2000] time 2.063 (2.037) data 0.000 (0.001) loss 2.6151 (1.2385) lr 1.9823e-03 eta 1 day, 23:08:02
epoch [9/50] batch [740/2000] time 2.063 (2.038) data 0.000 (0.001) loss 1.1764 (1.2328) lr 1.9823e-03 eta 1 day, 23:07:54
epoch [9/50] batch [760/2000] time 2.062 (2.038) data 0.000 (0.001) loss 2.6975 (1.2374) lr 1.9823e-03 eta 1 day, 23:07:16
epoch [9/50] batch [780/2000] time 2.057 (2.038) data 0.000 (0.001) loss 0.9031 (1.2426) lr 1.9823e-03 eta 1 day, 23:06:37
epoch [9/50] batch [800/2000] time 2.059 (2.038) data 0.000 (0.001) loss 0.7387 (1.2501) lr 1.9823e-03 eta 1 day, 23:06:07
epoch [9/50] batch [820/2000] time 2.057 (2.038) data 0.000 (0.001) loss 0.7679 (1.2427) lr 1.9823e-03 eta 1 day, 23:05:21
epoch [9/50] batch [840/2000] time 2.034 (2.038) data 0.000 (0.001) loss 0.6242 (1.2466) lr 1.9823e-03 eta 1 day, 23:04:42
epoch [9/50] batch [860/2000] time 2.052 (2.038) data 0.000 (0.001) loss 1.8850 (1.2454) lr 1.9823e-03 eta 1 day, 23:03:49
epoch [9/50] batch [880/2000] time 1.999 (2.038) data 0.000 (0.001) loss 0.2702 (1.2423) lr 1.9823e-03 eta 1 day, 23:02:42
epoch [9/50] batch [900/2000] time 2.052 (2.037) data 0.000 (0.001) loss 2.2618 (1.2395) lr 1.9823e-03 eta 1 day, 23:01:56
epoch [9/50] batch [920/2000] time 2.053 (2.038) data 0.000 (0.001) loss 0.2535 (1.2408) lr 1.9823e-03 eta 1 day, 23:01:16
epoch [9/50] batch [940/2000] time 2.050 (2.037) data 0.000 (0.001) loss 1.4846 (1.2350) lr 1.9823e-03 eta 1 day, 23:00:24
epoch [9/50] batch [960/2000] time 2.050 (2.037) data 0.000 (0.001) loss 0.5410 (1.2358) lr 1.9823e-03 eta 1 day, 22:59:37
epoch [9/50] batch [980/2000] time 2.033 (2.037) data 0.000 (0.001) loss 0.0652 (1.2317) lr 1.9823e-03 eta 1 day, 22:58:49
epoch [9/50] batch [1000/2000] time 2.030 (2.037) data 0.000 (0.001) loss 2.4119 (1.2356) lr 1.9823e-03 eta 1 day, 22:57:58
epoch [9/50] batch [1020/2000] time 2.050 (2.037) data 0.000 (0.001) loss 0.9194 (1.2372) lr 1.9823e-03 eta 1 day, 22:57:11
epoch [9/50] batch [1040/2000] time 2.055 (2.037) data 0.000 (0.001) loss 0.7405 (1.2451) lr 1.9823e-03 eta 1 day, 22:56:13
epoch [9/50] batch [1060/2000] time 2.000 (2.037) data 0.000 (0.001) loss 0.0925 (1.2459) lr 1.9823e-03 eta 1 day, 22:55:22
epoch [9/50] batch [1080/2000] time 2.059 (2.037) data 0.000 (0.001) loss 0.8952 (1.2512) lr 1.9823e-03 eta 1 day, 22:54:29
epoch [9/50] batch [1100/2000] time 2.051 (2.037) data 0.000 (0.001) loss 2.5863 (1.2560) lr 1.9823e-03 eta 1 day, 22:53:46
epoch [9/50] batch [1120/2000] time 1.999 (2.036) data 0.000 (0.001) loss 0.9730 (1.2571) lr 1.9823e-03 eta 1 day, 22:52:47
epoch [9/50] batch [1140/2000] time 2.052 (2.036) data 0.000 (0.001) loss 1.5737 (1.2596) lr 1.9823e-03 eta 1 day, 22:51:59
epoch [9/50] batch [1160/2000] time 2.032 (2.036) data 0.000 (0.001) loss 0.4850 (1.2575) lr 1.9823e-03 eta 1 day, 22:51:05
epoch [9/50] batch [1180/2000] time 2.006 (2.036) data 0.000 (0.001) loss 0.7079 (1.2600) lr 1.9823e-03 eta 1 day, 22:50:21
epoch [9/50] batch [1200/2000] time 1.998 (2.036) data 0.000 (0.001) loss 1.8417 (1.2628) lr 1.9823e-03 eta 1 day, 22:49:41
epoch [9/50] batch [1220/2000] time 2.002 (2.036) data 0.000 (0.001) loss 1.5369 (1.2627) lr 1.9823e-03 eta 1 day, 22:48:46
epoch [9/50] batch [1240/2000] time 2.051 (2.036) data 0.000 (0.001) loss 0.5996 (1.2623) lr 1.9823e-03 eta 1 day, 22:48:04
epoch [9/50] batch [1260/2000] time 1.999 (2.036) data 0.000 (0.001) loss 0.5586 (1.2585) lr 1.9823e-03 eta 1 day, 22:47:14
epoch [9/50] batch [1280/2000] time 2.049 (2.036) data 0.000 (0.001) loss 2.1423 (1.2652) lr 1.9823e-03 eta 1 day, 22:46:26
epoch [9/50] batch [1300/2000] time 2.048 (2.036) data 0.000 (0.001) loss 2.8467 (1.2616) lr 1.9823e-03 eta 1 day, 22:45:36
epoch [9/50] batch [1320/2000] time 2.048 (2.035) data 0.000 (0.001) loss 1.1373 (1.2585) lr 1.9823e-03 eta 1 day, 22:44:44
epoch [9/50] batch [1340/2000] time 2.053 (2.035) data 0.000 (0.001) loss 1.5595 (1.2592) lr 1.9823e-03 eta 1 day, 22:43:55
epoch [9/50] batch [1360/2000] time 2.027 (2.035) data 0.000 (0.001) loss 1.6236 (1.2648) lr 1.9823e-03 eta 1 day, 22:42:58
epoch [9/50] batch [1380/2000] time 2.046 (2.035) data 0.000 (0.001) loss 1.4013 (1.2654) lr 1.9823e-03 eta 1 day, 22:42:05
epoch [9/50] batch [1400/2000] time 2.051 (2.035) data 0.000 (0.001) loss 1.1260 (1.2692) lr 1.9823e-03 eta 1 day, 22:41:17
epoch [9/50] batch [1420/2000] time 2.049 (2.035) data 0.000 (0.001) loss 0.4053 (1.2642) lr 1.9823e-03 eta 1 day, 22:40:23
epoch [9/50] batch [1440/2000] time 2.052 (2.035) data 0.000 (0.001) loss 0.9852 (1.2665) lr 1.9823e-03 eta 1 day, 22:39:32
epoch [9/50] batch [1460/2000] time 2.030 (2.034) data 0.000 (0.001) loss 2.3770 (1.2711) lr 1.9823e-03 eta 1 day, 22:38:46
epoch [9/50] batch [1480/2000] time 1.975 (2.034) data 0.000 (0.001) loss 0.6188 (1.2693) lr 1.9823e-03 eta 1 day, 22:37:54
epoch [9/50] batch [1500/2000] time 2.030 (2.034) data 0.000 (0.001) loss 0.4955 (1.2719) lr 1.9823e-03 eta 1 day, 22:37:02
epoch [9/50] batch [1520/2000] time 2.027 (2.034) data 0.000 (0.001) loss 1.1469 (1.2693) lr 1.9823e-03 eta 1 day, 22:36:17
epoch [9/50] batch [1540/2000] time 2.052 (2.034) data 0.000 (0.001) loss 0.7177 (1.2701) lr 1.9823e-03 eta 1 day, 22:35:24
epoch [9/50] batch [1560/2000] time 2.027 (2.034) data 0.000 (0.001) loss 0.1470 (1.2698) lr 1.9823e-03 eta 1 day, 22:34:35
epoch [9/50] batch [1580/2000] time 2.001 (2.034) data 0.000 (0.001) loss 0.2548 (1.2684) lr 1.9823e-03 eta 1 day, 22:33:48
epoch [9/50] batch [1600/2000] time 2.000 (2.034) data 0.000 (0.001) loss 0.2628 (1.2645) lr 1.9823e-03 eta 1 day, 22:33:07
epoch [9/50] batch [1620/2000] time 2.054 (2.034) data 0.000 (0.001) loss 2.5441 (1.2644) lr 1.9823e-03 eta 1 day, 22:32:30
epoch [9/50] batch [1640/2000] time 2.027 (2.034) data 0.000 (0.001) loss 0.9134 (1.2644) lr 1.9823e-03 eta 1 day, 22:31:51
epoch [9/50] batch [1660/2000] time 2.028 (2.034) data 0.000 (0.001) loss 0.9531 (1.2655) lr 1.9823e-03 eta 1 day, 22:31:01
epoch [9/50] batch [1680/2000] time 2.029 (2.034) data 0.001 (0.001) loss 2.0287 (1.2681) lr 1.9823e-03 eta 1 day, 22:30:21
epoch [9/50] batch [1700/2000] time 2.050 (2.034) data 0.000 (0.001) loss 1.3784 (1.2645) lr 1.9823e-03 eta 1 day, 22:29:40
epoch [9/50] batch [1720/2000] time 2.053 (2.034) data 0.000 (0.001) loss 1.0236 (1.2595) lr 1.9823e-03 eta 1 day, 22:28:54
epoch [9/50] batch [1740/2000] time 2.000 (2.034) data 0.000 (0.001) loss 1.3930 (1.2586) lr 1.9823e-03 eta 1 day, 22:28:09
epoch [9/50] batch [1760/2000] time 2.054 (2.034) data 0.000 (0.001) loss 1.4872 (1.2597) lr 1.9823e-03 eta 1 day, 22:27:22
epoch [9/50] batch [1780/2000] time 2.029 (2.034) data 0.000 (0.001) loss 0.4535 (1.2550) lr 1.9823e-03 eta 1 day, 22:26:34
epoch [9/50] batch [1800/2000] time 2.005 (2.034) data 0.000 (0.001) loss 1.0883 (1.2529) lr 1.9823e-03 eta 1 day, 22:25:57
epoch [9/50] batch [1820/2000] time 2.056 (2.034) data 0.000 (0.001) loss 1.6926 (1.2536) lr 1.9823e-03 eta 1 day, 22:25:25
epoch [9/50] batch [1840/2000] time 2.056 (2.034) data 0.000 (0.001) loss 0.8659 (1.2541) lr 1.9823e-03 eta 1 day, 22:24:44
epoch [9/50] batch [1860/2000] time 2.050 (2.034) data 0.000 (0.001) loss 1.5441 (1.2528) lr 1.9823e-03 eta 1 day, 22:24:01
epoch [9/50] batch [1880/2000] time 2.037 (2.034) data 0.000 (0.001) loss 0.4280 (1.2495) lr 1.9823e-03 eta 1 day, 22:23:15
epoch [9/50] batch [1900/2000] time 2.051 (2.034) data 0.000 (0.001) loss 1.9261 (1.2484) lr 1.9823e-03 eta 1 day, 22:22:38
epoch [9/50] batch [1920/2000] time 2.056 (2.034) data 0.000 (0.001) loss 2.1730 (1.2472) lr 1.9823e-03 eta 1 day, 22:21:50
epoch [9/50] batch [1940/2000] time 2.051 (2.033) data 0.000 (0.001) loss 0.6735 (1.2487) lr 1.9823e-03 eta 1 day, 22:21:06
epoch [9/50] batch [1960/2000] time 2.050 (2.033) data 0.000 (0.001) loss 0.1855 (1.2467) lr 1.9823e-03 eta 1 day, 22:20:26
epoch [9/50] batch [1980/2000] time 2.030 (2.033) data 0.000 (0.001) loss 0.6017 (1.2459) lr 1.9823e-03 eta 1 day, 22:19:41
epoch [9/50] batch [2000/2000] time 2.027 (2.033) data 0.000 (0.001) loss 0.0622 (1.2403) lr 1.9686e-03 eta 1 day, 22:18:59
epoch [10/50] batch [20/2000] time 1.999 (2.060) data 0.000 (0.028) loss 1.3985 (1.3282) lr 1.9686e-03 eta 1 day, 22:54:29
epoch [10/50] batch [40/2000] time 2.055 (2.048) data 0.000 (0.014) loss 0.4886 (1.0826) lr 1.9686e-03 eta 1 day, 22:37:24
epoch [10/50] batch [60/2000] time 2.030 (2.041) data 0.001 (0.009) loss 0.3191 (1.1611) lr 1.9686e-03 eta 1 day, 22:27:31
epoch [10/50] batch [80/2000] time 2.000 (2.038) data 0.000 (0.007) loss 1.9391 (1.1045) lr 1.9686e-03 eta 1 day, 22:22:09
epoch [10/50] batch [100/2000] time 2.053 (2.036) data 0.000 (0.006) loss 0.9922 (1.1172) lr 1.9686e-03 eta 1 day, 22:19:37
epoch [10/50] batch [120/2000] time 2.031 (2.036) data 0.000 (0.005) loss 2.2509 (1.1649) lr 1.9686e-03 eta 1 day, 22:18:15
epoch [10/50] batch [140/2000] time 2.054 (2.035) data 0.000 (0.004) loss 0.6769 (1.1203) lr 1.9686e-03 eta 1 day, 22:16:34
epoch [10/50] batch [160/2000] time 2.029 (2.034) data 0.000 (0.004) loss 0.4465 (1.1035) lr 1.9686e-03 eta 1 day, 22:13:41
epoch [10/50] batch [180/2000] time 2.054 (2.033) data 0.000 (0.003) loss 2.4827 (1.1600) lr 1.9686e-03 eta 1 day, 22:12:54
epoch [10/50] batch [200/2000] time 2.001 (2.033) data 0.000 (0.003) loss 0.0607 (1.1618) lr 1.9686e-03 eta 1 day, 22:11:20
epoch [10/50] batch [220/2000] time 2.062 (2.033) data 0.000 (0.003) loss 1.4865 (1.1746) lr 1.9686e-03 eta 1 day, 22:11:27
epoch [10/50] batch [240/2000] time 2.061 (2.034) data 0.000 (0.003) loss 3.6859 (1.1784) lr 1.9686e-03 eta 1 day, 22:11:05
epoch [10/50] batch [260/2000] time 2.007 (2.034) data 0.000 (0.002) loss 0.7574 (1.1833) lr 1.9686e-03 eta 1 day, 22:10:33
epoch [10/50] batch [280/2000] time 2.053 (2.033) data 0.000 (0.002) loss 0.5298 (1.2045) lr 1.9686e-03 eta 1 day, 22:09:08
epoch [10/50] batch [300/2000] time 2.030 (2.033) data 0.000 (0.002) loss 0.0491 (1.1784) lr 1.9686e-03 eta 1 day, 22:08:01
epoch [10/50] batch [320/2000] time 2.051 (2.033) data 0.000 (0.002) loss 3.4037 (1.1885) lr 1.9686e-03 eta 1 day, 22:07:29
epoch [10/50] batch [340/2000] time 1.998 (2.032) data 0.000 (0.002) loss 1.0552 (1.1996) lr 1.9686e-03 eta 1 day, 22:06:12
epoch [10/50] batch [360/2000] time 1.999 (2.032) data 0.000 (0.002) loss 0.0551 (1.1963) lr 1.9686e-03 eta 1 day, 22:05:03
epoch [10/50] batch [380/2000] time 1.975 (2.032) data 0.000 (0.002) loss 1.7334 (1.2054) lr 1.9686e-03 eta 1 day, 22:03:43
epoch [10/50] batch [400/2000] time 1.973 (2.032) data 0.000 (0.002) loss 0.7599 (1.2070) lr 1.9686e-03 eta 1 day, 22:03:10
epoch [10/50] batch [420/2000] time 1.978 (2.031) data 0.000 (0.002) loss 0.1318 (1.2003) lr 1.9686e-03 eta 1 day, 22:01:45
epoch [10/50] batch [440/2000] time 2.005 (2.031) data 0.000 (0.001) loss 0.2496 (1.1947) lr 1.9686e-03 eta 1 day, 22:01:17
epoch [10/50] batch [460/2000] time 2.057 (2.032) data 0.000 (0.001) loss 0.3438 (1.1972) lr 1.9686e-03 eta 1 day, 22:01:02
epoch [10/50] batch [480/2000] time 2.031 (2.032) data 0.000 (0.001) loss 2.3347 (1.2055) lr 1.9686e-03 eta 1 day, 22:00:11
epoch [10/50] batch [500/2000] time 2.027 (2.032) data 0.000 (0.001) loss 0.1777 (1.2082) lr 1.9686e-03 eta 1 day, 21:59:33
epoch [10/50] batch [520/2000] time 1.999 (2.032) data 0.000 (0.001) loss 0.5331 (1.2192) lr 1.9686e-03 eta 1 day, 21:58:55
epoch [10/50] batch [540/2000] time 2.056 (2.032) data 0.000 (0.001) loss 1.3618 (1.2267) lr 1.9686e-03 eta 1 day, 21:58:21
epoch [10/50] batch [560/2000] time 2.031 (2.032) data 0.000 (0.001) loss 0.5957 (1.2181) lr 1.9686e-03 eta 1 day, 21:57:45
epoch [10/50] batch [580/2000] time 2.006 (2.032) data 0.000 (0.001) loss 0.6764 (1.2198) lr 1.9686e-03 eta 1 day, 21:56:52
epoch [10/50] batch [600/2000] time 2.050 (2.032) data 0.001 (0.001) loss 0.1225 (1.2260) lr 1.9686e-03 eta 1 day, 21:56:06
epoch [10/50] batch [620/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.4738 (1.2161) lr 1.9686e-03 eta 1 day, 21:55:20
epoch [10/50] batch [640/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.1645 (1.2092) lr 1.9686e-03 eta 1 day, 21:54:26
epoch [10/50] batch [660/2000] time 2.032 (2.031) data 0.000 (0.001) loss 2.5921 (1.2076) lr 1.9686e-03 eta 1 day, 21:53:39
epoch [10/50] batch [680/2000] time 2.004 (2.031) data 0.000 (0.001) loss 2.5034 (1.2074) lr 1.9686e-03 eta 1 day, 21:53:05
epoch [10/50] batch [700/2000] time 2.005 (2.032) data 0.000 (0.001) loss 1.0510 (1.2034) lr 1.9686e-03 eta 1 day, 21:52:43
epoch [10/50] batch [720/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.4178 (1.2034) lr 1.9686e-03 eta 1 day, 21:51:58
epoch [10/50] batch [740/2000] time 2.029 (2.031) data 0.000 (0.001) loss 2.4284 (1.2047) lr 1.9686e-03 eta 1 day, 21:51:09
epoch [10/50] batch [760/2000] time 2.061 (2.031) data 0.000 (0.001) loss 1.7098 (1.2037) lr 1.9686e-03 eta 1 day, 21:50:36
epoch [10/50] batch [780/2000] time 2.057 (2.032) data 0.000 (0.001) loss 0.4811 (1.2000) lr 1.9686e-03 eta 1 day, 21:50:02
epoch [10/50] batch [800/2000] time 2.058 (2.032) data 0.000 (0.001) loss 1.0507 (1.1914) lr 1.9686e-03 eta 1 day, 21:49:23
epoch [10/50] batch [820/2000] time 2.065 (2.032) data 0.000 (0.001) loss 0.7669 (1.1936) lr 1.9686e-03 eta 1 day, 21:48:56
epoch [10/50] batch [840/2000] time 2.056 (2.032) data 0.000 (0.001) loss 1.1112 (1.1939) lr 1.9686e-03 eta 1 day, 21:48:08
epoch [10/50] batch [860/2000] time 1.997 (2.032) data 0.000 (0.001) loss 0.6267 (1.1905) lr 1.9686e-03 eta 1 day, 21:47:28
epoch [10/50] batch [880/2000] time 2.034 (2.032) data 0.000 (0.001) loss 0.9546 (1.1927) lr 1.9686e-03 eta 1 day, 21:46:54
epoch [10/50] batch [900/2000] time 2.055 (2.032) data 0.000 (0.001) loss 2.4034 (1.2021) lr 1.9686e-03 eta 1 day, 21:46:15
epoch [10/50] batch [920/2000] time 2.057 (2.032) data 0.000 (0.001) loss 1.4529 (1.2085) lr 1.9686e-03 eta 1 day, 21:45:43
epoch [10/50] batch [940/2000] time 2.039 (2.032) data 0.000 (0.001) loss 3.1335 (1.2083) lr 1.9686e-03 eta 1 day, 21:45:14
epoch [10/50] batch [960/2000] time 1.979 (2.032) data 0.000 (0.001) loss 0.4821 (1.2115) lr 1.9686e-03 eta 1 day, 21:44:46
epoch [10/50] batch [980/2000] time 2.056 (2.032) data 0.000 (0.001) loss 1.6165 (1.2090) lr 1.9686e-03 eta 1 day, 21:44:22
epoch [10/50] batch [1000/2000] time 2.060 (2.032) data 0.000 (0.001) loss 0.2473 (1.2046) lr 1.9686e-03 eta 1 day, 21:43:44
epoch [10/50] batch [1020/2000] time 2.061 (2.033) data 0.000 (0.001) loss 0.8030 (1.2024) lr 1.9686e-03 eta 1 day, 21:43:15
epoch [10/50] batch [1040/2000] time 2.059 (2.033) data 0.000 (0.001) loss 1.3300 (1.1989) lr 1.9686e-03 eta 1 day, 21:42:52
epoch [10/50] batch [1060/2000] time 2.061 (2.033) data 0.000 (0.001) loss 0.0985 (1.1982) lr 1.9686e-03 eta 1 day, 21:42:23
epoch [10/50] batch [1080/2000] time 1.977 (2.033) data 0.000 (0.001) loss 3.4865 (1.2019) lr 1.9686e-03 eta 1 day, 21:41:44
epoch [10/50] batch [1100/2000] time 2.032 (2.033) data 0.000 (0.001) loss 0.6570 (1.2058) lr 1.9686e-03 eta 1 day, 21:41:23
epoch [10/50] batch [1120/2000] time 2.057 (2.033) data 0.000 (0.001) loss 2.1439 (1.1998) lr 1.9686e-03 eta 1 day, 21:40:44
epoch [10/50] batch [1140/2000] time 2.042 (2.033) data 0.001 (0.001) loss 0.3821 (1.1997) lr 1.9686e-03 eta 1 day, 21:40:22
epoch [10/50] batch [1160/2000] time 2.196 (2.034) data 0.000 (0.001) loss 0.2338 (1.2048) lr 1.9686e-03 eta 1 day, 21:40:02
epoch [10/50] batch [1180/2000] time 1.996 (2.034) data 0.000 (0.001) loss 0.7611 (1.1989) lr 1.9686e-03 eta 1 day, 21:39:45
epoch [10/50] batch [1200/2000] time 2.061 (2.034) data 0.000 (0.001) loss 1.2125 (1.2064) lr 1.9686e-03 eta 1 day, 21:39:16
epoch [10/50] batch [1220/2000] time 2.061 (2.034) data 0.000 (0.001) loss 1.8880 (1.2039) lr 1.9686e-03 eta 1 day, 21:38:44
epoch [10/50] batch [1240/2000] time 2.037 (2.034) data 0.000 (0.001) loss 0.5478 (1.2052) lr 1.9686e-03 eta 1 day, 21:38:08
epoch [10/50] batch [1260/2000] time 2.001 (2.034) data 0.000 (0.001) loss 1.6664 (1.2097) lr 1.9686e-03 eta 1 day, 21:37:28
epoch [10/50] batch [1280/2000] time 2.001 (2.034) data 0.000 (0.001) loss 1.3957 (1.2088) lr 1.9686e-03 eta 1 day, 21:36:41
epoch [10/50] batch [1300/2000] time 2.034 (2.034) data 0.000 (0.001) loss 1.6601 (1.2084) lr 1.9686e-03 eta 1 day, 21:35:58
epoch [10/50] batch [1320/2000] time 2.063 (2.034) data 0.000 (0.001) loss 0.7803 (1.2080) lr 1.9686e-03 eta 1 day, 21:35:32
epoch [10/50] batch [1340/2000] time 2.066 (2.034) data 0.000 (0.001) loss 4.4501 (1.2148) lr 1.9686e-03 eta 1 day, 21:34:57
epoch [10/50] batch [1360/2000] time 1.979 (2.035) data 0.000 (0.001) loss 2.9021 (1.2118) lr 1.9686e-03 eta 1 day, 21:34:28
epoch [10/50] batch [1380/2000] time 2.005 (2.035) data 0.000 (0.001) loss 0.8374 (1.2145) lr 1.9686e-03 eta 1 day, 21:33:56
epoch [10/50] batch [1400/2000] time 2.008 (2.035) data 0.000 (0.001) loss 0.0555 (1.2171) lr 1.9686e-03 eta 1 day, 21:33:34
epoch [10/50] batch [1420/2000] time 2.039 (2.035) data 0.000 (0.001) loss 1.2947 (1.2153) lr 1.9686e-03 eta 1 day, 21:33:14
epoch [10/50] batch [1440/2000] time 2.009 (2.035) data 0.000 (0.001) loss 0.8450 (1.2148) lr 1.9686e-03 eta 1 day, 21:32:45
epoch [10/50] batch [1460/2000] time 2.071 (2.035) data 0.000 (0.001) loss 1.8014 (1.2176) lr 1.9686e-03 eta 1 day, 21:32:14
epoch [10/50] batch [1480/2000] time 1.999 (2.035) data 0.000 (0.001) loss 0.3573 (1.2119) lr 1.9686e-03 eta 1 day, 21:31:35
epoch [10/50] batch [1500/2000] time 2.000 (2.035) data 0.000 (0.001) loss 2.2332 (1.2123) lr 1.9686e-03 eta 1 day, 21:30:43
epoch [10/50] batch [1520/2000] time 1.999 (2.035) data 0.000 (0.001) loss 1.3115 (1.2107) lr 1.9686e-03 eta 1 day, 21:30:01
epoch [10/50] batch [1540/2000] time 2.057 (2.035) data 0.000 (0.001) loss 1.5633 (1.2064) lr 1.9686e-03 eta 1 day, 21:29:25
epoch [10/50] batch [1560/2000] time 1.977 (2.035) data 0.000 (0.001) loss 1.3751 (1.2050) lr 1.9686e-03 eta 1 day, 21:28:39
epoch [10/50] batch [1580/2000] time 2.001 (2.035) data 0.000 (0.001) loss 2.1411 (1.2001) lr 1.9686e-03 eta 1 day, 21:27:49
epoch [10/50] batch [1600/2000] time 2.003 (2.035) data 0.000 (0.001) loss 1.8343 (1.2021) lr 1.9686e-03 eta 1 day, 21:27:17
epoch [10/50] batch [1620/2000] time 2.004 (2.035) data 0.000 (0.001) loss 1.7976 (1.2040) lr 1.9686e-03 eta 1 day, 21:26:42
epoch [10/50] batch [1640/2000] time 2.086 (2.035) data 0.001 (0.001) loss 0.7008 (1.2061) lr 1.9686e-03 eta 1 day, 21:26:10
epoch [10/50] batch [1660/2000] time 2.016 (2.036) data 0.000 (0.001) loss 0.6713 (1.2067) lr 1.9686e-03 eta 1 day, 21:25:46
epoch [10/50] batch [1680/2000] time 2.037 (2.036) data 0.000 (0.001) loss 0.1252 (1.2028) lr 1.9686e-03 eta 1 day, 21:25:02
epoch [10/50] batch [1700/2000] time 2.055 (2.036) data 0.000 (0.001) loss 1.3758 (1.2044) lr 1.9686e-03 eta 1 day, 21:24:20
epoch [10/50] batch [1720/2000] time 2.058 (2.036) data 0.000 (0.001) loss 0.1142 (1.2059) lr 1.9686e-03 eta 1 day, 21:23:31
epoch [10/50] batch [1740/2000] time 2.032 (2.035) data 0.000 (0.001) loss 1.1859 (1.2065) lr 1.9686e-03 eta 1 day, 21:22:48
epoch [10/50] batch [1760/2000] time 2.066 (2.036) data 0.000 (0.001) loss 0.3670 (1.2111) lr 1.9686e-03 eta 1 day, 21:22:09
epoch [10/50] batch [1780/2000] time 2.032 (2.035) data 0.000 (0.001) loss 3.9886 (1.2105) lr 1.9686e-03 eta 1 day, 21:21:24
epoch [10/50] batch [1800/2000] time 2.058 (2.035) data 0.000 (0.001) loss 0.0630 (1.2117) lr 1.9686e-03 eta 1 day, 21:20:37
epoch [10/50] batch [1820/2000] time 1.999 (2.035) data 0.000 (0.001) loss 0.6629 (1.2099) lr 1.9686e-03 eta 1 day, 21:19:53
epoch [10/50] batch [1840/2000] time 2.060 (2.035) data 0.000 (0.001) loss 1.6524 (1.2123) lr 1.9686e-03 eta 1 day, 21:19:07
epoch [10/50] batch [1860/2000] time 2.055 (2.035) data 0.000 (0.001) loss 0.6692 (1.2123) lr 1.9686e-03 eta 1 day, 21:18:26
epoch [10/50] batch [1880/2000] time 2.056 (2.035) data 0.000 (0.001) loss 1.7077 (1.2117) lr 1.9686e-03 eta 1 day, 21:17:43
epoch [10/50] batch [1900/2000] time 2.056 (2.035) data 0.000 (0.001) loss 0.6513 (1.2135) lr 1.9686e-03 eta 1 day, 21:16:57
epoch [10/50] batch [1920/2000] time 2.055 (2.035) data 0.000 (0.001) loss 1.4717 (1.2139) lr 1.9686e-03 eta 1 day, 21:16:15
epoch [10/50] batch [1940/2000] time 2.058 (2.035) data 0.000 (0.001) loss 2.2283 (1.2141) lr 1.9686e-03 eta 1 day, 21:15:30
epoch [10/50] batch [1960/2000] time 2.058 (2.035) data 0.000 (0.001) loss 2.5511 (1.2121) lr 1.9686e-03 eta 1 day, 21:14:49
epoch [10/50] batch [1980/2000] time 2.001 (2.035) data 0.000 (0.001) loss 0.8778 (1.2111) lr 1.9686e-03 eta 1 day, 21:14:10
epoch [10/50] batch [2000/2000] time 1.998 (2.035) data 0.000 (0.001) loss 1.0605 (1.2129) lr 1.9511e-03 eta 1 day, 21:13:20
epoch [11/50] batch [20/2000] time 2.056 (2.070) data 0.000 (0.035) loss 4.0000 (1.3993) lr 1.9511e-03 eta 1 day, 21:59:20
epoch [11/50] batch [40/2000] time 2.054 (2.049) data 0.000 (0.018) loss 1.0284 (1.2516) lr 1.9511e-03 eta 1 day, 21:30:53
epoch [11/50] batch [60/2000] time 2.055 (2.041) data 0.001 (0.012) loss 1.0056 (1.1506) lr 1.9511e-03 eta 1 day, 21:19:54
epoch [11/50] batch [80/2000] time 2.001 (2.040) data 0.000 (0.009) loss 1.3871 (1.2024) lr 1.9511e-03 eta 1 day, 21:17:43
epoch [11/50] batch [100/2000] time 2.057 (2.040) data 0.000 (0.007) loss 0.2058 (1.2549) lr 1.9511e-03 eta 1 day, 21:16:23
epoch [11/50] batch [120/2000] time 2.034 (2.038) data 0.000 (0.006) loss 1.0127 (1.2675) lr 1.9511e-03 eta 1 day, 21:13:47
epoch [11/50] batch [140/2000] time 2.056 (2.038) data 0.000 (0.005) loss 0.6780 (1.2322) lr 1.9511e-03 eta 1 day, 21:12:05
epoch [11/50] batch [160/2000] time 2.002 (2.037) data 0.000 (0.005) loss 2.0338 (1.2763) lr 1.9511e-03 eta 1 day, 21:10:55
epoch [11/50] batch [180/2000] time 2.004 (2.037) data 0.003 (0.004) loss 0.3324 (1.2293) lr 1.9511e-03 eta 1 day, 21:10:32
epoch [11/50] batch [200/2000] time 2.000 (2.038) data 0.001 (0.004) loss 0.9631 (1.2124) lr 1.9511e-03 eta 1 day, 21:10:16
epoch [11/50] batch [220/2000] time 2.034 (2.037) data 0.000 (0.003) loss 2.0176 (1.1939) lr 1.9511e-03 eta 1 day, 21:09:03
epoch [11/50] batch [240/2000] time 2.055 (2.037) data 0.000 (0.003) loss 0.6827 (1.1954) lr 1.9511e-03 eta 1 day, 21:08:08
epoch [11/50] batch [260/2000] time 2.062 (2.037) data 0.000 (0.003) loss 0.0797 (1.1922) lr 1.9511e-03 eta 1 day, 21:07:10
epoch [11/50] batch [280/2000] time 2.034 (2.037) data 0.000 (0.003) loss 0.5316 (1.1825) lr 1.9511e-03 eta 1 day, 21:06:56
epoch [11/50] batch [300/2000] time 2.029 (2.037) data 0.000 (0.003) loss 2.6004 (1.2091) lr 1.9511e-03 eta 1 day, 21:05:22
epoch [11/50] batch [320/2000] time 1.998 (2.036) data 0.000 (0.002) loss 0.5219 (1.1957) lr 1.9511e-03 eta 1 day, 21:04:26
epoch [11/50] batch [340/2000] time 2.004 (2.036) data 0.000 (0.002) loss 0.5738 (1.1872) lr 1.9511e-03 eta 1 day, 21:02:50
epoch [11/50] batch [360/2000] time 2.050 (2.036) data 0.000 (0.002) loss 1.9551 (1.1783) lr 1.9511e-03 eta 1 day, 21:01:49
epoch [11/50] batch [380/2000] time 2.050 (2.035) data 0.000 (0.002) loss 0.0708 (1.1795) lr 1.9511e-03 eta 1 day, 21:00:48
epoch [11/50] batch [400/2000] time 2.049 (2.035) data 0.000 (0.002) loss 0.5411 (1.1992) lr 1.9511e-03 eta 1 day, 20:59:38
epoch [11/50] batch [420/2000] time 2.027 (2.035) data 0.000 (0.002) loss 1.0696 (1.1834) lr 1.9511e-03 eta 1 day, 20:58:56
epoch [11/50] batch [440/2000] time 2.028 (2.034) data 0.000 (0.002) loss 1.0873 (1.1939) lr 1.9511e-03 eta 1 day, 20:57:41
epoch [11/50] batch [460/2000] time 2.032 (2.034) data 0.000 (0.002) loss 0.3727 (1.1855) lr 1.9511e-03 eta 1 day, 20:56:39
epoch [11/50] batch [480/2000] time 1.998 (2.034) data 0.000 (0.002) loss 1.5909 (1.1953) lr 1.9511e-03 eta 1 day, 20:55:13
epoch [11/50] batch [500/2000] time 2.057 (2.034) data 0.000 (0.002) loss 2.5490 (1.2108) lr 1.9511e-03 eta 1 day, 20:54:28
epoch [11/50] batch [520/2000] time 2.030 (2.033) data 0.000 (0.002) loss 0.1338 (1.2093) lr 1.9511e-03 eta 1 day, 20:53:34
epoch [11/50] batch [540/2000] time 2.054 (2.033) data 0.000 (0.002) loss 0.7712 (1.2074) lr 1.9511e-03 eta 1 day, 20:52:14
epoch [11/50] batch [560/2000] time 2.053 (2.033) data 0.000 (0.002) loss 0.4392 (1.2045) lr 1.9511e-03 eta 1 day, 20:51:46
epoch [11/50] batch [580/2000] time 2.051 (2.033) data 0.000 (0.001) loss 1.6949 (1.2153) lr 1.9511e-03 eta 1 day, 20:50:56
epoch [11/50] batch [600/2000] time 2.054 (2.033) data 0.000 (0.001) loss 0.6075 (1.2207) lr 1.9511e-03 eta 1 day, 20:50:08
epoch [11/50] batch [620/2000] time 1.975 (2.032) data 0.000 (0.001) loss 0.9137 (1.2024) lr 1.9511e-03 eta 1 day, 20:48:57
epoch [11/50] batch [640/2000] time 2.051 (2.032) data 0.000 (0.001) loss 0.2125 (1.1976) lr 1.9511e-03 eta 1 day, 20:48:03
epoch [11/50] batch [660/2000] time 2.026 (2.032) data 0.000 (0.001) loss 1.7622 (1.1921) lr 1.9511e-03 eta 1 day, 20:47:29
epoch [11/50] batch [680/2000] time 1.976 (2.032) data 0.000 (0.001) loss 1.6381 (1.1906) lr 1.9511e-03 eta 1 day, 20:46:35
epoch [11/50] batch [700/2000] time 2.003 (2.032) data 0.000 (0.001) loss 1.6011 (1.1873) lr 1.9511e-03 eta 1 day, 20:45:52
epoch [11/50] batch [720/2000] time 2.056 (2.032) data 0.000 (0.001) loss 2.1262 (1.1900) lr 1.9511e-03 eta 1 day, 20:45:11
epoch [11/50] batch [740/2000] time 2.003 (2.032) data 0.000 (0.001) loss 0.3338 (1.1898) lr 1.9511e-03 eta 1 day, 20:44:30
epoch [11/50] batch [760/2000] time 1.999 (2.032) data 0.000 (0.001) loss 0.3473 (1.1995) lr 1.9511e-03 eta 1 day, 20:43:46
epoch [11/50] batch [780/2000] time 2.030 (2.032) data 0.000 (0.001) loss 3.1521 (1.2084) lr 1.9511e-03 eta 1 day, 20:43:09
epoch [11/50] batch [800/2000] time 1.999 (2.032) data 0.000 (0.001) loss 1.4382 (1.2103) lr 1.9511e-03 eta 1 day, 20:42:07
epoch [11/50] batch [820/2000] time 2.029 (2.032) data 0.000 (0.001) loss 1.4811 (1.2061) lr 1.9511e-03 eta 1 day, 20:41:26
epoch [11/50] batch [840/2000] time 2.054 (2.032) data 0.000 (0.001) loss 3.8218 (1.2037) lr 1.9511e-03 eta 1 day, 20:40:55
epoch [11/50] batch [860/2000] time 1.979 (2.032) data 0.000 (0.001) loss 1.9075 (1.2056) lr 1.9511e-03 eta 1 day, 20:40:11
epoch [11/50] batch [880/2000] time 2.005 (2.032) data 0.000 (0.001) loss 0.3377 (1.2028) lr 1.9511e-03 eta 1 day, 20:39:43
epoch [11/50] batch [900/2000] time 2.033 (2.032) data 0.000 (0.001) loss 0.2291 (1.1998) lr 1.9511e-03 eta 1 day, 20:39:05
epoch [11/50] batch [920/2000] time 2.032 (2.032) data 0.000 (0.001) loss 1.2889 (1.2043) lr 1.9511e-03 eta 1 day, 20:38:20
epoch [11/50] batch [940/2000] time 2.055 (2.032) data 0.000 (0.001) loss 0.4852 (1.2027) lr 1.9511e-03 eta 1 day, 20:37:38
epoch [11/50] batch [960/2000] time 2.063 (2.032) data 0.000 (0.001) loss 0.8886 (1.2036) lr 1.9511e-03 eta 1 day, 20:37:08
epoch [11/50] batch [980/2000] time 2.031 (2.032) data 0.000 (0.001) loss 1.4533 (1.2027) lr 1.9511e-03 eta 1 day, 20:36:31
epoch [11/50] batch [1000/2000] time 2.036 (2.032) data 0.000 (0.001) loss 0.2869 (1.1983) lr 1.9511e-03 eta 1 day, 20:35:48
epoch [11/50] batch [1020/2000] time 2.054 (2.032) data 0.000 (0.001) loss 1.1854 (1.2039) lr 1.9511e-03 eta 1 day, 20:35:13
epoch [11/50] batch [1040/2000] time 2.059 (2.032) data 0.000 (0.001) loss 0.3055 (1.2013) lr 1.9511e-03 eta 1 day, 20:34:37
epoch [11/50] batch [1060/2000] time 2.002 (2.033) data 0.000 (0.001) loss 0.8950 (1.1986) lr 1.9511e-03 eta 1 day, 20:34:05
epoch [11/50] batch [1080/2000] time 2.009 (2.033) data 0.000 (0.001) loss 0.6959 (1.1950) lr 1.9511e-03 eta 1 day, 20:33:31
epoch [11/50] batch [1100/2000] time 2.065 (2.033) data 0.000 (0.001) loss 3.9305 (1.1961) lr 1.9511e-03 eta 1 day, 20:33:05
epoch [11/50] batch [1120/2000] time 1.981 (2.033) data 0.000 (0.001) loss 1.7971 (1.1977) lr 1.9511e-03 eta 1 day, 20:32:40
epoch [11/50] batch [1140/2000] time 2.038 (2.033) data 0.000 (0.001) loss 0.2412 (1.1932) lr 1.9511e-03 eta 1 day, 20:32:04
epoch [11/50] batch [1160/2000] time 2.058 (2.033) data 0.000 (0.001) loss 3.2377 (1.1979) lr 1.9511e-03 eta 1 day, 20:31:23
epoch [11/50] batch [1180/2000] time 2.035 (2.033) data 0.000 (0.001) loss 1.0868 (1.2009) lr 1.9511e-03 eta 1 day, 20:30:45
epoch [11/50] batch [1200/2000] time 2.059 (2.033) data 0.000 (0.001) loss 1.4117 (1.2050) lr 1.9511e-03 eta 1 day, 20:30:13
epoch [11/50] batch [1220/2000] time 2.057 (2.033) data 0.000 (0.001) loss 1.3374 (1.2028) lr 1.9511e-03 eta 1 day, 20:29:39
epoch [11/50] batch [1240/2000] time 2.003 (2.033) data 0.000 (0.001) loss 4.0162 (1.2100) lr 1.9511e-03 eta 1 day, 20:28:56
epoch [11/50] batch [1260/2000] time 2.055 (2.033) data 0.000 (0.001) loss 0.6549 (1.2060) lr 1.9511e-03 eta 1 day, 20:28:26
epoch [11/50] batch [1280/2000] time 2.056 (2.033) data 0.000 (0.001) loss 1.3031 (1.2092) lr 1.9511e-03 eta 1 day, 20:27:44
epoch [11/50] batch [1300/2000] time 2.058 (2.033) data 0.000 (0.001) loss 0.8080 (1.2134) lr 1.9511e-03 eta 1 day, 20:27:09
epoch [11/50] batch [1320/2000] time 2.055 (2.033) data 0.000 (0.001) loss 2.6898 (1.2101) lr 1.9511e-03 eta 1 day, 20:26:23
epoch [11/50] batch [1340/2000] time 2.059 (2.033) data 0.001 (0.001) loss 2.7063 (1.2203) lr 1.9511e-03 eta 1 day, 20:25:38
epoch [11/50] batch [1360/2000] time 2.057 (2.033) data 0.000 (0.001) loss 0.7348 (1.2209) lr 1.9511e-03 eta 1 day, 20:24:52
epoch [11/50] batch [1380/2000] time 2.034 (2.033) data 0.000 (0.001) loss 3.0776 (1.2278) lr 1.9511e-03 eta 1 day, 20:24:16
epoch [11/50] batch [1400/2000] time 2.060 (2.033) data 0.000 (0.001) loss 0.0959 (1.2248) lr 1.9511e-03 eta 1 day, 20:23:33
epoch [11/50] batch [1420/2000] time 2.000 (2.033) data 0.000 (0.001) loss 0.6856 (1.2254) lr 1.9511e-03 eta 1 day, 20:22:44
epoch [11/50] batch [1440/2000] time 2.034 (2.033) data 0.000 (0.001) loss 0.0848 (1.2252) lr 1.9511e-03 eta 1 day, 20:22:03
epoch [11/50] batch [1460/2000] time 2.001 (2.033) data 0.000 (0.001) loss 0.4488 (1.2279) lr 1.9511e-03 eta 1 day, 20:21:22
epoch [11/50] batch [1480/2000] time 2.007 (2.033) data 0.000 (0.001) loss 0.9736 (1.2304) lr 1.9511e-03 eta 1 day, 20:20:46
epoch [11/50] batch [1500/2000] time 2.062 (2.033) data 0.000 (0.001) loss 1.7716 (1.2287) lr 1.9511e-03 eta 1 day, 20:20:10
epoch [11/50] batch [1520/2000] time 2.061 (2.033) data 0.000 (0.001) loss 1.2285 (1.2282) lr 1.9511e-03 eta 1 day, 20:19:39
epoch [11/50] batch [1540/2000] time 1.975 (2.033) data 0.000 (0.001) loss 0.5220 (1.2257) lr 1.9511e-03 eta 1 day, 20:18:49
epoch [11/50] batch [1560/2000] time 2.059 (2.033) data 0.000 (0.001) loss 0.0674 (1.2270) lr 1.9511e-03 eta 1 day, 20:18:11
epoch [11/50] batch [1580/2000] time 2.060 (2.033) data 0.000 (0.001) loss 0.6064 (1.2262) lr 1.9511e-03 eta 1 day, 20:17:30
epoch [11/50] batch [1600/2000] time 2.063 (2.033) data 0.000 (0.001) loss 0.6337 (1.2288) lr 1.9511e-03 eta 1 day, 20:17:01
epoch [11/50] batch [1620/2000] time 2.055 (2.033) data 0.000 (0.001) loss 2.8618 (1.2289) lr 1.9511e-03 eta 1 day, 20:16:22
epoch [11/50] batch [1640/2000] time 2.060 (2.033) data 0.000 (0.001) loss 1.8219 (1.2270) lr 1.9511e-03 eta 1 day, 20:15:41
epoch [11/50] batch [1660/2000] time 2.060 (2.033) data 0.000 (0.001) loss 1.1245 (1.2243) lr 1.9511e-03 eta 1 day, 20:15:03
epoch [11/50] batch [1680/2000] time 2.003 (2.033) data 0.001 (0.001) loss 0.7417 (1.2237) lr 1.9511e-03 eta 1 day, 20:14:21
epoch [11/50] batch [1700/2000] time 2.001 (2.033) data 0.000 (0.001) loss 0.7788 (1.2216) lr 1.9511e-03 eta 1 day, 20:13:42
epoch [11/50] batch [1720/2000] time 2.002 (2.034) data 0.000 (0.001) loss 0.2906 (1.2217) lr 1.9511e-03 eta 1 day, 20:13:03
epoch [11/50] batch [1740/2000] time 2.035 (2.033) data 0.000 (0.001) loss 1.2480 (1.2164) lr 1.9511e-03 eta 1 day, 20:12:21
epoch [11/50] batch [1760/2000] time 2.057 (2.033) data 0.000 (0.001) loss 2.0221 (1.2164) lr 1.9511e-03 eta 1 day, 20:11:33
epoch [11/50] batch [1780/2000] time 2.033 (2.033) data 0.000 (0.001) loss 2.2135 (1.2170) lr 1.9511e-03 eta 1 day, 20:10:51
epoch [11/50] batch [1800/2000] time 2.003 (2.033) data 0.000 (0.001) loss 2.3271 (1.2142) lr 1.9511e-03 eta 1 day, 20:10:11
epoch [11/50] batch [1820/2000] time 2.058 (2.033) data 0.000 (0.001) loss 0.8948 (1.2146) lr 1.9511e-03 eta 1 day, 20:09:29
epoch [11/50] batch [1840/2000] time 2.001 (2.033) data 0.000 (0.001) loss 0.4773 (1.2177) lr 1.9511e-03 eta 1 day, 20:08:56
epoch [11/50] batch [1860/2000] time 2.035 (2.033) data 0.000 (0.001) loss 1.0247 (1.2148) lr 1.9511e-03 eta 1 day, 20:08:14
epoch [11/50] batch [1880/2000] time 2.030 (2.033) data 0.000 (0.001) loss 3.1779 (1.2157) lr 1.9511e-03 eta 1 day, 20:07:35
epoch [11/50] batch [1900/2000] time 2.059 (2.033) data 0.000 (0.001) loss 0.5937 (1.2140) lr 1.9511e-03 eta 1 day, 20:06:48
epoch [11/50] batch [1920/2000] time 2.060 (2.033) data 0.000 (0.001) loss 1.6189 (1.2171) lr 1.9511e-03 eta 1 day, 20:06:13
epoch [11/50] batch [1940/2000] time 2.054 (2.034) data 0.000 (0.001) loss 0.1764 (1.2157) lr 1.9511e-03 eta 1 day, 20:05:39
epoch [11/50] batch [1960/2000] time 2.058 (2.034) data 0.000 (0.001) loss 0.5153 (1.2156) lr 1.9511e-03 eta 1 day, 20:04:57
epoch [11/50] batch [1980/2000] time 2.056 (2.034) data 0.000 (0.001) loss 1.8191 (1.2167) lr 1.9511e-03 eta 1 day, 20:04:17
epoch [11/50] batch [2000/2000] time 1.986 (2.034) data 0.000 (0.001) loss 0.3233 (1.2133) lr 1.9298e-03 eta 1 day, 20:03:38
epoch [12/50] batch [20/2000] time 2.058 (2.077) data 0.000 (0.034) loss 0.1182 (1.5913) lr 1.9298e-03 eta 1 day, 20:59:50
epoch [12/50] batch [40/2000] time 2.059 (2.054) data 0.000 (0.017) loss 0.9741 (1.4786) lr 1.9298e-03 eta 1 day, 20:29:20
epoch [12/50] batch [60/2000] time 2.019 (2.048) data 0.001 (0.012) loss 1.8972 (1.3634) lr 1.9298e-03 eta 1 day, 20:20:16
epoch [12/50] batch [80/2000] time 2.000 (2.047) data 0.000 (0.009) loss 2.0406 (1.2631) lr 1.9298e-03 eta 1 day, 20:18:02
epoch [12/50] batch [100/2000] time 2.033 (2.045) data 0.000 (0.007) loss 2.0351 (1.2264) lr 1.9298e-03 eta 1 day, 20:14:38
epoch [12/50] batch [120/2000] time 2.061 (2.043) data 0.000 (0.006) loss 0.5513 (1.1915) lr 1.9298e-03 eta 1 day, 20:11:24
epoch [12/50] batch [140/2000] time 2.032 (2.041) data 0.000 (0.005) loss 0.3476 (1.1773) lr 1.9298e-03 eta 1 day, 20:08:29
epoch [12/50] batch [160/2000] time 2.001 (2.039) data 0.000 (0.005) loss 2.8640 (1.2195) lr 1.9298e-03 eta 1 day, 20:05:26
epoch [12/50] batch [180/2000] time 2.033 (2.039) data 0.000 (0.004) loss 0.0698 (1.2092) lr 1.9298e-03 eta 1 day, 20:04:00
epoch [12/50] batch [200/2000] time 2.000 (2.038) data 0.000 (0.004) loss 1.2700 (1.1791) lr 1.9298e-03 eta 1 day, 20:02:47
epoch [12/50] batch [220/2000] time 2.059 (2.038) data 0.000 (0.003) loss 2.8130 (1.1905) lr 1.9298e-03 eta 1 day, 20:01:34
epoch [12/50] batch [240/2000] time 2.058 (2.038) data 0.000 (0.003) loss 0.6094 (1.1926) lr 1.9298e-03 eta 1 day, 20:00:45
epoch [12/50] batch [260/2000] time 2.056 (2.037) data 0.000 (0.003) loss 0.4523 (1.2175) lr 1.9298e-03 eta 1 day, 19:59:54
epoch [12/50] batch [280/2000] time 2.033 (2.037) data 0.000 (0.003) loss 1.9926 (1.2402) lr 1.9298e-03 eta 1 day, 19:59:07
epoch [12/50] batch [300/2000] time 2.001 (2.037) data 0.000 (0.003) loss 0.6726 (1.2367) lr 1.9298e-03 eta 1 day, 19:58:16
epoch [12/50] batch [320/2000] time 1.998 (2.037) data 0.000 (0.002) loss 1.2774 (1.2217) lr 1.9298e-03 eta 1 day, 19:57:37
epoch [12/50] batch [340/2000] time 2.002 (2.037) data 0.000 (0.002) loss 0.8974 (1.2171) lr 1.9298e-03 eta 1 day, 19:57:02
epoch [12/50] batch [360/2000] time 2.061 (2.038) data 0.000 (0.002) loss 0.1546 (1.2131) lr 1.9298e-03 eta 1 day, 19:56:50
epoch [12/50] batch [380/2000] time 2.000 (2.037) data 0.000 (0.002) loss 1.3910 (1.2084) lr 1.9298e-03 eta 1 day, 19:55:38
epoch [12/50] batch [400/2000] time 2.035 (2.037) data 0.000 (0.002) loss 2.9218 (1.2061) lr 1.9298e-03 eta 1 day, 19:54:57
epoch [12/50] batch [420/2000] time 2.037 (2.037) data 0.000 (0.002) loss 0.8767 (1.2045) lr 1.9298e-03 eta 1 day, 19:54:02
epoch [12/50] batch [440/2000] time 2.033 (2.037) data 0.000 (0.002) loss 1.1803 (1.2080) lr 1.9298e-03 eta 1 day, 19:53:00
epoch [12/50] batch [460/2000] time 1.997 (2.037) data 0.000 (0.002) loss 0.7653 (1.1986) lr 1.9298e-03 eta 1 day, 19:51:59
epoch [12/50] batch [480/2000] time 2.051 (2.037) data 0.000 (0.002) loss 2.1770 (1.2065) lr 1.9298e-03 eta 1 day, 19:51:13
epoch [12/50] batch [500/2000] time 2.051 (2.037) data 0.000 (0.002) loss 3.4109 (1.2228) lr 1.9298e-03 eta 1 day, 19:50:32
epoch [12/50] batch [520/2000] time 2.053 (2.036) data 0.000 (0.002) loss 1.2060 (1.2223) lr 1.9298e-03 eta 1 day, 19:49:42
epoch [12/50] batch [540/2000] time 1.998 (2.036) data 0.000 (0.002) loss 1.5698 (1.2214) lr 1.9298e-03 eta 1 day, 19:48:51
epoch [12/50] batch [560/2000] time 1.998 (2.036) data 0.000 (0.001) loss 1.4512 (1.2161) lr 1.9298e-03 eta 1 day, 19:48:09
epoch [12/50] batch [580/2000] time 2.031 (2.036) data 0.000 (0.001) loss 0.3839 (1.2257) lr 1.9298e-03 eta 1 day, 19:47:28
epoch [12/50] batch [600/2000] time 2.365 (2.039) data 0.001 (0.001) loss 0.7774 (1.2285) lr 1.9298e-03 eta 1 day, 19:50:30
epoch [12/50] batch [620/2000] time 2.058 (2.044) data 0.000 (0.001) loss 1.2427 (1.2416) lr 1.9298e-03 eta 1 day, 19:56:35
epoch [12/50] batch [640/2000] time 2.009 (2.044) data 0.000 (0.001) loss 2.6307 (1.2499) lr 1.9298e-03 eta 1 day, 19:55:37
epoch [12/50] batch [660/2000] time 1.984 (2.044) data 0.000 (0.001) loss 1.5542 (1.2428) lr 1.9298e-03 eta 1 day, 19:54:37
epoch [12/50] batch [680/2000] time 1.980 (2.044) data 0.000 (0.001) loss 0.9949 (1.2334) lr 1.9298e-03 eta 1 day, 19:53:49
epoch [12/50] batch [700/2000] time 2.053 (2.044) data 0.000 (0.001) loss 0.9245 (1.2280) lr 1.9298e-03 eta 1 day, 19:52:50
epoch [12/50] batch [720/2000] time 2.055 (2.043) data 0.000 (0.001) loss 0.3481 (1.2327) lr 1.9298e-03 eta 1 day, 19:51:47
epoch [12/50] batch [740/2000] time 2.029 (2.043) data 0.000 (0.001) loss 1.2687 (1.2269) lr 1.9298e-03 eta 1 day, 19:50:19
epoch [12/50] batch [760/2000] time 2.050 (2.042) data 0.000 (0.001) loss 0.8946 (1.2307) lr 1.9298e-03 eta 1 day, 19:49:15
epoch [12/50] batch [780/2000] time 2.052 (2.042) data 0.000 (0.001) loss 2.1570 (1.2340) lr 1.9298e-03 eta 1 day, 19:48:16
epoch [12/50] batch [800/2000] time 2.061 (2.042) data 0.000 (0.001) loss 0.1234 (1.2262) lr 1.9298e-03 eta 1 day, 19:46:58
epoch [12/50] batch [820/2000] time 2.006 (2.041) data 0.000 (0.001) loss 0.4298 (1.2324) lr 1.9298e-03 eta 1 day, 19:45:57
epoch [12/50] batch [840/2000] time 1.996 (2.041) data 0.000 (0.001) loss 0.5007 (1.2320) lr 1.9298e-03 eta 1 day, 19:44:58
epoch [12/50] batch [860/2000] time 2.051 (2.041) data 0.000 (0.001) loss 3.1976 (1.2296) lr 1.9298e-03 eta 1 day, 19:43:49
epoch [12/50] batch [880/2000] time 1.976 (2.041) data 0.000 (0.001) loss 0.2039 (1.2222) lr 1.9298e-03 eta 1 day, 19:42:46
epoch [12/50] batch [900/2000] time 2.059 (2.040) data 0.000 (0.001) loss 0.4235 (1.2276) lr 1.9298e-03 eta 1 day, 19:41:56
epoch [12/50] batch [920/2000] time 2.007 (2.040) data 0.000 (0.001) loss 1.0298 (1.2245) lr 1.9298e-03 eta 1 day, 19:41:04
epoch [12/50] batch [940/2000] time 2.001 (2.040) data 0.000 (0.001) loss 1.0724 (1.2243) lr 1.9298e-03 eta 1 day, 19:40:15
epoch [12/50] batch [960/2000] time 2.061 (2.040) data 0.000 (0.001) loss 1.1181 (1.2240) lr 1.9298e-03 eta 1 day, 19:39:38
epoch [12/50] batch [980/2000] time 2.059 (2.040) data 0.000 (0.001) loss 0.6697 (1.2227) lr 1.9298e-03 eta 1 day, 19:38:54
epoch [12/50] batch [1000/2000] time 2.038 (2.040) data 0.000 (0.001) loss 0.5805 (1.2237) lr 1.9298e-03 eta 1 day, 19:37:54
epoch [12/50] batch [1020/2000] time 2.033 (2.040) data 0.000 (0.001) loss 0.5202 (1.2189) lr 1.9298e-03 eta 1 day, 19:37:17
epoch [12/50] batch [1040/2000] time 2.057 (2.040) data 0.000 (0.001) loss 1.1970 (1.2215) lr 1.9298e-03 eta 1 day, 19:36:24
epoch [12/50] batch [1060/2000] time 2.057 (2.040) data 0.000 (0.001) loss 0.4964 (1.2228) lr 1.9298e-03 eta 1 day, 19:35:43
epoch [12/50] batch [1080/2000] time 2.062 (2.040) data 0.000 (0.001) loss 0.1033 (1.2149) lr 1.9298e-03 eta 1 day, 19:34:55
epoch [12/50] batch [1100/2000] time 2.062 (2.040) data 0.000 (0.001) loss 0.0913 (1.2214) lr 1.9298e-03 eta 1 day, 19:34:16
epoch [12/50] batch [1120/2000] time 1.978 (2.040) data 0.000 (0.001) loss 1.5563 (1.2205) lr 1.9298e-03 eta 1 day, 19:33:26
epoch [12/50] batch [1140/2000] time 2.000 (2.039) data 0.001 (0.001) loss 1.0248 (1.2219) lr 1.9298e-03 eta 1 day, 19:32:34
epoch [12/50] batch [1160/2000] time 2.032 (2.039) data 0.000 (0.001) loss 2.5481 (1.2277) lr 1.9298e-03 eta 1 day, 19:31:47
epoch [12/50] batch [1180/2000] time 1.999 (2.039) data 0.000 (0.001) loss 0.4821 (1.2334) lr 1.9298e-03 eta 1 day, 19:30:47
epoch [12/50] batch [1200/2000] time 2.062 (2.039) data 0.000 (0.001) loss 0.1403 (1.2322) lr 1.9298e-03 eta 1 day, 19:30:05
epoch [12/50] batch [1220/2000] time 2.059 (2.039) data 0.000 (0.001) loss 1.9435 (1.2370) lr 1.9298e-03 eta 1 day, 19:29:13
epoch [12/50] batch [1240/2000] time 2.031 (2.039) data 0.000 (0.001) loss 0.6708 (1.2352) lr 1.9298e-03 eta 1 day, 19:28:15
epoch [12/50] batch [1260/2000] time 2.000 (2.039) data 0.000 (0.001) loss 3.2416 (1.2397) lr 1.9298e-03 eta 1 day, 19:27:22
epoch [12/50] batch [1280/2000] time 2.038 (2.039) data 0.000 (0.001) loss 1.9271 (1.2402) lr 1.9298e-03 eta 1 day, 19:26:42
epoch [12/50] batch [1300/2000] time 2.057 (2.039) data 0.000 (0.001) loss 1.1884 (1.2359) lr 1.9298e-03 eta 1 day, 19:26:04
epoch [12/50] batch [1320/2000] time 2.057 (2.039) data 0.000 (0.001) loss 0.6404 (1.2341) lr 1.9298e-03 eta 1 day, 19:25:17
epoch [12/50] batch [1340/2000] time 2.034 (2.038) data 0.000 (0.001) loss 1.8256 (1.2326) lr 1.9298e-03 eta 1 day, 19:24:20
epoch [12/50] batch [1360/2000] time 2.057 (2.038) data 0.000 (0.001) loss 1.1628 (1.2344) lr 1.9298e-03 eta 1 day, 19:23:39
epoch [12/50] batch [1380/2000] time 2.060 (2.038) data 0.000 (0.001) loss 0.2745 (1.2287) lr 1.9298e-03 eta 1 day, 19:22:53
epoch [12/50] batch [1400/2000] time 2.031 (2.038) data 0.000 (0.001) loss 0.4573 (1.2273) lr 1.9298e-03 eta 1 day, 19:22:08
epoch [12/50] batch [1420/2000] time 2.060 (2.038) data 0.000 (0.001) loss 0.9970 (1.2271) lr 1.9298e-03 eta 1 day, 19:21:23
epoch [12/50] batch [1440/2000] time 2.034 (2.038) data 0.000 (0.001) loss 0.2548 (1.2267) lr 1.9298e-03 eta 1 day, 19:20:45
epoch [12/50] batch [1460/2000] time 2.004 (2.038) data 0.000 (0.001) loss 2.2047 (1.2271) lr 1.9298e-03 eta 1 day, 19:20:05
epoch [12/50] batch [1480/2000] time 2.005 (2.038) data 0.000 (0.001) loss 1.4131 (1.2319) lr 1.9298e-03 eta 1 day, 19:19:29
epoch [12/50] batch [1500/2000] time 2.061 (2.038) data 0.000 (0.001) loss 1.4819 (1.2315) lr 1.9298e-03 eta 1 day, 19:18:43
epoch [12/50] batch [1520/2000] time 2.058 (2.038) data 0.000 (0.001) loss 1.1233 (1.2348) lr 1.9298e-03 eta 1 day, 19:18:01
epoch [12/50] batch [1540/2000] time 2.059 (2.038) data 0.000 (0.001) loss 0.3131 (1.2323) lr 1.9298e-03 eta 1 day, 19:17:18
epoch [12/50] batch [1560/2000] time 2.059 (2.038) data 0.000 (0.001) loss 0.0500 (1.2334) lr 1.9298e-03 eta 1 day, 19:16:33
epoch [12/50] batch [1580/2000] time 2.003 (2.038) data 0.000 (0.001) loss 1.0647 (1.2313) lr 1.9298e-03 eta 1 day, 19:15:40
epoch [12/50] batch [1600/2000] time 2.033 (2.038) data 0.000 (0.001) loss 0.1899 (1.2324) lr 1.9298e-03 eta 1 day, 19:14:56
epoch [12/50] batch [1620/2000] time 2.058 (2.038) data 0.000 (0.001) loss 1.6890 (1.2323) lr 1.9298e-03 eta 1 day, 19:14:17
epoch [12/50] batch [1640/2000] time 1.997 (2.038) data 0.000 (0.001) loss 1.0688 (1.2353) lr 1.9298e-03 eta 1 day, 19:13:40
epoch [12/50] batch [1660/2000] time 2.062 (2.038) data 0.000 (0.001) loss 2.5498 (1.2365) lr 1.9298e-03 eta 1 day, 19:12:56
epoch [12/50] batch [1680/2000] time 2.037 (2.038) data 0.002 (0.001) loss 1.7188 (1.2350) lr 1.9298e-03 eta 1 day, 19:12:09
epoch [12/50] batch [1700/2000] time 2.057 (2.038) data 0.000 (0.001) loss 0.8038 (1.2343) lr 1.9298e-03 eta 1 day, 19:11:24
epoch [12/50] batch [1720/2000] time 2.061 (2.038) data 0.000 (0.001) loss 0.0857 (1.2334) lr 1.9298e-03 eta 1 day, 19:10:43
epoch [12/50] batch [1740/2000] time 2.058 (2.038) data 0.000 (0.001) loss 0.7185 (1.2307) lr 1.9298e-03 eta 1 day, 19:10:07
epoch [12/50] batch [1760/2000] time 2.002 (2.038) data 0.000 (0.001) loss 0.2564 (1.2316) lr 1.9298e-03 eta 1 day, 19:09:23
epoch [12/50] batch [1780/2000] time 2.059 (2.038) data 0.000 (0.001) loss 0.1067 (1.2355) lr 1.9298e-03 eta 1 day, 19:08:42
epoch [12/50] batch [1800/2000] time 2.060 (2.038) data 0.000 (0.001) loss 1.7163 (1.2332) lr 1.9298e-03 eta 1 day, 19:07:57
epoch [12/50] batch [1820/2000] time 2.056 (2.038) data 0.000 (0.001) loss 0.9304 (1.2358) lr 1.9298e-03 eta 1 day, 19:07:15
epoch [12/50] batch [1840/2000] time 1.977 (2.038) data 0.000 (0.001) loss 0.7638 (1.2391) lr 1.9298e-03 eta 1 day, 19:06:27
epoch [12/50] batch [1860/2000] time 2.001 (2.038) data 0.000 (0.001) loss 2.0697 (1.2388) lr 1.9298e-03 eta 1 day, 19:05:37
epoch [12/50] batch [1880/2000] time 2.058 (2.037) data 0.000 (0.001) loss 0.9786 (1.2401) lr 1.9298e-03 eta 1 day, 19:04:52
epoch [12/50] batch [1900/2000] time 2.062 (2.037) data 0.000 (0.001) loss 1.1404 (1.2419) lr 1.9298e-03 eta 1 day, 19:04:13
epoch [12/50] batch [1920/2000] time 2.035 (2.037) data 0.000 (0.001) loss 0.9258 (1.2438) lr 1.9298e-03 eta 1 day, 19:03:26
epoch [12/50] batch [1940/2000] time 2.056 (2.037) data 0.000 (0.001) loss 2.4280 (1.2479) lr 1.9298e-03 eta 1 day, 19:02:39
epoch [12/50] batch [1960/2000] time 2.009 (2.037) data 0.000 (0.001) loss 0.0349 (1.2452) lr 1.9298e-03 eta 1 day, 19:01:55
epoch [12/50] batch [1980/2000] time 2.058 (2.037) data 0.000 (0.001) loss 1.0013 (1.2440) lr 1.9298e-03 eta 1 day, 19:01:11
epoch [12/50] batch [2000/2000] time 2.054 (2.037) data 0.000 (0.001) loss 2.4405 (1.2445) lr 1.9048e-03 eta 1 day, 19:00:26
epoch [13/50] batch [20/2000] time 2.002 (2.064) data 0.000 (0.034) loss 0.7271 (1.0705) lr 1.9048e-03 eta 1 day, 19:33:52
epoch [13/50] batch [40/2000] time 2.059 (2.050) data 0.000 (0.017) loss 0.4092 (1.2414) lr 1.9048e-03 eta 1 day, 19:15:29
epoch [13/50] batch [60/2000] time 2.034 (2.045) data 0.001 (0.012) loss 0.7348 (1.2738) lr 1.9048e-03 eta 1 day, 19:08:02
epoch [13/50] batch [80/2000] time 2.057 (2.042) data 0.000 (0.009) loss 0.5837 (1.2477) lr 1.9048e-03 eta 1 day, 19:03:30
epoch [13/50] batch [100/2000] time 2.062 (2.041) data 0.000 (0.007) loss 0.7330 (1.2385) lr 1.9048e-03 eta 1 day, 19:02:05
epoch [13/50] batch [120/2000] time 2.058 (2.039) data 0.000 (0.006) loss 2.2074 (1.1965) lr 1.9048e-03 eta 1 day, 18:58:47
epoch [13/50] batch [140/2000] time 2.057 (2.038) data 0.000 (0.005) loss 0.7844 (1.2109) lr 1.9048e-03 eta 1 day, 18:56:33
epoch [13/50] batch [160/2000] time 2.031 (2.037) data 0.000 (0.005) loss 0.8540 (1.2579) lr 1.9048e-03 eta 1 day, 18:55:20
epoch [13/50] batch [180/2000] time 2.060 (2.037) data 0.000 (0.004) loss 2.3047 (1.2658) lr 1.9048e-03 eta 1 day, 18:54:39
epoch [13/50] batch [200/2000] time 2.058 (2.038) data 0.000 (0.004) loss 1.3726 (1.2591) lr 1.9048e-03 eta 1 day, 18:54:13
epoch [13/50] batch [220/2000] time 2.001 (2.038) data 0.004 (0.003) loss 0.9280 (1.2597) lr 1.9048e-03 eta 1 day, 18:53:53
epoch [13/50] batch [240/2000] time 2.065 (2.038) data 0.000 (0.003) loss 0.9613 (1.2497) lr 1.9048e-03 eta 1 day, 18:53:11
epoch [13/50] batch [260/2000] time 2.057 (2.038) data 0.000 (0.003) loss 0.7089 (1.2230) lr 1.9048e-03 eta 1 day, 18:52:01
epoch [13/50] batch [280/2000] time 2.057 (2.037) data 0.000 (0.003) loss 0.3606 (1.2151) lr 1.9048e-03 eta 1 day, 18:51:10
epoch [13/50] batch [300/2000] time 2.057 (2.037) data 0.000 (0.003) loss 0.3558 (1.2202) lr 1.9048e-03 eta 1 day, 18:49:56
epoch [13/50] batch [320/2000] time 2.056 (2.037) data 0.000 (0.002) loss 3.3125 (1.2252) lr 1.9048e-03 eta 1 day, 18:48:59
epoch [13/50] batch [340/2000] time 2.034 (2.037) data 0.000 (0.002) loss 1.6287 (1.2336) lr 1.9048e-03 eta 1 day, 18:48:07
epoch [13/50] batch [360/2000] time 2.002 (2.037) data 0.000 (0.002) loss 0.7026 (1.2250) lr 1.9048e-03 eta 1 day, 18:47:28
epoch [13/50] batch [380/2000] time 2.003 (2.036) data 0.000 (0.002) loss 0.9951 (1.2209) lr 1.9048e-03 eta 1 day, 18:46:31
epoch [13/50] batch [400/2000] time 2.058 (2.036) data 0.000 (0.002) loss 0.8998 (1.2080) lr 1.9048e-03 eta 1 day, 18:45:54
epoch [13/50] batch [420/2000] time 2.057 (2.036) data 0.000 (0.002) loss 0.7432 (1.1999) lr 1.9048e-03 eta 1 day, 18:45:10
epoch [13/50] batch [440/2000] time 2.059 (2.036) data 0.000 (0.002) loss 0.6257 (1.2024) lr 1.9048e-03 eta 1 day, 18:44:34
epoch [13/50] batch [460/2000] time 2.060 (2.036) data 0.000 (0.002) loss 0.5375 (1.2164) lr 1.9048e-03 eta 1 day, 18:43:46
epoch [13/50] batch [480/2000] time 2.040 (2.036) data 0.000 (0.002) loss 1.7341 (1.2179) lr 1.9048e-03 eta 1 day, 18:43:09
epoch [13/50] batch [500/2000] time 2.001 (2.036) data 0.000 (0.002) loss 1.9036 (1.2284) lr 1.9048e-03 eta 1 day, 18:42:20
epoch [13/50] batch [520/2000] time 2.057 (2.036) data 0.000 (0.002) loss 2.4405 (1.2341) lr 1.9048e-03 eta 1 day, 18:41:27
epoch [13/50] batch [540/2000] time 2.035 (2.036) data 0.000 (0.002) loss 1.0357 (1.2323) lr 1.9048e-03 eta 1 day, 18:40:56
epoch [13/50] batch [560/2000] time 2.059 (2.036) data 0.000 (0.001) loss 0.2763 (1.2387) lr 1.9048e-03 eta 1 day, 18:40:11
epoch [13/50] batch [580/2000] time 2.037 (2.036) data 0.000 (0.001) loss 2.2871 (1.2405) lr 1.9048e-03 eta 1 day, 18:39:30
epoch [13/50] batch [600/2000] time 2.035 (2.036) data 0.001 (0.001) loss 1.1479 (1.2394) lr 1.9048e-03 eta 1 day, 18:38:45
epoch [13/50] batch [620/2000] time 2.035 (2.036) data 0.000 (0.001) loss 0.0911 (1.2342) lr 1.9048e-03 eta 1 day, 18:37:53
epoch [13/50] batch [640/2000] time 2.000 (2.036) data 0.000 (0.001) loss 0.1997 (1.2296) lr 1.9048e-03 eta 1 day, 18:37:00
epoch [13/50] batch [660/2000] time 2.035 (2.036) data 0.000 (0.001) loss 0.5563 (1.2204) lr 1.9048e-03 eta 1 day, 18:36:23
epoch [13/50] batch [680/2000] time 2.056 (2.036) data 0.000 (0.001) loss 1.2258 (1.2133) lr 1.9048e-03 eta 1 day, 18:35:30
epoch [13/50] batch [700/2000] time 2.035 (2.035) data 0.000 (0.001) loss 0.3039 (1.2169) lr 1.9048e-03 eta 1 day, 18:34:31
epoch [13/50] batch [720/2000] time 2.007 (2.035) data 0.000 (0.001) loss 0.4615 (1.2221) lr 1.9048e-03 eta 1 day, 18:33:41
epoch [13/50] batch [740/2000] time 2.034 (2.035) data 0.000 (0.001) loss 1.9717 (1.2307) lr 1.9048e-03 eta 1 day, 18:33:08
epoch [13/50] batch [760/2000] time 2.033 (2.036) data 0.000 (0.001) loss 1.9829 (1.2214) lr 1.9048e-03 eta 1 day, 18:32:39
epoch [13/50] batch [780/2000] time 2.053 (2.036) data 0.000 (0.001) loss 0.2280 (1.2180) lr 1.9048e-03 eta 1 day, 18:31:50
epoch [13/50] batch [800/2000] time 2.054 (2.035) data 0.000 (0.001) loss 0.4277 (1.2223) lr 1.9048e-03 eta 1 day, 18:30:42
epoch [13/50] batch [820/2000] time 2.057 (2.035) data 0.000 (0.001) loss 1.4362 (1.2235) lr 1.9048e-03 eta 1 day, 18:29:53
epoch [13/50] batch [840/2000] time 2.008 (2.035) data 0.000 (0.001) loss 2.2693 (1.2244) lr 1.9048e-03 eta 1 day, 18:29:07
epoch [13/50] batch [860/2000] time 2.059 (2.035) data 0.000 (0.001) loss 1.4481 (1.2250) lr 1.9048e-03 eta 1 day, 18:28:30
epoch [13/50] batch [880/2000] time 1.999 (2.035) data 0.000 (0.001) loss 3.0359 (1.2292) lr 1.9048e-03 eta 1 day, 18:27:52
epoch [13/50] batch [900/2000] time 1.999 (2.035) data 0.000 (0.001) loss 0.9472 (1.2328) lr 1.9048e-03 eta 1 day, 18:27:11
epoch [13/50] batch [920/2000] time 2.034 (2.035) data 0.000 (0.001) loss 1.6348 (1.2357) lr 1.9048e-03 eta 1 day, 18:26:29
epoch [13/50] batch [940/2000] time 2.033 (2.035) data 0.000 (0.001) loss 0.4718 (1.2300) lr 1.9048e-03 eta 1 day, 18:25:36
epoch [13/50] batch [960/2000] time 2.061 (2.035) data 0.000 (0.001) loss 0.4514 (1.2369) lr 1.9048e-03 eta 1 day, 18:25:01
epoch [13/50] batch [980/2000] time 2.059 (2.035) data 0.000 (0.001) loss 0.2959 (1.2332) lr 1.9048e-03 eta 1 day, 18:24:25
epoch [13/50] batch [1000/2000] time 2.066 (2.035) data 0.000 (0.001) loss 0.9788 (1.2351) lr 1.9048e-03 eta 1 day, 18:23:50
epoch [13/50] batch [1020/2000] time 2.001 (2.035) data 0.000 (0.001) loss 1.8525 (1.2363) lr 1.9048e-03 eta 1 day, 18:23:13
epoch [13/50] batch [1040/2000] time 2.060 (2.035) data 0.000 (0.001) loss 2.4138 (1.2327) lr 1.9048e-03 eta 1 day, 18:22:35
epoch [13/50] batch [1060/2000] time 2.033 (2.035) data 0.000 (0.001) loss 0.4413 (1.2292) lr 1.9048e-03 eta 1 day, 18:21:55
epoch [13/50] batch [1080/2000] time 2.031 (2.035) data 0.000 (0.001) loss 0.0936 (1.2248) lr 1.9048e-03 eta 1 day, 18:21:18
epoch [13/50] batch [1100/2000] time 2.003 (2.035) data 0.000 (0.001) loss 0.3987 (1.2254) lr 1.9048e-03 eta 1 day, 18:20:33
epoch [13/50] batch [1120/2000] time 1.997 (2.035) data 0.000 (0.001) loss 1.6572 (1.2226) lr 1.9048e-03 eta 1 day, 18:19:45
epoch [13/50] batch [1140/2000] time 2.034 (2.035) data 0.001 (0.001) loss 1.1679 (1.2256) lr 1.9048e-03 eta 1 day, 18:18:54
epoch [13/50] batch [1160/2000] time 2.058 (2.035) data 0.000 (0.001) loss 2.9995 (1.2215) lr 1.9048e-03 eta 1 day, 18:18:18
epoch [13/50] batch [1180/2000] time 2.061 (2.035) data 0.000 (0.001) loss 1.2632 (1.2172) lr 1.9048e-03 eta 1 day, 18:17:39
epoch [13/50] batch [1200/2000] time 2.058 (2.035) data 0.000 (0.001) loss 1.0117 (1.2216) lr 1.9048e-03 eta 1 day, 18:17:02
epoch [13/50] batch [1220/2000] time 2.034 (2.035) data 0.000 (0.001) loss 0.3741 (1.2183) lr 1.9048e-03 eta 1 day, 18:16:14
epoch [13/50] batch [1240/2000] time 2.034 (2.035) data 0.000 (0.001) loss 0.6595 (1.2188) lr 1.9048e-03 eta 1 day, 18:15:33
epoch [13/50] batch [1260/2000] time 1.975 (2.035) data 0.000 (0.001) loss 2.2333 (1.2241) lr 1.9048e-03 eta 1 day, 18:14:44
epoch [13/50] batch [1280/2000] time 2.032 (2.035) data 0.000 (0.001) loss 1.5397 (1.2253) lr 1.9048e-03 eta 1 day, 18:14:05
epoch [13/50] batch [1300/2000] time 2.056 (2.035) data 0.000 (0.001) loss 1.9840 (1.2299) lr 1.9048e-03 eta 1 day, 18:13:19
epoch [13/50] batch [1320/2000] time 2.056 (2.035) data 0.000 (0.001) loss 0.1253 (1.2232) lr 1.9048e-03 eta 1 day, 18:12:40
epoch [13/50] batch [1340/2000] time 1.999 (2.035) data 0.000 (0.001) loss 3.2568 (1.2256) lr 1.9048e-03 eta 1 day, 18:11:52
epoch [13/50] batch [1360/2000] time 2.056 (2.035) data 0.000 (0.001) loss 1.7151 (1.2273) lr 1.9048e-03 eta 1 day, 18:11:08
epoch [13/50] batch [1380/2000] time 2.064 (2.035) data 0.000 (0.001) loss 1.0161 (1.2294) lr 1.9048e-03 eta 1 day, 18:10:22
epoch [13/50] batch [1400/2000] time 2.056 (2.035) data 0.000 (0.001) loss 1.6497 (1.2316) lr 1.9048e-03 eta 1 day, 18:09:39
epoch [13/50] batch [1420/2000] time 1.974 (2.034) data 0.000 (0.001) loss 2.7791 (1.2360) lr 1.9048e-03 eta 1 day, 18:08:47
epoch [13/50] batch [1440/2000] time 2.032 (2.034) data 0.000 (0.001) loss 1.4958 (1.2372) lr 1.9048e-03 eta 1 day, 18:08:11
epoch [13/50] batch [1460/2000] time 2.030 (2.034) data 0.000 (0.001) loss 0.8475 (1.2394) lr 1.9048e-03 eta 1 day, 18:07:26
epoch [13/50] batch [1480/2000] time 2.033 (2.034) data 0.000 (0.001) loss 0.5087 (1.2410) lr 1.9048e-03 eta 1 day, 18:06:48
epoch [13/50] batch [1500/2000] time 2.036 (2.034) data 0.000 (0.001) loss 0.2983 (1.2419) lr 1.9048e-03 eta 1 day, 18:06:02
epoch [13/50] batch [1520/2000] time 2.041 (2.035) data 0.000 (0.001) loss 1.7997 (1.2405) lr 1.9048e-03 eta 1 day, 18:05:31
epoch [13/50] batch [1540/2000] time 2.031 (2.035) data 0.000 (0.001) loss 0.3721 (1.2413) lr 1.9048e-03 eta 1 day, 18:04:57
epoch [13/50] batch [1560/2000] time 2.058 (2.035) data 0.000 (0.001) loss 0.1262 (1.2383) lr 1.9048e-03 eta 1 day, 18:04:08
epoch [13/50] batch [1580/2000] time 2.001 (2.035) data 0.000 (0.001) loss 0.6222 (1.2389) lr 1.9048e-03 eta 1 day, 18:03:28
epoch [13/50] batch [1600/2000] time 2.003 (2.035) data 0.000 (0.001) loss 2.7183 (1.2370) lr 1.9048e-03 eta 1 day, 18:02:53
epoch [13/50] batch [1620/2000] time 2.059 (2.035) data 0.000 (0.001) loss 0.3437 (1.2353) lr 1.9048e-03 eta 1 day, 18:02:14
epoch [13/50] batch [1640/2000] time 2.058 (2.035) data 0.000 (0.001) loss 0.2483 (1.2324) lr 1.9048e-03 eta 1 day, 18:01:31
epoch [13/50] batch [1660/2000] time 2.056 (2.035) data 0.000 (0.001) loss 1.7873 (1.2365) lr 1.9048e-03 eta 1 day, 18:00:53
epoch [13/50] batch [1680/2000] time 2.067 (2.035) data 0.001 (0.001) loss 0.5407 (1.2308) lr 1.9048e-03 eta 1 day, 18:00:14
epoch [13/50] batch [1700/2000] time 2.062 (2.035) data 0.000 (0.001) loss 0.9987 (1.2286) lr 1.9048e-03 eta 1 day, 17:59:33
epoch [13/50] batch [1720/2000] time 2.035 (2.035) data 0.000 (0.001) loss 0.2756 (1.2271) lr 1.9048e-03 eta 1 day, 17:59:00
epoch [13/50] batch [1740/2000] time 1.987 (2.035) data 0.002 (0.001) loss 2.1851 (1.2267) lr 1.9048e-03 eta 1 day, 17:58:13
epoch [13/50] batch [1760/2000] time 2.036 (2.035) data 0.000 (0.001) loss 0.6178 (1.2284) lr 1.9048e-03 eta 1 day, 17:57:35
epoch [13/50] batch [1780/2000] time 2.031 (2.035) data 0.000 (0.001) loss 0.4544 (1.2337) lr 1.9048e-03 eta 1 day, 17:56:56
epoch [13/50] batch [1800/2000] time 1.975 (2.035) data 0.000 (0.001) loss 2.7622 (1.2334) lr 1.9048e-03 eta 1 day, 17:56:19
epoch [13/50] batch [1820/2000] time 2.001 (2.035) data 0.000 (0.001) loss 1.4642 (1.2341) lr 1.9048e-03 eta 1 day, 17:55:39
epoch [13/50] batch [1840/2000] time 2.066 (2.035) data 0.000 (0.001) loss 0.0634 (1.2308) lr 1.9048e-03 eta 1 day, 17:54:57
epoch [13/50] batch [1860/2000] time 2.060 (2.035) data 0.000 (0.001) loss 0.0127 (1.2289) lr 1.9048e-03 eta 1 day, 17:54:15
epoch [13/50] batch [1880/2000] time 2.063 (2.035) data 0.000 (0.001) loss 0.2859 (1.2288) lr 1.9048e-03 eta 1 day, 17:53:38
epoch [13/50] batch [1900/2000] time 2.004 (2.035) data 0.000 (0.001) loss 1.4962 (1.2291) lr 1.9048e-03 eta 1 day, 17:53:00
epoch [13/50] batch [1920/2000] time 2.034 (2.035) data 0.000 (0.001) loss 0.3172 (1.2295) lr 1.9048e-03 eta 1 day, 17:52:26
epoch [13/50] batch [1940/2000] time 2.055 (2.035) data 0.000 (0.001) loss 1.5372 (1.2292) lr 1.9048e-03 eta 1 day, 17:51:46
epoch [13/50] batch [1960/2000] time 2.001 (2.035) data 0.000 (0.001) loss 1.2897 (1.2327) lr 1.9048e-03 eta 1 day, 17:51:03
epoch [13/50] batch [1980/2000] time 2.061 (2.035) data 0.000 (0.001) loss 1.2438 (1.2282) lr 1.9048e-03 eta 1 day, 17:50:24
epoch [13/50] batch [2000/2000] time 2.033 (2.035) data 0.000 (0.001) loss 0.6866 (1.2266) lr 1.8763e-03 eta 1 day, 17:49:37
epoch [14/50] batch [20/2000] time 2.058 (2.069) data 0.000 (0.034) loss 0.4280 (0.8493) lr 1.8763e-03 eta 1 day, 18:31:35
epoch [14/50] batch [40/2000] time 2.046 (2.053) data 0.000 (0.017) loss 0.4632 (0.9664) lr 1.8763e-03 eta 1 day, 18:10:10
epoch [14/50] batch [60/2000] time 2.056 (2.049) data 0.000 (0.012) loss 2.8231 (1.1552) lr 1.8763e-03 eta 1 day, 18:04:41
epoch [14/50] batch [80/2000] time 2.055 (2.045) data 0.000 (0.009) loss 1.1255 (1.1091) lr 1.8763e-03 eta 1 day, 17:59:50
epoch [14/50] batch [100/2000] time 2.002 (2.042) data 0.000 (0.007) loss 1.3554 (1.1354) lr 1.8763e-03 eta 1 day, 17:55:26
epoch [14/50] batch [120/2000] time 2.033 (2.041) data 0.000 (0.006) loss 2.1933 (1.2555) lr 1.8763e-03 eta 1 day, 17:53:37
epoch [14/50] batch [140/2000] time 2.058 (2.041) data 0.000 (0.005) loss 0.7075 (1.2733) lr 1.8763e-03 eta 1 day, 17:51:55
epoch [14/50] batch [160/2000] time 2.035 (2.040) data 0.000 (0.005) loss 0.7121 (1.2520) lr 1.8763e-03 eta 1 day, 17:50:24
epoch [14/50] batch [180/2000] time 2.057 (2.039) data 0.000 (0.004) loss 1.7969 (1.2361) lr 1.8763e-03 eta 1 day, 17:48:50
epoch [14/50] batch [200/2000] time 2.060 (2.039) data 0.000 (0.004) loss 0.6028 (1.1773) lr 1.8763e-03 eta 1 day, 17:48:07
epoch [14/50] batch [220/2000] time 2.056 (2.039) data 0.000 (0.003) loss 2.1658 (1.1913) lr 1.8763e-03 eta 1 day, 17:47:27
epoch [14/50] batch [240/2000] time 1.998 (2.038) data 0.000 (0.003) loss 2.1272 (1.2131) lr 1.8763e-03 eta 1 day, 17:45:36
epoch [14/50] batch [260/2000] time 1.998 (2.038) data 0.000 (0.003) loss 1.1019 (1.2166) lr 1.8763e-03 eta 1 day, 17:45:02
epoch [14/50] batch [280/2000] time 2.055 (2.039) data 0.000 (0.003) loss 0.8663 (1.2331) lr 1.8763e-03 eta 1 day, 17:44:45
epoch [14/50] batch [300/2000] time 2.032 (2.038) data 0.000 (0.003) loss 0.4899 (1.2219) lr 1.8763e-03 eta 1 day, 17:43:22
epoch [14/50] batch [320/2000] time 2.060 (2.037) data 0.000 (0.002) loss 2.0840 (1.2309) lr 1.8763e-03 eta 1 day, 17:41:42
epoch [14/50] batch [340/2000] time 2.040 (2.037) data 0.000 (0.002) loss 0.8722 (1.2441) lr 1.8763e-03 eta 1 day, 17:40:51
epoch [14/50] batch [360/2000] time 2.034 (2.037) data 0.000 (0.002) loss 1.1280 (1.2404) lr 1.8763e-03 eta 1 day, 17:40:13
epoch [14/50] batch [380/2000] time 2.034 (2.037) data 0.000 (0.002) loss 0.9639 (1.2286) lr 1.8763e-03 eta 1 day, 17:38:55
epoch [14/50] batch [400/2000] time 2.035 (2.036) data 0.000 (0.002) loss 1.4691 (1.2209) lr 1.8763e-03 eta 1 day, 17:37:57
epoch [14/50] batch [420/2000] time 2.033 (2.036) data 0.000 (0.002) loss 1.5087 (1.2232) lr 1.8763e-03 eta 1 day, 17:37:18
epoch [14/50] batch [440/2000] time 2.059 (2.036) data 0.000 (0.002) loss 3.4815 (1.2262) lr 1.8763e-03 eta 1 day, 17:36:24
epoch [14/50] batch [460/2000] time 2.056 (2.036) data 0.000 (0.002) loss 1.7891 (1.2242) lr 1.8763e-03 eta 1 day, 17:35:41
epoch [14/50] batch [480/2000] time 2.034 (2.036) data 0.000 (0.002) loss 0.9471 (1.2181) lr 1.8763e-03 eta 1 day, 17:34:54
epoch [14/50] batch [500/2000] time 2.055 (2.036) data 0.000 (0.002) loss 0.1281 (1.2092) lr 1.8763e-03 eta 1 day, 17:34:13
epoch [14/50] batch [520/2000] time 2.037 (2.036) data 0.000 (0.002) loss 0.8354 (1.2048) lr 1.8763e-03 eta 1 day, 17:33:42
epoch [14/50] batch [540/2000] time 2.003 (2.036) data 0.000 (0.002) loss 0.1385 (1.1983) lr 1.8763e-03 eta 1 day, 17:33:01
epoch [14/50] batch [560/2000] time 2.004 (2.036) data 0.000 (0.001) loss 1.2578 (1.1980) lr 1.8763e-03 eta 1 day, 17:32:12
epoch [14/50] batch [580/2000] time 2.005 (2.036) data 0.000 (0.001) loss 0.7368 (1.1999) lr 1.8763e-03 eta 1 day, 17:31:35
epoch [14/50] batch [600/2000] time 2.058 (2.036) data 0.001 (0.001) loss 0.4649 (1.1902) lr 1.8763e-03 eta 1 day, 17:30:50
epoch [14/50] batch [620/2000] time 2.061 (2.036) data 0.000 (0.001) loss 0.5800 (1.1855) lr 1.8763e-03 eta 1 day, 17:30:24
epoch [14/50] batch [640/2000] time 2.062 (2.036) data 0.000 (0.001) loss 0.8066 (1.1891) lr 1.8763e-03 eta 1 day, 17:29:46
epoch [14/50] batch [660/2000] time 2.057 (2.036) data 0.000 (0.001) loss 0.4811 (1.1939) lr 1.8763e-03 eta 1 day, 17:28:53
epoch [14/50] batch [680/2000] time 2.005 (2.036) data 0.000 (0.001) loss 1.6485 (1.1928) lr 1.8763e-03 eta 1 day, 17:28:02
epoch [14/50] batch [700/2000] time 2.058 (2.036) data 0.000 (0.001) loss 1.1713 (1.1833) lr 1.8763e-03 eta 1 day, 17:27:24
epoch [14/50] batch [720/2000] time 1.999 (2.036) data 0.000 (0.001) loss 1.5059 (1.1886) lr 1.8763e-03 eta 1 day, 17:26:46
epoch [14/50] batch [740/2000] time 2.058 (2.036) data 0.000 (0.001) loss 1.9105 (1.1934) lr 1.8763e-03 eta 1 day, 17:26:15
epoch [14/50] batch [760/2000] time 1.980 (2.036) data 0.000 (0.001) loss 0.2798 (1.1864) lr 1.8763e-03 eta 1 day, 17:25:14
epoch [14/50] batch [780/2000] time 2.033 (2.036) data 0.000 (0.001) loss 0.5834 (1.1878) lr 1.8763e-03 eta 1 day, 17:24:37
epoch [14/50] batch [800/2000] time 2.003 (2.036) data 0.000 (0.001) loss 3.4073 (1.1974) lr 1.8763e-03 eta 1 day, 17:23:48
epoch [14/50] batch [820/2000] time 2.057 (2.036) data 0.000 (0.001) loss 3.0526 (1.2072) lr 1.8763e-03 eta 1 day, 17:23:03
epoch [14/50] batch [840/2000] time 2.059 (2.036) data 0.000 (0.001) loss 1.8895 (1.2036) lr 1.8763e-03 eta 1 day, 17:22:20
epoch [14/50] batch [860/2000] time 2.058 (2.036) data 0.000 (0.001) loss 1.2735 (1.2004) lr 1.8763e-03 eta 1 day, 17:21:40
epoch [14/50] batch [880/2000] time 2.035 (2.036) data 0.000 (0.001) loss 0.4211 (1.1996) lr 1.8763e-03 eta 1 day, 17:20:46
epoch [14/50] batch [900/2000] time 2.003 (2.036) data 0.000 (0.001) loss 2.2730 (1.1979) lr 1.8763e-03 eta 1 day, 17:20:05
epoch [14/50] batch [920/2000] time 2.061 (2.036) data 0.000 (0.001) loss 0.9383 (1.2006) lr 1.8763e-03 eta 1 day, 17:19:28
epoch [14/50] batch [940/2000] time 2.041 (2.036) data 0.001 (0.001) loss 0.6867 (1.1977) lr 1.8763e-03 eta 1 day, 17:18:53
epoch [14/50] batch [960/2000] time 2.066 (2.036) data 0.000 (0.001) loss 0.8221 (1.1998) lr 1.8763e-03 eta 1 day, 17:18:24
epoch [14/50] batch [980/2000] time 2.041 (2.036) data 0.000 (0.001) loss 0.4901 (1.2014) lr 1.8763e-03 eta 1 day, 17:17:59
epoch [14/50] batch [1000/2000] time 2.063 (2.036) data 0.000 (0.001) loss 1.0062 (1.2021) lr 1.8763e-03 eta 1 day, 17:17:26
epoch [14/50] batch [1020/2000] time 2.061 (2.036) data 0.000 (0.001) loss 0.8258 (1.2053) lr 1.8763e-03 eta 1 day, 17:16:48
epoch [14/50] batch [1040/2000] time 1.980 (2.036) data 0.000 (0.001) loss 1.1424 (1.2086) lr 1.8763e-03 eta 1 day, 17:16:11
epoch [14/50] batch [1060/2000] time 2.059 (2.036) data 0.000 (0.001) loss 2.7191 (1.2117) lr 1.8763e-03 eta 1 day, 17:15:34
epoch [14/50] batch [1080/2000] time 2.002 (2.036) data 0.000 (0.001) loss 0.1408 (1.2083) lr 1.8763e-03 eta 1 day, 17:14:45
epoch [14/50] batch [1100/2000] time 2.038 (2.036) data 0.000 (0.001) loss 0.3909 (1.2025) lr 1.8763e-03 eta 1 day, 17:14:07
epoch [14/50] batch [1120/2000] time 2.036 (2.037) data 0.000 (0.001) loss 0.4421 (1.1997) lr 1.8763e-03 eta 1 day, 17:13:40
epoch [14/50] batch [1140/2000] time 2.060 (2.037) data 0.001 (0.001) loss 0.2074 (1.1964) lr 1.8763e-03 eta 1 day, 17:12:59
epoch [14/50] batch [1160/2000] time 2.058 (2.037) data 0.000 (0.001) loss 3.8512 (1.1974) lr 1.8763e-03 eta 1 day, 17:12:19
epoch [14/50] batch [1180/2000] time 2.064 (2.037) data 0.000 (0.001) loss 0.7155 (1.2014) lr 1.8763e-03 eta 1 day, 17:11:46
epoch [14/50] batch [1200/2000] time 2.058 (2.037) data 0.000 (0.001) loss 1.6530 (1.2017) lr 1.8763e-03 eta 1 day, 17:11:06
epoch [14/50] batch [1220/2000] time 2.002 (2.037) data 0.000 (0.001) loss 1.2857 (1.2017) lr 1.8763e-03 eta 1 day, 17:10:26
epoch [14/50] batch [1240/2000] time 2.000 (2.037) data 0.000 (0.001) loss 0.5919 (1.2041) lr 1.8763e-03 eta 1 day, 17:09:37
epoch [14/50] batch [1260/2000] time 2.065 (2.036) data 0.000 (0.001) loss 1.7636 (1.2053) lr 1.8763e-03 eta 1 day, 17:08:54
epoch [14/50] batch [1280/2000] time 2.035 (2.036) data 0.000 (0.001) loss 1.8088 (1.2093) lr 1.8763e-03 eta 1 day, 17:08:06
epoch [14/50] batch [1300/2000] time 2.005 (2.036) data 0.000 (0.001) loss 1.3992 (1.2079) lr 1.8763e-03 eta 1 day, 17:07:19
epoch [14/50] batch [1320/2000] time 2.042 (2.036) data 0.000 (0.001) loss 1.6035 (1.2082) lr 1.8763e-03 eta 1 day, 17:06:42
epoch [14/50] batch [1340/2000] time 2.006 (2.036) data 0.000 (0.001) loss 1.1820 (1.2123) lr 1.8763e-03 eta 1 day, 17:06:01
epoch [14/50] batch [1360/2000] time 2.053 (2.036) data 0.000 (0.001) loss 1.2331 (1.2095) lr 1.8763e-03 eta 1 day, 17:05:24
epoch [14/50] batch [1380/2000] time 2.054 (2.036) data 0.000 (0.001) loss 1.2185 (1.2094) lr 1.8763e-03 eta 1 day, 17:04:45
epoch [14/50] batch [1400/2000] time 2.050 (2.036) data 0.000 (0.001) loss 1.6147 (1.2060) lr 1.8763e-03 eta 1 day, 17:03:57
epoch [14/50] batch [1420/2000] time 2.028 (2.036) data 0.000 (0.001) loss 1.0736 (1.2132) lr 1.8763e-03 eta 1 day, 17:03:02
epoch [14/50] batch [1440/2000] time 2.056 (2.036) data 0.000 (0.001) loss 1.0074 (1.2146) lr 1.8763e-03 eta 1 day, 17:02:05
epoch [14/50] batch [1460/2000] time 2.028 (2.036) data 0.000 (0.001) loss 2.0041 (1.2119) lr 1.8763e-03 eta 1 day, 17:01:21
epoch [14/50] batch [1480/2000] time 1.977 (2.036) data 0.000 (0.001) loss 3.2492 (1.2112) lr 1.8763e-03 eta 1 day, 17:00:35
epoch [14/50] batch [1500/2000] time 2.027 (2.036) data 0.000 (0.001) loss 1.3523 (1.2083) lr 1.8763e-03 eta 1 day, 16:59:45
epoch [14/50] batch [1520/2000] time 2.028 (2.035) data 0.000 (0.001) loss 1.0128 (1.2055) lr 1.8763e-03 eta 1 day, 16:58:50
epoch [14/50] batch [1540/2000] time 1.998 (2.035) data 0.000 (0.001) loss 1.2231 (1.2037) lr 1.8763e-03 eta 1 day, 16:58:01
epoch [14/50] batch [1560/2000] time 2.052 (2.035) data 0.000 (0.001) loss 0.2620 (1.2061) lr 1.8763e-03 eta 1 day, 16:57:09
epoch [14/50] batch [1580/2000] time 2.031 (2.035) data 0.000 (0.001) loss 0.8388 (1.2063) lr 1.8763e-03 eta 1 day, 16:56:22
epoch [14/50] batch [1600/2000] time 2.061 (2.035) data 0.000 (0.001) loss 0.3398 (1.2018) lr 1.8763e-03 eta 1 day, 16:55:34
epoch [14/50] batch [1620/2000] time 2.052 (2.035) data 0.000 (0.001) loss 0.1967 (1.2026) lr 1.8763e-03 eta 1 day, 16:54:53
epoch [14/50] batch [1640/2000] time 2.056 (2.035) data 0.000 (0.001) loss 1.5181 (1.1996) lr 1.8763e-03 eta 1 day, 16:54:13
epoch [14/50] batch [1660/2000] time 1.999 (2.035) data 0.000 (0.001) loss 1.6293 (1.1983) lr 1.8763e-03 eta 1 day, 16:53:34
epoch [14/50] batch [1680/2000] time 2.055 (2.035) data 0.001 (0.001) loss 0.5829 (1.1967) lr 1.8763e-03 eta 1 day, 16:52:52
epoch [14/50] batch [1700/2000] time 2.055 (2.035) data 0.000 (0.001) loss 0.4418 (1.1952) lr 1.8763e-03 eta 1 day, 16:52:12
epoch [14/50] batch [1720/2000] time 2.000 (2.035) data 0.000 (0.001) loss 0.3179 (1.1952) lr 1.8763e-03 eta 1 day, 16:51:23
epoch [14/50] batch [1740/2000] time 2.053 (2.035) data 0.000 (0.001) loss 0.6982 (1.1948) lr 1.8763e-03 eta 1 day, 16:50:40
epoch [14/50] batch [1760/2000] time 2.051 (2.035) data 0.000 (0.001) loss 0.3559 (1.1949) lr 1.8763e-03 eta 1 day, 16:49:48
epoch [14/50] batch [1780/2000] time 2.058 (2.035) data 0.000 (0.001) loss 0.3830 (1.1956) lr 1.8763e-03 eta 1 day, 16:49:08
epoch [14/50] batch [1800/2000] time 2.054 (2.035) data 0.000 (0.001) loss 2.1794 (1.1974) lr 1.8763e-03 eta 1 day, 16:48:22
epoch [14/50] batch [1820/2000] time 2.029 (2.035) data 0.000 (0.001) loss 1.0281 (1.1957) lr 1.8763e-03 eta 1 day, 16:47:35
epoch [14/50] batch [1840/2000] time 2.057 (2.035) data 0.000 (0.001) loss 0.4423 (1.1984) lr 1.8763e-03 eta 1 day, 16:46:55
epoch [14/50] batch [1860/2000] time 2.056 (2.035) data 0.000 (0.001) loss 1.6589 (1.2027) lr 1.8763e-03 eta 1 day, 16:46:12
epoch [14/50] batch [1880/2000] time 2.056 (2.035) data 0.000 (0.001) loss 0.8529 (1.2009) lr 1.8763e-03 eta 1 day, 16:45:34
epoch [14/50] batch [1900/2000] time 2.059 (2.035) data 0.000 (0.001) loss 0.1346 (1.2014) lr 1.8763e-03 eta 1 day, 16:44:55
epoch [14/50] batch [1920/2000] time 2.056 (2.035) data 0.000 (0.001) loss 1.3322 (1.2031) lr 1.8763e-03 eta 1 day, 16:44:18
epoch [14/50] batch [1940/2000] time 2.000 (2.035) data 0.000 (0.001) loss 0.5064 (1.2070) lr 1.8763e-03 eta 1 day, 16:43:36
epoch [14/50] batch [1960/2000] time 2.053 (2.035) data 0.000 (0.001) loss 1.9081 (1.2064) lr 1.8763e-03 eta 1 day, 16:42:56
epoch [14/50] batch [1980/2000] time 2.052 (2.035) data 0.000 (0.001) loss 4.0737 (1.2093) lr 1.8763e-03 eta 1 day, 16:42:14
epoch [14/50] batch [2000/2000] time 1.974 (2.035) data 0.000 (0.001) loss 2.0175 (1.2087) lr 1.8443e-03 eta 1 day, 16:41:32
epoch [15/50] batch [20/2000] time 2.004 (2.067) data 0.000 (0.028) loss 0.6058 (1.3672) lr 1.8443e-03 eta 1 day, 17:19:18
epoch [15/50] batch [40/2000] time 2.001 (2.050) data 0.000 (0.014) loss 0.8932 (1.3960) lr 1.8443e-03 eta 1 day, 16:58:54
epoch [15/50] batch [60/2000] time 2.052 (2.043) data 0.001 (0.009) loss 2.1589 (1.3913) lr 1.8443e-03 eta 1 day, 16:49:25
epoch [15/50] batch [80/2000] time 2.032 (2.037) data 0.000 (0.007) loss 4.2599 (1.3518) lr 1.8443e-03 eta 1 day, 16:42:15
epoch [15/50] batch [100/2000] time 2.057 (2.038) data 0.000 (0.006) loss 1.9309 (1.3819) lr 1.8443e-03 eta 1 day, 16:42:18
epoch [15/50] batch [120/2000] time 1.974 (2.038) data 0.000 (0.005) loss 0.9709 (1.3492) lr 1.8443e-03 eta 1 day, 16:41:43
epoch [15/50] batch [140/2000] time 2.002 (2.037) data 0.000 (0.004) loss 2.6338 (1.3692) lr 1.8443e-03 eta 1 day, 16:39:51
epoch [15/50] batch [160/2000] time 2.055 (2.037) data 0.000 (0.004) loss 1.7209 (1.3404) lr 1.8443e-03 eta 1 day, 16:38:28
epoch [15/50] batch [180/2000] time 2.032 (2.036) data 0.000 (0.003) loss 1.0571 (1.3229) lr 1.8443e-03 eta 1 day, 16:37:03
epoch [15/50] batch [200/2000] time 2.060 (2.036) data 0.000 (0.003) loss 0.9610 (1.2982) lr 1.8443e-03 eta 1 day, 16:36:42
epoch [15/50] batch [220/2000] time 2.005 (2.036) data 0.000 (0.003) loss 1.2342 (1.2732) lr 1.8443e-03 eta 1 day, 16:35:47
epoch [15/50] batch [240/2000] time 2.005 (2.036) data 0.000 (0.003) loss 1.2227 (1.2905) lr 1.8443e-03 eta 1 day, 16:35:31
epoch [15/50] batch [260/2000] time 2.002 (2.036) data 0.000 (0.002) loss 0.0684 (1.2727) lr 1.8443e-03 eta 1 day, 16:34:38
epoch [15/50] batch [280/2000] time 2.057 (2.036) data 0.000 (0.002) loss 0.6814 (1.2754) lr 1.8443e-03 eta 1 day, 16:33:59
epoch [15/50] batch [300/2000] time 2.036 (2.038) data 0.000 (0.002) loss 2.4913 (1.2539) lr 1.8443e-03 eta 1 day, 16:35:17
epoch [15/50] batch [320/2000] time 2.234 (2.038) data 0.000 (0.002) loss 0.3476 (1.2387) lr 1.8443e-03 eta 1 day, 16:35:01
epoch [15/50] batch [340/2000] time 2.057 (2.039) data 0.000 (0.002) loss 1.6531 (1.2433) lr 1.8443e-03 eta 1 day, 16:34:53
epoch [15/50] batch [360/2000] time 2.034 (2.038) data 0.000 (0.002) loss 0.2660 (1.2493) lr 1.8443e-03 eta 1 day, 16:33:34
epoch [15/50] batch [380/2000] time 2.063 (2.038) data 0.000 (0.002) loss 0.6809 (1.2606) lr 1.8443e-03 eta 1 day, 16:32:24
epoch [15/50] batch [400/2000] time 2.063 (2.038) data 0.000 (0.002) loss 0.5310 (1.2496) lr 1.8443e-03 eta 1 day, 16:32:02
epoch [15/50] batch [420/2000] time 2.056 (2.038) data 0.000 (0.002) loss 1.0663 (1.2453) lr 1.8443e-03 eta 1 day, 16:31:33
epoch [15/50] batch [440/2000] time 1.999 (2.038) data 0.000 (0.002) loss 0.6792 (1.2395) lr 1.8443e-03 eta 1 day, 16:30:36
epoch [15/50] batch [460/2000] time 2.037 (2.039) data 0.003 (0.001) loss 0.7405 (1.2349) lr 1.8443e-03 eta 1 day, 16:30:55
epoch [15/50] batch [480/2000] time 2.056 (2.039) data 0.000 (0.001) loss 1.4030 (1.2325) lr 1.8443e-03 eta 1 day, 16:30:07
epoch [15/50] batch [500/2000] time 2.058 (2.039) data 0.000 (0.001) loss 1.3420 (1.2245) lr 1.8443e-03 eta 1 day, 16:29:26
epoch [15/50] batch [520/2000] time 2.061 (2.038) data 0.005 (0.001) loss 0.3486 (1.2157) lr 1.8443e-03 eta 1 day, 16:28:12
epoch [15/50] batch [540/2000] time 2.054 (2.038) data 0.000 (0.001) loss 2.0988 (1.2147) lr 1.8443e-03 eta 1 day, 16:27:15
epoch [15/50] batch [560/2000] time 1.998 (2.038) data 0.000 (0.001) loss 1.6855 (1.2215) lr 1.8443e-03 eta 1 day, 16:26:35
epoch [15/50] batch [580/2000] time 2.000 (2.038) data 0.000 (0.001) loss 0.1017 (1.2183) lr 1.8443e-03 eta 1 day, 16:25:38
epoch [15/50] batch [600/2000] time 2.030 (2.038) data 0.001 (0.001) loss 0.7731 (1.2247) lr 1.8443e-03 eta 1 day, 16:24:54
epoch [15/50] batch [620/2000] time 2.054 (2.038) data 0.000 (0.001) loss 0.9474 (1.2237) lr 1.8443e-03 eta 1 day, 16:24:10
epoch [15/50] batch [640/2000] time 2.032 (2.037) data 0.000 (0.001) loss 1.2558 (1.2238) lr 1.8443e-03 eta 1 day, 16:23:02
epoch [15/50] batch [660/2000] time 2.058 (2.037) data 0.000 (0.001) loss 1.7022 (1.2177) lr 1.8443e-03 eta 1 day, 16:22:16
epoch [15/50] batch [680/2000] time 2.059 (2.037) data 0.000 (0.001) loss 1.3131 (1.2145) lr 1.8443e-03 eta 1 day, 16:21:34
epoch [15/50] batch [700/2000] time 2.059 (2.037) data 0.000 (0.001) loss 1.1510 (1.2248) lr 1.8443e-03 eta 1 day, 16:20:47
epoch [15/50] batch [720/2000] time 2.058 (2.037) data 0.000 (0.001) loss 1.6196 (1.2221) lr 1.8443e-03 eta 1 day, 16:19:59
epoch [15/50] batch [740/2000] time 2.058 (2.037) data 0.000 (0.001) loss 1.6862 (1.2176) lr 1.8443e-03 eta 1 day, 16:19:20
epoch [15/50] batch [760/2000] time 2.000 (2.037) data 0.000 (0.001) loss 1.0246 (1.2131) lr 1.8443e-03 eta 1 day, 16:18:34
epoch [15/50] batch [780/2000] time 2.033 (2.037) data 0.000 (0.001) loss 0.4037 (1.2035) lr 1.8443e-03 eta 1 day, 16:18:02
epoch [15/50] batch [800/2000] time 2.060 (2.037) data 0.000 (0.001) loss 0.8096 (1.2079) lr 1.8443e-03 eta 1 day, 16:17:10
epoch [15/50] batch [820/2000] time 2.057 (2.037) data 0.000 (0.001) loss 0.1992 (1.2025) lr 1.8443e-03 eta 1 day, 16:16:25
epoch [15/50] batch [840/2000] time 2.060 (2.037) data 0.000 (0.001) loss 1.2741 (1.1984) lr 1.8443e-03 eta 1 day, 16:15:49
epoch [15/50] batch [860/2000] time 2.035 (2.037) data 0.000 (0.001) loss 2.2354 (1.1903) lr 1.8443e-03 eta 1 day, 16:15:07
epoch [15/50] batch [880/2000] time 2.064 (2.037) data 0.000 (0.001) loss 3.4695 (1.1935) lr 1.8443e-03 eta 1 day, 16:14:39
epoch [15/50] batch [900/2000] time 2.068 (2.037) data 0.005 (0.001) loss 2.7718 (1.2045) lr 1.8443e-03 eta 1 day, 16:14:12
epoch [15/50] batch [920/2000] time 2.036 (2.037) data 0.000 (0.001) loss 0.4221 (1.2062) lr 1.8443e-03 eta 1 day, 16:13:29
epoch [15/50] batch [940/2000] time 2.061 (2.037) data 0.000 (0.001) loss 1.3736 (1.2077) lr 1.8443e-03 eta 1 day, 16:12:57
epoch [15/50] batch [960/2000] time 2.001 (2.037) data 0.000 (0.001) loss 0.5340 (1.2051) lr 1.8443e-03 eta 1 day, 16:12:03
epoch [15/50] batch [980/2000] time 2.061 (2.037) data 0.000 (0.001) loss 1.8409 (1.2057) lr 1.8443e-03 eta 1 day, 16:11:26
epoch [15/50] batch [1000/2000] time 2.061 (2.037) data 0.000 (0.001) loss 0.2787 (1.2078) lr 1.8443e-03 eta 1 day, 16:10:46
epoch [15/50] batch [1020/2000] time 2.059 (2.037) data 0.000 (0.001) loss 0.9677 (1.2156) lr 1.8443e-03 eta 1 day, 16:09:57
epoch [15/50] batch [1040/2000] time 2.064 (2.037) data 0.000 (0.001) loss 1.6668 (1.2139) lr 1.8443e-03 eta 1 day, 16:09:15
epoch [15/50] batch [1060/2000] time 2.041 (2.037) data 0.003 (0.001) loss 1.5543 (1.2181) lr 1.8443e-03 eta 1 day, 16:08:43
epoch [15/50] batch [1080/2000] time 2.057 (2.037) data 0.000 (0.001) loss 0.1274 (1.2158) lr 1.8443e-03 eta 1 day, 16:07:57
epoch [15/50] batch [1100/2000] time 2.062 (2.037) data 0.000 (0.001) loss 2.6726 (1.2172) lr 1.8443e-03 eta 1 day, 16:07:14
epoch [15/50] batch [1120/2000] time 2.000 (2.037) data 0.000 (0.001) loss 1.4760 (1.2151) lr 1.8443e-03 eta 1 day, 16:06:31
epoch [15/50] batch [1140/2000] time 2.058 (2.037) data 0.001 (0.001) loss 3.2172 (1.2231) lr 1.8443e-03 eta 1 day, 16:05:56
epoch [15/50] batch [1160/2000] time 2.057 (2.037) data 0.000 (0.001) loss 1.4194 (1.2293) lr 1.8443e-03 eta 1 day, 16:05:21
epoch [15/50] batch [1180/2000] time 2.058 (2.037) data 0.000 (0.001) loss 1.2389 (1.2263) lr 1.8443e-03 eta 1 day, 16:04:40
epoch [15/50] batch [1200/2000] time 2.182 (2.038) data 0.000 (0.001) loss 2.7407 (1.2290) lr 1.8443e-03 eta 1 day, 16:05:06
epoch [15/50] batch [1220/2000] time 2.269 (2.044) data 0.000 (0.001) loss 0.2854 (1.2289) lr 1.8443e-03 eta 1 day, 16:11:29
epoch [15/50] batch [1240/2000] time 2.139 (2.051) data 0.000 (0.001) loss 1.6038 (1.2282) lr 1.8443e-03 eta 1 day, 16:18:26
epoch [15/50] batch [1260/2000] time 1.996 (2.050) data 0.000 (0.001) loss 0.1595 (1.2285) lr 1.8443e-03 eta 1 day, 16:17:31
epoch [15/50] batch [1280/2000] time 2.057 (2.050) data 0.000 (0.001) loss 0.9325 (1.2300) lr 1.8443e-03 eta 1 day, 16:16:27
epoch [15/50] batch [1300/2000] time 1.999 (2.050) data 0.000 (0.001) loss 0.5217 (1.2261) lr 1.8443e-03 eta 1 day, 16:15:34
epoch [15/50] batch [1320/2000] time 2.057 (2.050) data 0.000 (0.001) loss 3.5903 (1.2254) lr 1.8443e-03 eta 1 day, 16:14:39
epoch [15/50] batch [1340/2000] time 2.057 (2.050) data 0.000 (0.001) loss 1.1711 (1.2310) lr 1.8443e-03 eta 1 day, 16:13:42
epoch [15/50] batch [1360/2000] time 2.034 (2.049) data 0.000 (0.001) loss 0.0736 (1.2315) lr 1.8443e-03 eta 1 day, 16:12:37
epoch [15/50] batch [1380/2000] time 2.030 (2.049) data 0.000 (0.001) loss 0.2549 (1.2305) lr 1.8443e-03 eta 1 day, 16:11:41
epoch [15/50] batch [1400/2000] time 1.998 (2.049) data 0.000 (0.001) loss 1.7511 (1.2283) lr 1.8443e-03 eta 1 day, 16:10:40
epoch [15/50] batch [1420/2000] time 2.055 (2.049) data 0.000 (0.001) loss 2.0693 (1.2279) lr 1.8443e-03 eta 1 day, 16:09:48
epoch [15/50] batch [1440/2000] time 2.053 (2.048) data 0.000 (0.001) loss 3.2017 (1.2358) lr 1.8443e-03 eta 1 day, 16:08:46
epoch [15/50] batch [1460/2000] time 2.054 (2.048) data 0.000 (0.001) loss 2.2530 (1.2377) lr 1.8443e-03 eta 1 day, 16:07:48
epoch [15/50] batch [1480/2000] time 2.031 (2.048) data 0.000 (0.001) loss 0.5095 (1.2330) lr 1.8443e-03 eta 1 day, 16:06:42
epoch [15/50] batch [1500/2000] time 2.054 (2.048) data 0.000 (0.001) loss 0.1031 (1.2323) lr 1.8443e-03 eta 1 day, 16:05:50
epoch [15/50] batch [1520/2000] time 2.054 (2.047) data 0.000 (0.001) loss 0.5510 (1.2315) lr 1.8443e-03 eta 1 day, 16:04:56
epoch [15/50] batch [1540/2000] time 2.056 (2.047) data 0.000 (0.001) loss 0.9128 (1.2326) lr 1.8443e-03 eta 1 day, 16:04:04
epoch [15/50] batch [1560/2000] time 2.057 (2.047) data 0.000 (0.001) loss 0.4651 (1.2357) lr 1.8443e-03 eta 1 day, 16:03:14
epoch [15/50] batch [1580/2000] time 1.999 (2.047) data 0.000 (0.001) loss 0.5489 (1.2415) lr 1.8443e-03 eta 1 day, 16:02:16
epoch [15/50] batch [1600/2000] time 2.057 (2.047) data 0.000 (0.001) loss 0.8785 (1.2389) lr 1.8443e-03 eta 1 day, 16:01:26
epoch [15/50] batch [1620/2000] time 1.976 (2.046) data 0.000 (0.001) loss 0.1527 (1.2379) lr 1.8443e-03 eta 1 day, 16:00:28
epoch [15/50] batch [1640/2000] time 2.057 (2.046) data 0.000 (0.001) loss 0.3307 (1.2399) lr 1.8443e-03 eta 1 day, 15:59:43
epoch [15/50] batch [1660/2000] time 2.056 (2.046) data 0.000 (0.001) loss 3.4755 (1.2422) lr 1.8443e-03 eta 1 day, 15:58:56
epoch [15/50] batch [1680/2000] time 2.002 (2.046) data 0.001 (0.001) loss 0.2046 (1.2416) lr 1.8443e-03 eta 1 day, 15:58:04
epoch [15/50] batch [1700/2000] time 2.060 (2.046) data 0.000 (0.001) loss 0.4480 (1.2417) lr 1.8443e-03 eta 1 day, 15:57:18
epoch [15/50] batch [1720/2000] time 2.057 (2.046) data 0.000 (0.001) loss 1.3314 (1.2396) lr 1.8443e-03 eta 1 day, 15:56:35
epoch [15/50] batch [1740/2000] time 2.057 (2.046) data 0.001 (0.001) loss 0.4743 (1.2365) lr 1.8443e-03 eta 1 day, 15:55:47
epoch [15/50] batch [1760/2000] time 2.054 (2.046) data 0.000 (0.001) loss 0.5353 (1.2324) lr 1.8443e-03 eta 1 day, 15:54:57
epoch [15/50] batch [1780/2000] time 1.998 (2.046) data 0.000 (0.001) loss 0.5352 (1.2323) lr 1.8443e-03 eta 1 day, 15:54:04
epoch [15/50] batch [1800/2000] time 2.033 (2.046) data 0.000 (0.001) loss 2.5722 (1.2325) lr 1.8443e-03 eta 1 day, 15:53:17
epoch [15/50] batch [1820/2000] time 1.972 (2.045) data 0.000 (0.001) loss 3.1098 (1.2377) lr 1.8443e-03 eta 1 day, 15:52:25
epoch [15/50] batch [1840/2000] time 2.056 (2.045) data 0.000 (0.001) loss 0.2184 (1.2367) lr 1.8443e-03 eta 1 day, 15:51:34
epoch [15/50] batch [1860/2000] time 2.061 (2.045) data 0.000 (0.001) loss 0.7662 (1.2352) lr 1.8443e-03 eta 1 day, 15:50:44
epoch [15/50] batch [1880/2000] time 2.061 (2.045) data 0.000 (0.001) loss 3.2448 (1.2355) lr 1.8443e-03 eta 1 day, 15:49:55
epoch [15/50] batch [1900/2000] time 2.032 (2.045) data 0.000 (0.001) loss 1.6725 (1.2323) lr 1.8443e-03 eta 1 day, 15:49:07
epoch [15/50] batch [1920/2000] time 1.996 (2.045) data 0.000 (0.001) loss 2.0840 (1.2312) lr 1.8443e-03 eta 1 day, 15:48:20
epoch [15/50] batch [1940/2000] time 2.055 (2.045) data 0.000 (0.001) loss 0.1557 (1.2302) lr 1.8443e-03 eta 1 day, 15:47:29
epoch [15/50] batch [1960/2000] time 2.056 (2.045) data 0.000 (0.001) loss 1.9081 (1.2299) lr 1.8443e-03 eta 1 day, 15:46:39
epoch [15/50] batch [1980/2000] time 1.976 (2.044) data 0.000 (0.001) loss 0.9648 (1.2294) lr 1.8443e-03 eta 1 day, 15:45:50
epoch [15/50] batch [2000/2000] time 1.995 (2.044) data 0.000 (0.001) loss 1.1709 (1.2288) lr 1.8090e-03 eta 1 day, 15:45:05
epoch [16/50] batch [20/2000] time 2.056 (2.073) data 0.000 (0.034) loss 0.9871 (1.2574) lr 1.8090e-03 eta 1 day, 16:17:41
epoch [16/50] batch [40/2000] time 2.002 (2.052) data 0.000 (0.017) loss 0.7923 (1.2895) lr 1.8090e-03 eta 1 day, 15:52:22
epoch [16/50] batch [60/2000] time 2.001 (2.046) data 0.001 (0.012) loss 0.0980 (1.3581) lr 1.8090e-03 eta 1 day, 15:44:50
epoch [16/50] batch [80/2000] time 2.055 (2.045) data 0.000 (0.009) loss 0.6100 (1.3272) lr 1.8090e-03 eta 1 day, 15:43:07
epoch [16/50] batch [100/2000] time 1.977 (2.041) data 0.000 (0.007) loss 1.2893 (1.3645) lr 1.8090e-03 eta 1 day, 15:37:39
epoch [16/50] batch [120/2000] time 2.057 (2.040) data 0.000 (0.006) loss 2.4581 (1.3539) lr 1.8090e-03 eta 1 day, 15:36:16
epoch [16/50] batch [140/2000] time 2.061 (2.040) data 0.000 (0.005) loss 0.3199 (1.3229) lr 1.8090e-03 eta 1 day, 15:34:49
epoch [16/50] batch [160/2000] time 2.033 (2.038) data 0.000 (0.005) loss 0.1005 (1.2578) lr 1.8090e-03 eta 1 day, 15:32:32
epoch [16/50] batch [180/2000] time 2.057 (2.037) data 0.000 (0.004) loss 0.7458 (1.2344) lr 1.8090e-03 eta 1 day, 15:30:57
epoch [16/50] batch [200/2000] time 2.040 (2.037) data 0.000 (0.004) loss 0.9794 (1.2352) lr 1.8090e-03 eta 1 day, 15:30:16
epoch [16/50] batch [220/2000] time 2.060 (2.037) data 0.000 (0.003) loss 0.2156 (1.1975) lr 1.8090e-03 eta 1 day, 15:29:35
epoch [16/50] batch [240/2000] time 2.059 (2.037) data 0.000 (0.003) loss 2.0363 (1.1828) lr 1.8090e-03 eta 1 day, 15:28:32
epoch [16/50] batch [260/2000] time 2.002 (2.037) data 0.001 (0.003) loss 2.4308 (1.1972) lr 1.8090e-03 eta 1 day, 15:27:36
epoch [16/50] batch [280/2000] time 2.055 (2.037) data 0.000 (0.003) loss 0.8528 (1.1785) lr 1.8090e-03 eta 1 day, 15:26:59
epoch [16/50] batch [300/2000] time 2.057 (2.037) data 0.000 (0.003) loss 0.9858 (1.1703) lr 1.8090e-03 eta 1 day, 15:26:24
epoch [16/50] batch [320/2000] time 2.057 (2.037) data 0.000 (0.002) loss 0.9182 (1.1693) lr 1.8090e-03 eta 1 day, 15:25:32
epoch [16/50] batch [340/2000] time 2.001 (2.037) data 0.000 (0.002) loss 1.0339 (1.1672) lr 1.8090e-03 eta 1 day, 15:24:41
epoch [16/50] batch [360/2000] time 2.040 (2.037) data 0.000 (0.002) loss 1.7597 (1.1591) lr 1.8090e-03 eta 1 day, 15:23:51
epoch [16/50] batch [380/2000] time 2.060 (2.037) data 0.000 (0.002) loss 0.4012 (1.1676) lr 1.8090e-03 eta 1 day, 15:23:02
epoch [16/50] batch [400/2000] time 2.063 (2.037) data 0.000 (0.002) loss 0.4587 (1.1556) lr 1.8090e-03 eta 1 day, 15:22:28
epoch [16/50] batch [420/2000] time 2.034 (2.037) data 0.000 (0.002) loss 0.1995 (1.1671) lr 1.8090e-03 eta 1 day, 15:21:51
epoch [16/50] batch [440/2000] time 2.000 (2.037) data 0.000 (0.002) loss 1.5083 (1.1645) lr 1.8090e-03 eta 1 day, 15:21:03
epoch [16/50] batch [460/2000] time 2.060 (2.036) data 0.000 (0.002) loss 0.4018 (1.1731) lr 1.8090e-03 eta 1 day, 15:20:17
epoch [16/50] batch [480/2000] time 2.003 (2.036) data 0.000 (0.002) loss 1.4667 (1.1710) lr 1.8090e-03 eta 1 day, 15:19:30
epoch [16/50] batch [500/2000] time 2.060 (2.036) data 0.000 (0.002) loss 1.9987 (1.1871) lr 1.8090e-03 eta 1 day, 15:18:43
epoch [16/50] batch [520/2000] time 2.059 (2.036) data 0.000 (0.002) loss 0.4382 (1.1956) lr 1.8090e-03 eta 1 day, 15:17:47
epoch [16/50] batch [540/2000] time 2.008 (2.036) data 0.000 (0.002) loss 1.3944 (1.1913) lr 1.8090e-03 eta 1 day, 15:17:03
epoch [16/50] batch [560/2000] time 2.042 (2.036) data 0.000 (0.001) loss 1.1811 (1.1914) lr 1.8090e-03 eta 1 day, 15:16:16
epoch [16/50] batch [580/2000] time 2.066 (2.036) data 0.003 (0.001) loss 0.9780 (1.1863) lr 1.8090e-03 eta 1 day, 15:15:39
epoch [16/50] batch [600/2000] time 1.982 (2.036) data 0.001 (0.001) loss 3.7664 (1.1879) lr 1.8090e-03 eta 1 day, 15:14:50
epoch [16/50] batch [620/2000] time 2.035 (2.036) data 0.000 (0.001) loss 0.5580 (1.1838) lr 1.8090e-03 eta 1 day, 15:14:21
epoch [16/50] batch [640/2000] time 2.057 (2.036) data 0.000 (0.001) loss 3.0106 (1.1949) lr 1.8090e-03 eta 1 day, 15:13:48
epoch [16/50] batch [660/2000] time 2.005 (2.036) data 0.000 (0.001) loss 2.6242 (1.1977) lr 1.8090e-03 eta 1 day, 15:13:14
epoch [16/50] batch [680/2000] time 2.004 (2.036) data 0.000 (0.001) loss 3.7237 (1.2017) lr 1.8090e-03 eta 1 day, 15:12:28
epoch [16/50] batch [700/2000] time 2.058 (2.036) data 0.000 (0.001) loss 1.4091 (1.2044) lr 1.8090e-03 eta 1 day, 15:11:50
epoch [16/50] batch [720/2000] time 1.979 (2.036) data 0.000 (0.001) loss 0.9784 (1.2032) lr 1.8090e-03 eta 1 day, 15:11:05
epoch [16/50] batch [740/2000] time 2.002 (2.036) data 0.000 (0.001) loss 1.7509 (1.2008) lr 1.8090e-03 eta 1 day, 15:10:33
epoch [16/50] batch [760/2000] time 2.059 (2.036) data 0.000 (0.001) loss 0.3962 (1.1945) lr 1.8090e-03 eta 1 day, 15:09:51
epoch [16/50] batch [780/2000] time 2.002 (2.036) data 0.000 (0.001) loss 0.4501 (1.1856) lr 1.8090e-03 eta 1 day, 15:08:58
epoch [16/50] batch [800/2000] time 2.002 (2.036) data 0.000 (0.001) loss 1.5247 (1.1899) lr 1.8090e-03 eta 1 day, 15:08:13
epoch [16/50] batch [820/2000] time 2.053 (2.036) data 0.000 (0.001) loss 2.5057 (1.2019) lr 1.8090e-03 eta 1 day, 15:07:20
epoch [16/50] batch [840/2000] time 2.050 (2.036) data 0.000 (0.001) loss 0.7262 (1.1906) lr 1.8090e-03 eta 1 day, 15:06:37
epoch [16/50] batch [860/2000] time 2.027 (2.036) data 0.000 (0.001) loss 0.2143 (1.1850) lr 1.8090e-03 eta 1 day, 15:05:56
epoch [16/50] batch [880/2000] time 2.030 (2.036) data 0.000 (0.001) loss 0.7267 (1.1878) lr 1.8090e-03 eta 1 day, 15:05:02
epoch [16/50] batch [900/2000] time 2.052 (2.035) data 0.000 (0.001) loss 2.3235 (1.1828) lr 1.8090e-03 eta 1 day, 15:04:12
epoch [16/50] batch [920/2000] time 2.050 (2.035) data 0.000 (0.001) loss 3.3621 (1.1846) lr 1.8090e-03 eta 1 day, 15:03:25
epoch [16/50] batch [940/2000] time 1.998 (2.035) data 0.000 (0.001) loss 3.8050 (1.1822) lr 1.8090e-03 eta 1 day, 15:02:33
epoch [16/50] batch [960/2000] time 1.973 (2.035) data 0.000 (0.001) loss 0.8326 (1.1886) lr 1.8090e-03 eta 1 day, 15:01:33
epoch [16/50] batch [980/2000] time 2.028 (2.035) data 0.000 (0.001) loss 1.1950 (1.1891) lr 1.8090e-03 eta 1 day, 15:00:36
epoch [16/50] batch [1000/2000] time 1.999 (2.034) data 0.000 (0.001) loss 0.3228 (1.1871) lr 1.8090e-03 eta 1 day, 14:59:36
epoch [16/50] batch [1020/2000] time 1.996 (2.034) data 0.000 (0.001) loss 0.2425 (1.1859) lr 1.8090e-03 eta 1 day, 14:58:46
epoch [16/50] batch [1040/2000] time 2.049 (2.034) data 0.000 (0.001) loss 1.8150 (1.1865) lr 1.8090e-03 eta 1 day, 14:57:58
epoch [16/50] batch [1060/2000] time 1.998 (2.034) data 0.000 (0.001) loss 1.6052 (1.1905) lr 1.8090e-03 eta 1 day, 14:57:06
epoch [16/50] batch [1080/2000] time 2.024 (2.034) data 0.000 (0.001) loss 1.4514 (1.1911) lr 1.8090e-03 eta 1 day, 14:56:22
epoch [16/50] batch [1100/2000] time 2.029 (2.034) data 0.000 (0.001) loss 1.3354 (1.1938) lr 1.8090e-03 eta 1 day, 14:55:33
epoch [16/50] batch [1120/2000] time 1.995 (2.034) data 0.000 (0.001) loss 2.4264 (1.1941) lr 1.8090e-03 eta 1 day, 14:54:53
epoch [16/50] batch [1140/2000] time 1.994 (2.034) data 0.001 (0.001) loss 0.5244 (1.1995) lr 1.8090e-03 eta 1 day, 14:53:53
epoch [16/50] batch [1160/2000] time 2.048 (2.034) data 0.000 (0.001) loss 1.3303 (1.2045) lr 1.8090e-03 eta 1 day, 14:53:11
epoch [16/50] batch [1180/2000] time 2.029 (2.033) data 0.000 (0.001) loss 0.1245 (1.1988) lr 1.8090e-03 eta 1 day, 14:52:23
epoch [16/50] batch [1200/2000] time 2.055 (2.034) data 0.000 (0.001) loss 0.9659 (1.1980) lr 1.8090e-03 eta 1 day, 14:51:45
epoch [16/50] batch [1220/2000] time 2.054 (2.033) data 0.000 (0.001) loss 1.0318 (1.2003) lr 1.8090e-03 eta 1 day, 14:51:02
epoch [16/50] batch [1240/2000] time 2.052 (2.034) data 0.000 (0.001) loss 2.5027 (1.2093) lr 1.8090e-03 eta 1 day, 14:50:27
epoch [16/50] batch [1260/2000] time 1.999 (2.033) data 0.000 (0.001) loss 1.3363 (1.2093) lr 1.8090e-03 eta 1 day, 14:49:34
epoch [16/50] batch [1280/2000] time 2.059 (2.033) data 0.000 (0.001) loss 1.6760 (1.2124) lr 1.8090e-03 eta 1 day, 14:48:35
epoch [16/50] batch [1300/2000] time 2.029 (2.033) data 0.000 (0.001) loss 2.1470 (1.2131) lr 1.8090e-03 eta 1 day, 14:47:51
epoch [16/50] batch [1320/2000] time 2.047 (2.033) data 0.000 (0.001) loss 0.4245 (1.2165) lr 1.8090e-03 eta 1 day, 14:47:03
epoch [16/50] batch [1340/2000] time 2.048 (2.033) data 0.000 (0.001) loss 0.3274 (1.2176) lr 1.8090e-03 eta 1 day, 14:46:21
epoch [16/50] batch [1360/2000] time 2.051 (2.033) data 0.000 (0.001) loss 2.6528 (1.2257) lr 1.8090e-03 eta 1 day, 14:45:35
epoch [16/50] batch [1380/2000] time 1.978 (2.033) data 0.000 (0.001) loss 0.7140 (1.2262) lr 1.8090e-03 eta 1 day, 14:44:54
epoch [16/50] batch [1400/2000] time 2.054 (2.033) data 0.000 (0.001) loss 0.7844 (1.2240) lr 1.8090e-03 eta 1 day, 14:44:15
epoch [16/50] batch [1420/2000] time 2.053 (2.033) data 0.000 (0.001) loss 0.4886 (1.2253) lr 1.8090e-03 eta 1 day, 14:43:32
epoch [16/50] batch [1440/2000] time 2.026 (2.033) data 0.000 (0.001) loss 0.1829 (1.2287) lr 1.8090e-03 eta 1 day, 14:42:42
epoch [16/50] batch [1460/2000] time 2.052 (2.033) data 0.000 (0.001) loss 0.5233 (1.2315) lr 1.8090e-03 eta 1 day, 14:41:56
epoch [16/50] batch [1480/2000] time 2.001 (2.033) data 0.000 (0.001) loss 1.6420 (1.2303) lr 1.8090e-03 eta 1 day, 14:41:13
epoch [16/50] batch [1500/2000] time 2.054 (2.033) data 0.000 (0.001) loss 0.1165 (1.2298) lr 1.8090e-03 eta 1 day, 14:40:33
epoch [16/50] batch [1520/2000] time 2.050 (2.033) data 0.000 (0.001) loss 0.5418 (1.2295) lr 1.8090e-03 eta 1 day, 14:39:57
epoch [16/50] batch [1540/2000] time 2.056 (2.033) data 0.000 (0.001) loss 1.1209 (1.2271) lr 1.8090e-03 eta 1 day, 14:39:17
epoch [16/50] batch [1560/2000] time 2.050 (2.033) data 0.000 (0.001) loss 2.2647 (1.2260) lr 1.8090e-03 eta 1 day, 14:38:39
epoch [16/50] batch [1580/2000] time 2.050 (2.033) data 0.000 (0.001) loss 0.9468 (1.2247) lr 1.8090e-03 eta 1 day, 14:37:56
epoch [16/50] batch [1600/2000] time 2.055 (2.033) data 0.000 (0.001) loss 0.6326 (1.2246) lr 1.8090e-03 eta 1 day, 14:37:19
epoch [16/50] batch [1620/2000] time 2.032 (2.033) data 0.000 (0.001) loss 1.7309 (1.2233) lr 1.8090e-03 eta 1 day, 14:36:46
epoch [16/50] batch [1640/2000] time 2.057 (2.033) data 0.000 (0.001) loss 1.1206 (1.2222) lr 1.8090e-03 eta 1 day, 14:36:08
epoch [16/50] batch [1660/2000] time 2.055 (2.033) data 0.000 (0.001) loss 0.6043 (1.2191) lr 1.8090e-03 eta 1 day, 14:35:30
epoch [16/50] batch [1680/2000] time 2.057 (2.033) data 0.001 (0.001) loss 0.6953 (1.2214) lr 1.8090e-03 eta 1 day, 14:34:54
epoch [16/50] batch [1700/2000] time 2.048 (2.033) data 0.000 (0.001) loss 1.8334 (1.2189) lr 1.8090e-03 eta 1 day, 14:34:21
epoch [16/50] batch [1720/2000] time 2.049 (2.033) data 0.000 (0.001) loss 2.1321 (1.2178) lr 1.8090e-03 eta 1 day, 14:33:37
epoch [16/50] batch [1740/2000] time 1.996 (2.033) data 0.000 (0.001) loss 1.7676 (1.2238) lr 1.8090e-03 eta 1 day, 14:32:55
epoch [16/50] batch [1760/2000] time 2.028 (2.033) data 0.000 (0.001) loss 1.1555 (1.2246) lr 1.8090e-03 eta 1 day, 14:32:12
epoch [16/50] batch [1780/2000] time 2.050 (2.033) data 0.000 (0.001) loss 0.6668 (1.2218) lr 1.8090e-03 eta 1 day, 14:31:33
epoch [16/50] batch [1800/2000] time 2.051 (2.033) data 0.000 (0.001) loss 0.6017 (1.2174) lr 1.8090e-03 eta 1 day, 14:30:47
epoch [16/50] batch [1820/2000] time 2.051 (2.033) data 0.000 (0.001) loss 1.4168 (1.2173) lr 1.8090e-03 eta 1 day, 14:30:04
epoch [16/50] batch [1840/2000] time 2.034 (2.033) data 0.000 (0.001) loss 0.1093 (1.2163) lr 1.8090e-03 eta 1 day, 14:29:19
epoch [16/50] batch [1860/2000] time 1.995 (2.033) data 0.000 (0.001) loss 0.1097 (1.2170) lr 1.8090e-03 eta 1 day, 14:28:35
epoch [16/50] batch [1880/2000] time 2.050 (2.033) data 0.000 (0.001) loss 2.5776 (1.2210) lr 1.8090e-03 eta 1 day, 14:27:51
epoch [16/50] batch [1900/2000] time 2.049 (2.033) data 0.000 (0.001) loss 0.2319 (1.2225) lr 1.8090e-03 eta 1 day, 14:27:10
epoch [16/50] batch [1920/2000] time 2.052 (2.033) data 0.000 (0.001) loss 3.1728 (1.2223) lr 1.8090e-03 eta 1 day, 14:26:26
epoch [16/50] batch [1940/2000] time 1.998 (2.033) data 0.000 (0.001) loss 2.2728 (1.2243) lr 1.8090e-03 eta 1 day, 14:25:40
epoch [16/50] batch [1960/2000] time 2.051 (2.033) data 0.000 (0.001) loss 0.5644 (1.2228) lr 1.8090e-03 eta 1 day, 14:24:59
epoch [16/50] batch [1980/2000] time 2.055 (2.033) data 0.000 (0.001) loss 0.5534 (1.2241) lr 1.8090e-03 eta 1 day, 14:24:13
epoch [16/50] batch [2000/2000] time 1.971 (2.032) data 0.000 (0.001) loss 1.2012 (1.2255) lr 1.7705e-03 eta 1 day, 14:23:25
epoch [17/50] batch [20/2000] time 1.975 (2.052) data 0.000 (0.030) loss 1.7129 (1.2376) lr 1.7705e-03 eta 1 day, 14:45:25
epoch [17/50] batch [40/2000] time 2.026 (2.042) data 0.000 (0.015) loss 3.2164 (1.3154) lr 1.7705e-03 eta 1 day, 14:33:06
epoch [17/50] batch [60/2000] time 1.997 (2.038) data 0.001 (0.010) loss 2.9578 (1.2905) lr 1.7705e-03 eta 1 day, 14:27:43
epoch [17/50] batch [80/2000] time 2.051 (2.036) data 0.000 (0.008) loss 0.3718 (1.2468) lr 1.7705e-03 eta 1 day, 14:24:31
epoch [17/50] batch [100/2000] time 2.030 (2.035) data 0.000 (0.006) loss 2.5585 (1.2377) lr 1.7705e-03 eta 1 day, 14:22:47
epoch [17/50] batch [120/2000] time 1.976 (2.034) data 0.000 (0.005) loss 1.7762 (1.1991) lr 1.7705e-03 eta 1 day, 14:21:17
epoch [17/50] batch [140/2000] time 2.056 (2.034) data 0.000 (0.005) loss 1.1660 (1.2059) lr 1.7705e-03 eta 1 day, 14:20:20
epoch [17/50] batch [160/2000] time 2.053 (2.034) data 0.000 (0.004) loss 0.6782 (1.1860) lr 1.7705e-03 eta 1 day, 14:20:09
epoch [17/50] batch [180/2000] time 2.069 (2.036) data 0.000 (0.004) loss 1.5203 (1.1555) lr 1.7705e-03 eta 1 day, 14:20:55
epoch [17/50] batch [200/2000] time 2.034 (2.036) data 0.000 (0.003) loss 1.1095 (1.1677) lr 1.7705e-03 eta 1 day, 14:20:48
epoch [17/50] batch [220/2000] time 2.058 (2.036) data 0.000 (0.003) loss 0.4068 (1.1501) lr 1.7705e-03 eta 1 day, 14:20:08
epoch [17/50] batch [240/2000] time 2.058 (2.036) data 0.000 (0.003) loss 1.6981 (1.1912) lr 1.7705e-03 eta 1 day, 14:19:26
epoch [17/50] batch [260/2000] time 2.003 (2.036) data 0.000 (0.003) loss 0.0340 (1.1784) lr 1.7705e-03 eta 1 day, 14:18:31
epoch [17/50] batch [280/2000] time 2.005 (2.036) data 0.000 (0.002) loss 1.9056 (1.1786) lr 1.7705e-03 eta 1 day, 14:18:12
epoch [17/50] batch [300/2000] time 2.002 (2.036) data 0.000 (0.002) loss 1.8634 (1.1690) lr 1.7705e-03 eta 1 day, 14:17:45
epoch [17/50] batch [320/2000] time 2.006 (2.036) data 0.000 (0.002) loss 1.3194 (1.1751) lr 1.7705e-03 eta 1 day, 14:16:48
epoch [17/50] batch [340/2000] time 2.063 (2.036) data 0.000 (0.002) loss 1.2331 (1.1756) lr 1.7705e-03 eta 1 day, 14:16:10
epoch [17/50] batch [360/2000] time 2.037 (2.036) data 0.000 (0.002) loss 0.2786 (1.1934) lr 1.7705e-03 eta 1 day, 14:15:36
epoch [17/50] batch [380/2000] time 2.061 (2.036) data 0.000 (0.002) loss 0.2535 (1.1806) lr 1.7705e-03 eta 1 day, 14:14:59
epoch [17/50] batch [400/2000] time 2.004 (2.036) data 0.000 (0.002) loss 0.2986 (1.1689) lr 1.7705e-03 eta 1 day, 14:14:09
epoch [17/50] batch [420/2000] time 2.043 (2.036) data 0.000 (0.002) loss 0.6755 (1.1519) lr 1.7705e-03 eta 1 day, 14:13:45
epoch [17/50] batch [440/2000] time 2.062 (2.036) data 0.000 (0.002) loss 2.3665 (1.1577) lr 1.7705e-03 eta 1 day, 14:12:52
epoch [17/50] batch [460/2000] time 2.037 (2.036) data 0.000 (0.002) loss 1.6512 (1.1732) lr 1.7705e-03 eta 1 day, 14:12:02
epoch [17/50] batch [480/2000] time 2.055 (2.036) data 0.000 (0.002) loss 0.7712 (1.1774) lr 1.7705e-03 eta 1 day, 14:11:09
epoch [17/50] batch [500/2000] time 2.052 (2.036) data 0.000 (0.001) loss 0.6601 (1.1736) lr 1.7705e-03 eta 1 day, 14:10:36
epoch [17/50] batch [520/2000] time 2.050 (2.036) data 0.000 (0.001) loss 0.8583 (1.1706) lr 1.7705e-03 eta 1 day, 14:09:35
epoch [17/50] batch [540/2000] time 2.054 (2.036) data 0.000 (0.001) loss 3.7421 (1.1687) lr 1.7705e-03 eta 1 day, 14:09:01
epoch [17/50] batch [560/2000] time 2.058 (2.036) data 0.000 (0.001) loss 1.2431 (1.1748) lr 1.7705e-03 eta 1 day, 14:07:58
epoch [17/50] batch [580/2000] time 2.055 (2.036) data 0.000 (0.001) loss 1.0721 (1.1932) lr 1.7705e-03 eta 1 day, 14:07:20
epoch [17/50] batch [600/2000] time 2.057 (2.035) data 0.001 (0.001) loss 2.2768 (1.1914) lr 1.7705e-03 eta 1 day, 14:06:18
epoch [17/50] batch [620/2000] time 2.034 (2.035) data 0.000 (0.001) loss 1.1598 (1.1890) lr 1.7705e-03 eta 1 day, 14:05:12
epoch [17/50] batch [640/2000] time 2.030 (2.035) data 0.000 (0.001) loss 1.3517 (1.1891) lr 1.7705e-03 eta 1 day, 14:04:21
epoch [17/50] batch [660/2000] time 2.060 (2.035) data 0.000 (0.001) loss 2.5006 (1.1889) lr 1.7705e-03 eta 1 day, 14:03:32
epoch [17/50] batch [680/2000] time 2.029 (2.035) data 0.000 (0.001) loss 0.2564 (1.1914) lr 1.7705e-03 eta 1 day, 14:02:55
epoch [17/50] batch [700/2000] time 2.058 (2.035) data 0.000 (0.001) loss 0.6270 (1.1859) lr 1.7705e-03 eta 1 day, 14:02:08
epoch [17/50] batch [720/2000] time 2.031 (2.034) data 0.000 (0.001) loss 2.5621 (1.1934) lr 1.7705e-03 eta 1 day, 14:01:18
epoch [17/50] batch [740/2000] time 2.056 (2.034) data 0.000 (0.001) loss 1.9154 (1.2002) lr 1.7705e-03 eta 1 day, 14:00:27
epoch [17/50] batch [760/2000] time 2.057 (2.034) data 0.000 (0.001) loss 1.1183 (1.2008) lr 1.7705e-03 eta 1 day, 13:59:47
epoch [17/50] batch [780/2000] time 2.058 (2.034) data 0.000 (0.001) loss 0.6888 (1.2030) lr 1.7705e-03 eta 1 day, 13:59:09
epoch [17/50] batch [800/2000] time 2.036 (2.034) data 0.000 (0.001) loss 0.1742 (1.1986) lr 1.7705e-03 eta 1 day, 13:58:15
epoch [17/50] batch [820/2000] time 2.034 (2.034) data 0.000 (0.001) loss 0.6229 (1.1891) lr 1.7705e-03 eta 1 day, 13:57:31
epoch [17/50] batch [840/2000] time 1.975 (2.034) data 0.000 (0.001) loss 1.7775 (1.1992) lr 1.7705e-03 eta 1 day, 13:56:59
epoch [17/50] batch [860/2000] time 1.999 (2.034) data 0.000 (0.001) loss 2.4529 (1.1990) lr 1.7705e-03 eta 1 day, 13:56:13
epoch [17/50] batch [880/2000] time 1.999 (2.034) data 0.000 (0.001) loss 0.6281 (1.2027) lr 1.7705e-03 eta 1 day, 13:55:24
epoch [17/50] batch [900/2000] time 2.057 (2.034) data 0.000 (0.001) loss 1.4496 (1.1999) lr 1.7705e-03 eta 1 day, 13:54:57
epoch [17/50] batch [920/2000] time 1.999 (2.034) data 0.000 (0.001) loss 1.0866 (1.1954) lr 1.7705e-03 eta 1 day, 13:54:22
epoch [17/50] batch [940/2000] time 2.058 (2.034) data 0.000 (0.001) loss 2.2281 (1.1939) lr 1.7705e-03 eta 1 day, 13:53:43
epoch [17/50] batch [960/2000] time 1.995 (2.034) data 0.000 (0.001) loss 2.5828 (1.1861) lr 1.7705e-03 eta 1 day, 13:52:56
epoch [17/50] batch [980/2000] time 2.055 (2.034) data 0.000 (0.001) loss 3.6827 (1.1841) lr 1.7705e-03 eta 1 day, 13:52:13
epoch [17/50] batch [1000/2000] time 1.997 (2.034) data 0.000 (0.001) loss 1.2755 (1.1791) lr 1.7705e-03 eta 1 day, 13:51:21
epoch [17/50] batch [1020/2000] time 2.053 (2.034) data 0.000 (0.001) loss 0.3455 (1.1759) lr 1.7705e-03 eta 1 day, 13:50:41
epoch [17/50] batch [1040/2000] time 1.997 (2.034) data 0.000 (0.001) loss 0.3514 (1.1757) lr 1.7705e-03 eta 1 day, 13:49:52
epoch [17/50] batch [1060/2000] time 1.996 (2.034) data 0.000 (0.001) loss 3.5718 (1.1798) lr 1.7705e-03 eta 1 day, 13:49:06
epoch [17/50] batch [1080/2000] time 1.999 (2.034) data 0.000 (0.001) loss 1.2332 (1.1836) lr 1.7705e-03 eta 1 day, 13:48:18
epoch [17/50] batch [1100/2000] time 2.057 (2.034) data 0.000 (0.001) loss 2.0699 (1.1889) lr 1.7705e-03 eta 1 day, 13:47:44
epoch [17/50] batch [1120/2000] time 1.999 (2.034) data 0.000 (0.001) loss 0.6497 (1.1944) lr 1.7705e-03 eta 1 day, 13:47:10
epoch [17/50] batch [1140/2000] time 2.054 (2.034) data 0.001 (0.001) loss 1.7436 (1.1935) lr 1.7705e-03 eta 1 day, 13:46:29
epoch [17/50] batch [1160/2000] time 2.001 (2.034) data 0.000 (0.001) loss 0.7510 (1.1893) lr 1.7705e-03 eta 1 day, 13:45:44
epoch [17/50] batch [1180/2000] time 2.057 (2.034) data 0.000 (0.001) loss 3.5194 (1.1958) lr 1.7705e-03 eta 1 day, 13:45:02
epoch [17/50] batch [1200/2000] time 2.032 (2.034) data 0.000 (0.001) loss 0.4967 (1.1969) lr 1.7705e-03 eta 1 day, 13:44:17
epoch [17/50] batch [1220/2000] time 1.996 (2.034) data 0.000 (0.001) loss 1.6717 (1.1985) lr 1.7705e-03 eta 1 day, 13:43:31
epoch [17/50] batch [1240/2000] time 1.997 (2.034) data 0.000 (0.001) loss 2.9831 (1.1956) lr 1.7705e-03 eta 1 day, 13:42:44
epoch [17/50] batch [1260/2000] time 2.035 (2.033) data 0.000 (0.001) loss 2.3325 (1.1995) lr 1.7705e-03 eta 1 day, 13:41:54
epoch [17/50] batch [1280/2000] time 2.063 (2.034) data 0.000 (0.001) loss 0.4735 (1.1984) lr 1.7705e-03 eta 1 day, 13:41:18
epoch [17/50] batch [1300/2000] time 2.060 (2.034) data 0.000 (0.001) loss 1.0603 (1.1964) lr 1.7705e-03 eta 1 day, 13:40:35
epoch [17/50] batch [1320/2000] time 2.062 (2.034) data 0.000 (0.001) loss 1.8334 (1.1979) lr 1.7705e-03 eta 1 day, 13:39:59
epoch [17/50] batch [1340/2000] time 2.056 (2.034) data 0.000 (0.001) loss 0.7076 (1.1991) lr 1.7705e-03 eta 1 day, 13:39:18
epoch [17/50] batch [1360/2000] time 2.057 (2.033) data 0.000 (0.001) loss 0.6255 (1.1985) lr 1.7705e-03 eta 1 day, 13:38:31
epoch [17/50] batch [1380/2000] time 2.033 (2.034) data 0.000 (0.001) loss 2.5405 (1.2039) lr 1.7705e-03 eta 1 day, 13:37:52
epoch [17/50] batch [1400/2000] time 2.059 (2.033) data 0.000 (0.001) loss 2.2074 (1.2078) lr 1.7705e-03 eta 1 day, 13:37:06
epoch [17/50] batch [1420/2000] time 2.058 (2.033) data 0.000 (0.001) loss 1.0805 (1.2103) lr 1.7705e-03 eta 1 day, 13:36:28
epoch [17/50] batch [1440/2000] time 1.998 (2.033) data 0.000 (0.001) loss 1.9065 (1.2146) lr 1.7705e-03 eta 1 day, 13:35:47
epoch [17/50] batch [1460/2000] time 2.002 (2.033) data 0.000 (0.001) loss 0.9610 (1.2142) lr 1.7705e-03 eta 1 day, 13:35:06
epoch [17/50] batch [1480/2000] time 2.063 (2.034) data 0.000 (0.001) loss 2.0466 (1.2120) lr 1.7705e-03 eta 1 day, 13:34:30
epoch [17/50] batch [1500/2000] time 2.060 (2.034) data 0.000 (0.001) loss 1.6190 (1.2131) lr 1.7705e-03 eta 1 day, 13:33:48
epoch [17/50] batch [1520/2000] time 2.056 (2.034) data 0.000 (0.001) loss 1.9385 (1.2146) lr 1.7705e-03 eta 1 day, 13:33:12
epoch [17/50] batch [1540/2000] time 2.058 (2.034) data 0.000 (0.001) loss 0.0592 (1.2141) lr 1.7705e-03 eta 1 day, 13:32:33
epoch [17/50] batch [1560/2000] time 1.996 (2.034) data 0.001 (0.001) loss 0.2282 (1.2149) lr 1.7705e-03 eta 1 day, 13:31:57
epoch [17/50] batch [1580/2000] time 2.055 (2.034) data 0.000 (0.001) loss 1.5773 (1.2177) lr 1.7705e-03 eta 1 day, 13:31:14
epoch [17/50] batch [1600/2000] time 2.057 (2.034) data 0.000 (0.001) loss 0.5969 (1.2179) lr 1.7705e-03 eta 1 day, 13:30:32
epoch [17/50] batch [1620/2000] time 2.035 (2.034) data 0.000 (0.001) loss 1.5501 (1.2184) lr 1.7705e-03 eta 1 day, 13:29:45
epoch [17/50] batch [1640/2000] time 2.052 (2.034) data 0.000 (0.001) loss 1.5120 (1.2191) lr 1.7705e-03 eta 1 day, 13:29:04
epoch [17/50] batch [1660/2000] time 2.033 (2.033) data 0.000 (0.001) loss 0.2022 (1.2193) lr 1.7705e-03 eta 1 day, 13:28:17
epoch [17/50] batch [1680/2000] time 2.057 (2.033) data 0.001 (0.001) loss 2.4061 (1.2206) lr 1.7705e-03 eta 1 day, 13:27:38
epoch [17/50] batch [1700/2000] time 2.031 (2.033) data 0.000 (0.001) loss 0.6925 (1.2163) lr 1.7705e-03 eta 1 day, 13:26:51
epoch [17/50] batch [1720/2000] time 2.032 (2.033) data 0.000 (0.001) loss 2.3212 (1.2143) lr 1.7705e-03 eta 1 day, 13:26:05
epoch [17/50] batch [1740/2000] time 1.971 (2.033) data 0.000 (0.001) loss 1.4305 (1.2140) lr 1.7705e-03 eta 1 day, 13:25:21
epoch [17/50] batch [1760/2000] time 2.053 (2.033) data 0.000 (0.001) loss 0.1910 (1.2125) lr 1.7705e-03 eta 1 day, 13:24:36
epoch [17/50] batch [1780/2000] time 2.054 (2.033) data 0.000 (0.001) loss 3.0649 (1.2164) lr 1.7705e-03 eta 1 day, 13:23:53
epoch [17/50] batch [1800/2000] time 1.998 (2.033) data 0.000 (0.001) loss 2.6807 (1.2169) lr 1.7705e-03 eta 1 day, 13:23:08
epoch [17/50] batch [1820/2000] time 2.056 (2.033) data 0.000 (0.001) loss 0.7964 (1.2180) lr 1.7705e-03 eta 1 day, 13:22:25
epoch [17/50] batch [1840/2000] time 2.054 (2.033) data 0.000 (0.001) loss 2.2124 (1.2203) lr 1.7705e-03 eta 1 day, 13:21:47
epoch [17/50] batch [1860/2000] time 1.995 (2.033) data 0.000 (0.001) loss 0.3673 (1.2168) lr 1.7705e-03 eta 1 day, 13:21:06
epoch [17/50] batch [1880/2000] time 2.028 (2.033) data 0.000 (0.001) loss 0.6528 (1.2185) lr 1.7705e-03 eta 1 day, 13:20:23
epoch [17/50] batch [1900/2000] time 2.030 (2.033) data 0.000 (0.001) loss 1.2877 (1.2204) lr 1.7705e-03 eta 1 day, 13:19:43
epoch [17/50] batch [1920/2000] time 2.055 (2.033) data 0.000 (0.001) loss 0.6741 (1.2183) lr 1.7705e-03 eta 1 day, 13:19:07
epoch [17/50] batch [1940/2000] time 2.053 (2.033) data 0.000 (0.001) loss 1.8684 (1.2177) lr 1.7705e-03 eta 1 day, 13:18:27
epoch [17/50] batch [1960/2000] time 2.052 (2.033) data 0.000 (0.001) loss 0.9764 (1.2147) lr 1.7705e-03 eta 1 day, 13:17:46
epoch [17/50] batch [1980/2000] time 2.051 (2.033) data 0.000 (0.001) loss 1.5984 (1.2142) lr 1.7705e-03 eta 1 day, 13:17:03
epoch [17/50] batch [2000/2000] time 2.028 (2.033) data 0.000 (0.001) loss 0.7222 (1.2126) lr 1.7290e-03 eta 1 day, 13:16:20
epoch [18/50] batch [20/2000] time 2.029 (2.062) data 0.000 (0.034) loss 0.3432 (1.0820) lr 1.7290e-03 eta 1 day, 13:47:54
epoch [18/50] batch [40/2000] time 2.054 (2.046) data 0.000 (0.017) loss 1.1651 (1.2477) lr 1.7290e-03 eta 1 day, 13:29:15
epoch [18/50] batch [60/2000] time 1.996 (2.040) data 0.001 (0.012) loss 1.0824 (1.1252) lr 1.7290e-03 eta 1 day, 13:22:12
epoch [18/50] batch [80/2000] time 2.058 (2.038) data 0.000 (0.009) loss 2.9473 (1.2008) lr 1.7290e-03 eta 1 day, 13:18:36
epoch [18/50] batch [100/2000] time 2.060 (2.039) data 0.000 (0.007) loss 2.6116 (1.2332) lr 1.7290e-03 eta 1 day, 13:19:18
epoch [18/50] batch [120/2000] time 2.033 (2.037) data 0.000 (0.006) loss 3.8141 (1.2760) lr 1.7290e-03 eta 1 day, 13:16:55
epoch [18/50] batch [140/2000] time 2.031 (2.036) data 0.000 (0.005) loss 1.8465 (1.2659) lr 1.7290e-03 eta 1 day, 13:15:18
epoch [18/50] batch [160/2000] time 2.038 (2.037) data 0.000 (0.005) loss 3.0742 (1.2385) lr 1.7290e-03 eta 1 day, 13:14:53
epoch [18/50] batch [180/2000] time 2.000 (2.035) data 0.000 (0.004) loss 1.8782 (1.2471) lr 1.7290e-03 eta 1 day, 13:12:47
epoch [18/50] batch [200/2000] time 2.057 (2.035) data 0.000 (0.004) loss 0.1051 (1.2521) lr 1.7290e-03 eta 1 day, 13:11:46
epoch [18/50] batch [220/2000] time 2.061 (2.035) data 0.000 (0.003) loss 1.4351 (1.2385) lr 1.7290e-03 eta 1 day, 13:10:36
epoch [18/50] batch [240/2000] time 1.998 (2.034) data 0.000 (0.003) loss 0.1267 (1.2420) lr 1.7290e-03 eta 1 day, 13:09:08
epoch [18/50] batch [260/2000] time 2.054 (2.034) data 0.000 (0.003) loss 0.1734 (1.2389) lr 1.7290e-03 eta 1 day, 13:08:16
epoch [18/50] batch [280/2000] time 1.999 (2.033) data 0.000 (0.003) loss 1.0811 (1.2328) lr 1.7290e-03 eta 1 day, 13:07:08
epoch [18/50] batch [300/2000] time 2.054 (2.034) data 0.000 (0.003) loss 0.6961 (1.2501) lr 1.7290e-03 eta 1 day, 13:07:16
epoch [18/50] batch [320/2000] time 2.054 (2.034) data 0.000 (0.002) loss 1.7575 (1.2455) lr 1.7290e-03 eta 1 day, 13:06:05
epoch [18/50] batch [340/2000] time 2.034 (2.033) data 0.000 (0.002) loss 1.0312 (1.2503) lr 1.7290e-03 eta 1 day, 13:04:36
epoch [18/50] batch [360/2000] time 2.004 (2.033) data 0.000 (0.002) loss 0.7698 (1.2399) lr 1.7290e-03 eta 1 day, 13:03:45
epoch [18/50] batch [380/2000] time 2.037 (2.033) data 0.000 (0.002) loss 1.0539 (1.2399) lr 1.7290e-03 eta 1 day, 13:03:20
epoch [18/50] batch [400/2000] time 2.062 (2.033) data 0.000 (0.002) loss 0.2585 (1.2260) lr 1.7290e-03 eta 1 day, 13:02:49
epoch [18/50] batch [420/2000] time 2.062 (2.034) data 0.000 (0.002) loss 0.5060 (1.2273) lr 1.7290e-03 eta 1 day, 13:02:37
epoch [18/50] batch [440/2000] time 2.061 (2.034) data 0.000 (0.002) loss 0.2640 (1.2286) lr 1.7290e-03 eta 1 day, 13:02:15
epoch [18/50] batch [460/2000] time 2.063 (2.034) data 0.000 (0.002) loss 1.2878 (1.2378) lr 1.7290e-03 eta 1 day, 13:01:45
epoch [18/50] batch [480/2000] time 2.065 (2.034) data 0.000 (0.002) loss 0.8286 (1.2366) lr 1.7290e-03 eta 1 day, 13:00:40
epoch [18/50] batch [500/2000] time 2.060 (2.034) data 0.000 (0.002) loss 0.9709 (1.2242) lr 1.7290e-03 eta 1 day, 13:00:09
epoch [18/50] batch [520/2000] time 2.034 (2.034) data 0.000 (0.002) loss 0.5609 (1.2211) lr 1.7290e-03 eta 1 day, 12:59:23
epoch [18/50] batch [540/2000] time 2.003 (2.034) data 0.000 (0.002) loss 2.2917 (1.2173) lr 1.7290e-03 eta 1 day, 12:58:47
epoch [18/50] batch [560/2000] time 2.062 (2.034) data 0.000 (0.001) loss 1.0310 (1.2208) lr 1.7290e-03 eta 1 day, 12:58:02
epoch [18/50] batch [580/2000] time 2.004 (2.034) data 0.000 (0.001) loss 2.2460 (1.2277) lr 1.7290e-03 eta 1 day, 12:57:51
epoch [18/50] batch [600/2000] time 2.000 (2.034) data 0.001 (0.001) loss 0.0450 (1.2264) lr 1.7290e-03 eta 1 day, 12:57:01
epoch [18/50] batch [620/2000] time 2.034 (2.034) data 0.000 (0.001) loss 0.7212 (1.2216) lr 1.7290e-03 eta 1 day, 12:56:11
epoch [18/50] batch [640/2000] time 2.060 (2.034) data 0.000 (0.001) loss 0.1810 (1.2178) lr 1.7290e-03 eta 1 day, 12:55:33
epoch [18/50] batch [660/2000] time 2.003 (2.034) data 0.000 (0.001) loss 2.5077 (1.2196) lr 1.7290e-03 eta 1 day, 12:54:47
epoch [18/50] batch [680/2000] time 2.006 (2.034) data 0.000 (0.001) loss 2.8381 (1.2207) lr 1.7290e-03 eta 1 day, 12:54:24
epoch [18/50] batch [700/2000] time 2.060 (2.034) data 0.000 (0.001) loss 0.8694 (1.2270) lr 1.7290e-03 eta 1 day, 12:53:46
epoch [18/50] batch [720/2000] time 2.063 (2.034) data 0.000 (0.001) loss 0.6767 (1.2247) lr 1.7290e-03 eta 1 day, 12:53:22
epoch [18/50] batch [740/2000] time 2.060 (2.034) data 0.000 (0.001) loss 0.1677 (1.2364) lr 1.7290e-03 eta 1 day, 12:52:46
epoch [18/50] batch [760/2000] time 2.038 (2.034) data 0.000 (0.001) loss 0.4023 (1.2248) lr 1.7290e-03 eta 1 day, 12:52:00
epoch [18/50] batch [780/2000] time 2.008 (2.034) data 0.000 (0.001) loss 1.3947 (1.2193) lr 1.7290e-03 eta 1 day, 12:51:11
epoch [18/50] batch [800/2000] time 2.057 (2.034) data 0.000 (0.001) loss 1.0502 (1.2152) lr 1.7290e-03 eta 1 day, 12:50:32
epoch [18/50] batch [820/2000] time 2.054 (2.034) data 0.000 (0.001) loss 0.0881 (1.2170) lr 1.7290e-03 eta 1 day, 12:49:54
epoch [18/50] batch [840/2000] time 2.055 (2.034) data 0.000 (0.001) loss 0.0514 (1.2168) lr 1.7290e-03 eta 1 day, 12:49:20
epoch [18/50] batch [860/2000] time 1.996 (2.034) data 0.000 (0.001) loss 0.2517 (1.2118) lr 1.7290e-03 eta 1 day, 12:48:28
epoch [18/50] batch [880/2000] time 2.059 (2.034) data 0.000 (0.001) loss 0.2205 (1.2136) lr 1.7290e-03 eta 1 day, 12:47:40
epoch [18/50] batch [900/2000] time 1.998 (2.034) data 0.000 (0.001) loss 2.6137 (1.2111) lr 1.7290e-03 eta 1 day, 12:46:50
epoch [18/50] batch [920/2000] time 2.027 (2.034) data 0.000 (0.001) loss 1.6174 (1.2153) lr 1.7290e-03 eta 1 day, 12:45:59
epoch [18/50] batch [940/2000] time 1.971 (2.034) data 0.000 (0.001) loss 0.0427 (1.2095) lr 1.7290e-03 eta 1 day, 12:45:09
epoch [18/50] batch [960/2000] time 2.056 (2.034) data 0.000 (0.001) loss 1.9401 (1.2097) lr 1.7290e-03 eta 1 day, 12:44:36
epoch [18/50] batch [980/2000] time 2.035 (2.034) data 0.000 (0.001) loss 1.4563 (1.2083) lr 1.7290e-03 eta 1 day, 12:43:50
epoch [18/50] batch [1000/2000] time 2.001 (2.033) data 0.000 (0.001) loss 0.6325 (1.2068) lr 1.7290e-03 eta 1 day, 12:42:54
epoch [18/50] batch [1020/2000] time 2.063 (2.034) data 0.000 (0.001) loss 1.5876 (1.2102) lr 1.7290e-03 eta 1 day, 12:42:27
epoch [18/50] batch [1040/2000] time 2.039 (2.034) data 0.000 (0.001) loss 0.5157 (1.2158) lr 1.7290e-03 eta 1 day, 12:41:46
epoch [18/50] batch [1060/2000] time 2.040 (2.034) data 0.000 (0.001) loss 1.5349 (1.2191) lr 1.7290e-03 eta 1 day, 12:41:15
epoch [18/50] batch [1080/2000] time 2.057 (2.034) data 0.000 (0.001) loss 0.1550 (1.2190) lr 1.7290e-03 eta 1 day, 12:40:37
epoch [18/50] batch [1100/2000] time 2.009 (2.034) data 0.000 (0.001) loss 0.2089 (1.2237) lr 1.7290e-03 eta 1 day, 12:39:59
epoch [18/50] batch [1120/2000] time 2.061 (2.034) data 0.000 (0.001) loss 1.5919 (1.2288) lr 1.7290e-03 eta 1 day, 12:39:19
epoch [18/50] batch [1140/2000] time 2.061 (2.034) data 0.001 (0.001) loss 0.2295 (1.2272) lr 1.7290e-03 eta 1 day, 12:38:38
epoch [18/50] batch [1160/2000] time 2.000 (2.034) data 0.000 (0.001) loss 0.8567 (1.2291) lr 1.7290e-03 eta 1 day, 12:37:52
epoch [18/50] batch [1180/2000] time 2.054 (2.034) data 0.000 (0.001) loss 1.4715 (1.2266) lr 1.7290e-03 eta 1 day, 12:37:10
epoch [18/50] batch [1200/2000] time 2.060 (2.034) data 0.000 (0.001) loss 1.3105 (1.2255) lr 1.7290e-03 eta 1 day, 12:36:22
epoch [18/50] batch [1220/2000] time 2.006 (2.034) data 0.000 (0.001) loss 0.4343 (1.2249) lr 1.7290e-03 eta 1 day, 12:35:44
epoch [18/50] batch [1240/2000] time 1.981 (2.034) data 0.000 (0.001) loss 0.9078 (1.2230) lr 1.7290e-03 eta 1 day, 12:35:09
epoch [18/50] batch [1260/2000] time 2.004 (2.034) data 0.000 (0.001) loss 2.5950 (1.2252) lr 1.7290e-03 eta 1 day, 12:34:24
epoch [18/50] batch [1280/2000] time 2.068 (2.034) data 0.000 (0.001) loss 0.9521 (1.2221) lr 1.7290e-03 eta 1 day, 12:33:51
epoch [18/50] batch [1300/2000] time 2.006 (2.034) data 0.000 (0.001) loss 0.7723 (1.2210) lr 1.7290e-03 eta 1 day, 12:33:13
epoch [18/50] batch [1320/2000] time 2.062 (2.034) data 0.000 (0.001) loss 0.7878 (1.2202) lr 1.7290e-03 eta 1 day, 12:32:26
epoch [18/50] batch [1340/2000] time 2.059 (2.034) data 0.000 (0.001) loss 0.9817 (1.2175) lr 1.7290e-03 eta 1 day, 12:31:48
epoch [18/50] batch [1360/2000] time 2.000 (2.034) data 0.000 (0.001) loss 0.6491 (1.2140) lr 1.7290e-03 eta 1 day, 12:31:01
epoch [18/50] batch [1380/2000] time 2.065 (2.034) data 0.000 (0.001) loss 1.1317 (1.2148) lr 1.7290e-03 eta 1 day, 12:30:20
epoch [18/50] batch [1400/2000] time 2.038 (2.034) data 0.000 (0.001) loss 0.6145 (1.2177) lr 1.7290e-03 eta 1 day, 12:29:49
epoch [18/50] batch [1420/2000] time 1.999 (2.034) data 0.000 (0.001) loss 3.2968 (1.2189) lr 1.7290e-03 eta 1 day, 12:29:04
epoch [18/50] batch [1440/2000] time 2.003 (2.034) data 0.000 (0.001) loss 3.9402 (1.2215) lr 1.7290e-03 eta 1 day, 12:28:22
epoch [18/50] batch [1460/2000] time 2.040 (2.034) data 0.000 (0.001) loss 0.8378 (1.2141) lr 1.7290e-03 eta 1 day, 12:27:40
epoch [18/50] batch [1480/2000] time 1.998 (2.034) data 0.000 (0.001) loss 3.3392 (1.2136) lr 1.7290e-03 eta 1 day, 12:27:06
epoch [18/50] batch [1500/2000] time 2.034 (2.034) data 0.000 (0.001) loss 1.9243 (1.2138) lr 1.7290e-03 eta 1 day, 12:26:25
epoch [18/50] batch [1520/2000] time 2.064 (2.034) data 0.000 (0.001) loss 2.4629 (1.2106) lr 1.7290e-03 eta 1 day, 12:25:51
epoch [18/50] batch [1540/2000] time 2.006 (2.034) data 0.000 (0.001) loss 1.3568 (1.2087) lr 1.7290e-03 eta 1 day, 12:25:09
epoch [18/50] batch [1560/2000] time 2.063 (2.034) data 0.000 (0.001) loss 1.1840 (1.2108) lr 1.7290e-03 eta 1 day, 12:24:27
epoch [18/50] batch [1580/2000] time 2.006 (2.034) data 0.001 (0.001) loss 1.9473 (1.2132) lr 1.7290e-03 eta 1 day, 12:23:48
epoch [18/50] batch [1600/2000] time 2.055 (2.034) data 0.000 (0.001) loss 1.0218 (1.2155) lr 1.7290e-03 eta 1 day, 12:23:11
epoch [18/50] batch [1620/2000] time 2.000 (2.034) data 0.000 (0.001) loss 0.8055 (1.2169) lr 1.7290e-03 eta 1 day, 12:22:34
epoch [18/50] batch [1640/2000] time 2.054 (2.034) data 0.000 (0.001) loss 0.2795 (1.2140) lr 1.7290e-03 eta 1 day, 12:21:50
epoch [18/50] batch [1660/2000] time 2.051 (2.034) data 0.000 (0.001) loss 2.1526 (1.2138) lr 1.7290e-03 eta 1 day, 12:21:12
epoch [18/50] batch [1680/2000] time 2.002 (2.034) data 0.001 (0.001) loss 0.4873 (1.2119) lr 1.7290e-03 eta 1 day, 12:20:32
epoch [18/50] batch [1700/2000] time 2.052 (2.034) data 0.000 (0.001) loss 0.1456 (1.2105) lr 1.7290e-03 eta 1 day, 12:19:52
epoch [18/50] batch [1720/2000] time 2.052 (2.034) data 0.000 (0.001) loss 0.7718 (1.2066) lr 1.7290e-03 eta 1 day, 12:19:09
epoch [18/50] batch [1740/2000] time 2.052 (2.034) data 0.000 (0.001) loss 1.4716 (1.2068) lr 1.7290e-03 eta 1 day, 12:18:28
epoch [18/50] batch [1760/2000] time 2.053 (2.034) data 0.000 (0.001) loss 0.0567 (1.2064) lr 1.7290e-03 eta 1 day, 12:17:42
epoch [18/50] batch [1780/2000] time 2.048 (2.034) data 0.000 (0.001) loss 3.2910 (1.2054) lr 1.7290e-03 eta 1 day, 12:17:03
epoch [18/50] batch [1800/2000] time 2.024 (2.034) data 0.000 (0.001) loss 3.1510 (1.2075) lr 1.7290e-03 eta 1 day, 12:16:15
epoch [18/50] batch [1820/2000] time 2.001 (2.034) data 0.000 (0.001) loss 0.6675 (1.2070) lr 1.7290e-03 eta 1 day, 12:15:27
epoch [18/50] batch [1840/2000] time 2.049 (2.034) data 0.000 (0.001) loss 0.9076 (1.2068) lr 1.7290e-03 eta 1 day, 12:14:48
epoch [18/50] batch [1860/2000] time 2.059 (2.034) data 0.000 (0.001) loss 0.6797 (1.2064) lr 1.7290e-03 eta 1 day, 12:14:07
epoch [18/50] batch [1880/2000] time 2.055 (2.034) data 0.000 (0.001) loss 1.3547 (1.2053) lr 1.7290e-03 eta 1 day, 12:13:29
epoch [18/50] batch [1900/2000] time 2.055 (2.034) data 0.000 (0.001) loss 1.0293 (1.2055) lr 1.7290e-03 eta 1 day, 12:12:50
epoch [18/50] batch [1920/2000] time 2.060 (2.034) data 0.000 (0.001) loss 2.6501 (1.2073) lr 1.7290e-03 eta 1 day, 12:12:07
epoch [18/50] batch [1940/2000] time 2.053 (2.034) data 0.000 (0.001) loss 0.4512 (1.2092) lr 1.7290e-03 eta 1 day, 12:11:23
epoch [18/50] batch [1960/2000] time 2.027 (2.034) data 0.000 (0.001) loss 1.2268 (1.2096) lr 1.7290e-03 eta 1 day, 12:10:42
epoch [18/50] batch [1980/2000] time 2.002 (2.034) data 0.000 (0.001) loss 0.7833 (1.2071) lr 1.7290e-03 eta 1 day, 12:09:56
epoch [18/50] batch [2000/2000] time 2.000 (2.034) data 0.000 (0.001) loss 0.0187 (1.2108) lr 1.6845e-03 eta 1 day, 12:09:16
epoch [19/50] batch [20/2000] time 1.977 (2.064) data 0.000 (0.028) loss 0.5085 (1.1198) lr 1.6845e-03 eta 1 day, 12:41:17
epoch [19/50] batch [40/2000] time 2.051 (2.049) data 0.000 (0.014) loss 1.6692 (1.1214) lr 1.6845e-03 eta 1 day, 12:24:05
epoch [19/50] batch [60/2000] time 2.049 (2.041) data 0.001 (0.009) loss 0.6125 (1.1283) lr 1.6845e-03 eta 1 day, 12:14:59
epoch [19/50] batch [80/2000] time 1.999 (2.038) data 0.000 (0.007) loss 1.5595 (1.1616) lr 1.6845e-03 eta 1 day, 12:11:22
epoch [19/50] batch [100/2000] time 2.031 (2.037) data 0.000 (0.006) loss 0.6180 (1.2274) lr 1.6845e-03 eta 1 day, 12:09:41
epoch [19/50] batch [120/2000] time 1.996 (2.036) data 0.000 (0.005) loss 1.0805 (1.2349) lr 1.6845e-03 eta 1 day, 12:08:11
epoch [19/50] batch [140/2000] time 2.027 (2.037) data 0.000 (0.004) loss 0.1650 (1.2093) lr 1.6845e-03 eta 1 day, 12:07:36
epoch [19/50] batch [160/2000] time 2.049 (2.036) data 0.000 (0.004) loss 1.5951 (1.2404) lr 1.6845e-03 eta 1 day, 12:06:15
epoch [19/50] batch [180/2000] time 2.055 (2.035) data 0.000 (0.003) loss 1.4179 (1.2578) lr 1.6845e-03 eta 1 day, 12:04:44
epoch [19/50] batch [200/2000] time 2.053 (2.035) data 0.000 (0.003) loss 1.0604 (1.2722) lr 1.6845e-03 eta 1 day, 12:03:24
epoch [19/50] batch [220/2000] time 2.030 (2.034) data 0.000 (0.003) loss 0.6846 (1.2557) lr 1.6845e-03 eta 1 day, 12:01:55
epoch [19/50] batch [240/2000] time 1.997 (2.033) data 0.000 (0.003) loss 1.0695 (1.2551) lr 1.6845e-03 eta 1 day, 12:00:32
epoch [19/50] batch [260/2000] time 2.051 (2.033) data 0.000 (0.002) loss 4.6277 (1.2391) lr 1.6845e-03 eta 1 day, 11:59:38
epoch [19/50] batch [280/2000] time 2.053 (2.032) data 0.000 (0.002) loss 2.4842 (1.2446) lr 1.6845e-03 eta 1 day, 11:58:30
epoch [19/50] batch [300/2000] time 1.998 (2.032) data 0.000 (0.002) loss 2.1837 (1.2459) lr 1.6845e-03 eta 1 day, 11:57:31
epoch [19/50] batch [320/2000] time 2.050 (2.032) data 0.000 (0.002) loss 1.5464 (1.2488) lr 1.6845e-03 eta 1 day, 11:56:37
epoch [19/50] batch [340/2000] time 2.051 (2.032) data 0.000 (0.002) loss 0.0993 (1.2607) lr 1.6845e-03 eta 1 day, 11:55:51
epoch [19/50] batch [360/2000] time 2.051 (2.032) data 0.000 (0.002) loss 1.3984 (1.2641) lr 1.6845e-03 eta 1 day, 11:55:03
epoch [19/50] batch [380/2000] time 2.000 (2.031) data 0.000 (0.002) loss 1.4139 (1.2596) lr 1.6845e-03 eta 1 day, 11:53:57
epoch [19/50] batch [400/2000] time 2.055 (2.031) data 0.000 (0.002) loss 1.0893 (1.2559) lr 1.6845e-03 eta 1 day, 11:53:05
epoch [19/50] batch [420/2000] time 2.048 (2.031) data 0.000 (0.002) loss 1.7117 (1.2655) lr 1.6845e-03 eta 1 day, 11:52:05
epoch [19/50] batch [440/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.5939 (1.2523) lr 1.6845e-03 eta 1 day, 11:50:49
epoch [19/50] batch [460/2000] time 2.025 (2.030) data 0.000 (0.001) loss 2.4981 (1.2507) lr 1.6845e-03 eta 1 day, 11:50:01
epoch [19/50] batch [480/2000] time 2.025 (2.030) data 0.000 (0.001) loss 0.1250 (1.2370) lr 1.6845e-03 eta 1 day, 11:48:58
epoch [19/50] batch [500/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.2235 (1.2278) lr 1.6845e-03 eta 1 day, 11:47:58
epoch [19/50] batch [520/2000] time 2.055 (2.029) data 0.000 (0.001) loss 0.3663 (1.2234) lr 1.6845e-03 eta 1 day, 11:47:11
epoch [19/50] batch [540/2000] time 2.024 (2.030) data 0.000 (0.001) loss 0.3037 (1.2275) lr 1.6845e-03 eta 1 day, 11:46:32
epoch [19/50] batch [560/2000] time 1.976 (2.029) data 0.000 (0.001) loss 3.7904 (1.2284) lr 1.6845e-03 eta 1 day, 11:45:51
epoch [19/50] batch [580/2000] time 2.023 (2.029) data 0.000 (0.001) loss 0.3388 (1.2266) lr 1.6845e-03 eta 1 day, 11:45:01
epoch [19/50] batch [600/2000] time 1.995 (2.029) data 0.000 (0.001) loss 0.6646 (1.2370) lr 1.6845e-03 eta 1 day, 11:44:17
epoch [19/50] batch [620/2000] time 2.047 (2.029) data 0.000 (0.001) loss 0.6536 (1.2332) lr 1.6845e-03 eta 1 day, 11:43:36
epoch [19/50] batch [640/2000] time 2.024 (2.029) data 0.000 (0.001) loss 0.6434 (1.2219) lr 1.6845e-03 eta 1 day, 11:42:31
epoch [19/50] batch [660/2000] time 2.046 (2.029) data 0.000 (0.001) loss 0.0641 (1.2063) lr 1.6845e-03 eta 1 day, 11:41:44
epoch [19/50] batch [680/2000] time 2.045 (2.029) data 0.000 (0.001) loss 0.9948 (1.2055) lr 1.6845e-03 eta 1 day, 11:41:08
epoch [19/50] batch [700/2000] time 1.997 (2.028) data 0.000 (0.001) loss 2.7954 (1.2030) lr 1.6845e-03 eta 1 day, 11:40:03
epoch [19/50] batch [720/2000] time 2.001 (2.028) data 0.000 (0.001) loss 0.9409 (1.1972) lr 1.6845e-03 eta 1 day, 11:39:20
epoch [19/50] batch [740/2000] time 2.057 (2.028) data 0.000 (0.001) loss 2.4473 (1.2011) lr 1.6845e-03 eta 1 day, 11:38:32
epoch [19/50] batch [760/2000] time 1.996 (2.028) data 0.000 (0.001) loss 1.8049 (1.1885) lr 1.6845e-03 eta 1 day, 11:37:49
epoch [19/50] batch [780/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.2591 (1.1871) lr 1.6845e-03 eta 1 day, 11:37:06
epoch [19/50] batch [800/2000] time 2.026 (2.028) data 0.000 (0.001) loss 1.0964 (1.1828) lr 1.6845e-03 eta 1 day, 11:36:21
epoch [19/50] batch [820/2000] time 2.052 (2.028) data 0.000 (0.001) loss 4.3774 (1.1845) lr 1.6845e-03 eta 1 day, 11:35:45
epoch [19/50] batch [840/2000] time 2.055 (2.028) data 0.000 (0.001) loss 2.6010 (1.1904) lr 1.6845e-03 eta 1 day, 11:35:06
epoch [19/50] batch [860/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.1820 (1.1944) lr 1.6845e-03 eta 1 day, 11:34:34
epoch [19/50] batch [880/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.4718 (1.1926) lr 1.6845e-03 eta 1 day, 11:33:49
epoch [19/50] batch [900/2000] time 2.047 (2.028) data 0.000 (0.001) loss 2.1512 (1.1887) lr 1.6845e-03 eta 1 day, 11:33:11
epoch [19/50] batch [920/2000] time 2.053 (2.028) data 0.000 (0.001) loss 0.3628 (1.1882) lr 1.6845e-03 eta 1 day, 11:32:28
epoch [19/50] batch [940/2000] time 2.032 (2.028) data 0.000 (0.001) loss 1.3043 (1.1861) lr 1.6845e-03 eta 1 day, 11:31:49
epoch [19/50] batch [960/2000] time 2.049 (2.028) data 0.000 (0.001) loss 2.2943 (1.1796) lr 1.6845e-03 eta 1 day, 11:31:03
epoch [19/50] batch [980/2000] time 1.992 (2.028) data 0.000 (0.001) loss 0.8836 (1.1809) lr 1.6845e-03 eta 1 day, 11:30:15
epoch [19/50] batch [1000/2000] time 2.025 (2.028) data 0.000 (0.001) loss 1.3215 (1.1853) lr 1.6845e-03 eta 1 day, 11:29:31
epoch [19/50] batch [1020/2000] time 1.993 (2.028) data 0.000 (0.001) loss 2.9829 (1.1860) lr 1.6845e-03 eta 1 day, 11:28:39
epoch [19/50] batch [1040/2000] time 1.995 (2.028) data 0.000 (0.001) loss 0.4840 (1.1809) lr 1.6845e-03 eta 1 day, 11:28:02
epoch [19/50] batch [1060/2000] time 2.028 (2.028) data 0.000 (0.001) loss 2.5860 (1.1895) lr 1.6845e-03 eta 1 day, 11:27:13
epoch [19/50] batch [1080/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.5803 (1.1920) lr 1.6845e-03 eta 1 day, 11:26:39
epoch [19/50] batch [1100/2000] time 2.026 (2.028) data 0.000 (0.001) loss 1.1526 (1.1957) lr 1.6845e-03 eta 1 day, 11:26:03
epoch [19/50] batch [1120/2000] time 1.976 (2.028) data 0.000 (0.001) loss 1.6829 (1.1921) lr 1.6845e-03 eta 1 day, 11:25:21
epoch [19/50] batch [1140/2000] time 2.053 (2.028) data 0.001 (0.001) loss 1.3551 (1.1929) lr 1.6845e-03 eta 1 day, 11:24:37
epoch [19/50] batch [1160/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.5214 (1.1931) lr 1.6845e-03 eta 1 day, 11:23:58
epoch [19/50] batch [1180/2000] time 1.973 (2.028) data 0.000 (0.001) loss 0.1502 (1.1903) lr 1.6845e-03 eta 1 day, 11:23:12
epoch [19/50] batch [1200/2000] time 1.995 (2.028) data 0.000 (0.001) loss 0.1723 (1.1884) lr 1.6845e-03 eta 1 day, 11:22:28
epoch [19/50] batch [1220/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.1231 (1.1862) lr 1.6845e-03 eta 1 day, 11:21:41
epoch [19/50] batch [1240/2000] time 2.047 (2.028) data 0.000 (0.001) loss 0.9584 (1.1846) lr 1.6845e-03 eta 1 day, 11:20:58
epoch [19/50] batch [1260/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.8512 (1.1868) lr 1.6845e-03 eta 1 day, 11:20:16
epoch [19/50] batch [1280/2000] time 2.053 (2.028) data 0.000 (0.001) loss 1.5125 (1.1834) lr 1.6845e-03 eta 1 day, 11:19:36
epoch [19/50] batch [1300/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.1246 (1.1884) lr 1.6845e-03 eta 1 day, 11:19:02
epoch [19/50] batch [1320/2000] time 1.997 (2.028) data 0.000 (0.001) loss 1.9601 (1.1909) lr 1.6845e-03 eta 1 day, 11:18:25
epoch [19/50] batch [1340/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.9194 (1.1873) lr 1.6845e-03 eta 1 day, 11:17:49
epoch [19/50] batch [1360/2000] time 2.000 (2.028) data 0.000 (0.001) loss 0.2565 (1.1890) lr 1.6845e-03 eta 1 day, 11:17:14
epoch [19/50] batch [1380/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.2222 (1.1897) lr 1.6845e-03 eta 1 day, 11:16:31
epoch [19/50] batch [1400/2000] time 1.977 (2.028) data 0.000 (0.001) loss 1.2611 (1.1918) lr 1.6845e-03 eta 1 day, 11:15:53
epoch [19/50] batch [1420/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.1267 (1.1881) lr 1.6845e-03 eta 1 day, 11:15:11
epoch [19/50] batch [1440/2000] time 2.055 (2.028) data 0.000 (0.001) loss 0.3952 (1.1847) lr 1.6845e-03 eta 1 day, 11:14:26
epoch [19/50] batch [1460/2000] time 1.973 (2.028) data 0.000 (0.001) loss 0.4767 (1.1824) lr 1.6845e-03 eta 1 day, 11:13:37
epoch [19/50] batch [1480/2000] time 1.997 (2.028) data 0.000 (0.001) loss 1.1306 (1.1867) lr 1.6845e-03 eta 1 day, 11:12:59
epoch [19/50] batch [1500/2000] time 2.000 (2.028) data 0.000 (0.001) loss 1.5729 (1.1850) lr 1.6845e-03 eta 1 day, 11:12:22
epoch [19/50] batch [1520/2000] time 2.003 (2.028) data 0.000 (0.001) loss 0.7562 (1.1781) lr 1.6845e-03 eta 1 day, 11:11:40
epoch [19/50] batch [1540/2000] time 2.034 (2.028) data 0.000 (0.001) loss 2.2161 (1.1769) lr 1.6845e-03 eta 1 day, 11:11:01
epoch [19/50] batch [1560/2000] time 2.029 (2.028) data 0.000 (0.001) loss 2.3192 (1.1798) lr 1.6845e-03 eta 1 day, 11:10:21
epoch [19/50] batch [1580/2000] time 1.998 (2.028) data 0.000 (0.001) loss 1.1374 (1.1815) lr 1.6845e-03 eta 1 day, 11:09:46
epoch [19/50] batch [1600/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.9208 (1.1858) lr 1.6845e-03 eta 1 day, 11:09:08
epoch [19/50] batch [1620/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.1405 (1.1881) lr 1.6845e-03 eta 1 day, 11:08:24
epoch [19/50] batch [1640/2000] time 2.028 (2.028) data 0.000 (0.001) loss 1.3102 (1.1852) lr 1.6845e-03 eta 1 day, 11:07:42
epoch [19/50] batch [1660/2000] time 1.999 (2.028) data 0.000 (0.001) loss 2.3736 (1.1866) lr 1.6845e-03 eta 1 day, 11:07:03
epoch [19/50] batch [1680/2000] time 2.028 (2.028) data 0.001 (0.001) loss 0.2174 (1.1876) lr 1.6845e-03 eta 1 day, 11:06:29
epoch [19/50] batch [1700/2000] time 2.057 (2.028) data 0.000 (0.001) loss 1.3901 (1.1894) lr 1.6845e-03 eta 1 day, 11:05:46
epoch [19/50] batch [1720/2000] time 2.049 (2.028) data 0.000 (0.001) loss 2.7238 (1.1966) lr 1.6845e-03 eta 1 day, 11:05:12
epoch [19/50] batch [1740/2000] time 2.054 (2.028) data 0.000 (0.001) loss 0.6802 (1.2007) lr 1.6845e-03 eta 1 day, 11:04:32
epoch [19/50] batch [1760/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.7643 (1.2004) lr 1.6845e-03 eta 1 day, 11:03:57
epoch [19/50] batch [1780/2000] time 2.053 (2.028) data 0.000 (0.001) loss 1.7014 (1.2007) lr 1.6845e-03 eta 1 day, 11:03:20
epoch [19/50] batch [1800/2000] time 1.997 (2.028) data 0.000 (0.001) loss 2.5583 (1.2001) lr 1.6845e-03 eta 1 day, 11:02:35
epoch [19/50] batch [1820/2000] time 2.030 (2.028) data 0.000 (0.001) loss 0.7365 (1.1986) lr 1.6845e-03 eta 1 day, 11:01:55
epoch [19/50] batch [1840/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.1658 (1.1981) lr 1.6845e-03 eta 1 day, 11:01:15
epoch [19/50] batch [1860/2000] time 2.001 (2.028) data 0.000 (0.001) loss 1.2341 (1.1992) lr 1.6845e-03 eta 1 day, 11:00:36
epoch [19/50] batch [1880/2000] time 2.029 (2.028) data 0.000 (0.000) loss 0.6666 (1.1987) lr 1.6845e-03 eta 1 day, 10:59:59
epoch [19/50] batch [1900/2000] time 2.053 (2.028) data 0.000 (0.000) loss 0.2988 (1.2004) lr 1.6845e-03 eta 1 day, 10:59:20
epoch [19/50] batch [1920/2000] time 2.030 (2.028) data 0.000 (0.000) loss 0.7712 (1.1996) lr 1.6845e-03 eta 1 day, 10:58:43
epoch [19/50] batch [1940/2000] time 2.053 (2.028) data 0.000 (0.000) loss 0.7280 (1.1991) lr 1.6845e-03 eta 1 day, 10:58:01
epoch [19/50] batch [1960/2000] time 2.000 (2.028) data 0.000 (0.000) loss 0.3630 (1.1995) lr 1.6845e-03 eta 1 day, 10:57:22
epoch [19/50] batch [1980/2000] time 2.002 (2.028) data 0.000 (0.000) loss 2.2179 (1.1993) lr 1.6845e-03 eta 1 day, 10:56:46
epoch [19/50] batch [2000/2000] time 2.028 (2.028) data 0.000 (0.000) loss 2.9095 (1.1962) lr 1.6374e-03 eta 1 day, 10:56:05
epoch [20/50] batch [20/2000] time 1.999 (2.052) data 0.000 (0.028) loss 0.2364 (1.2729) lr 1.6374e-03 eta 1 day, 11:19:44
epoch [20/50] batch [40/2000] time 2.027 (2.042) data 0.000 (0.014) loss 2.4930 (1.3012) lr 1.6374e-03 eta 1 day, 11:09:08
epoch [20/50] batch [60/2000] time 2.027 (2.036) data 0.001 (0.010) loss 0.8882 (1.2446) lr 1.6374e-03 eta 1 day, 11:01:57
epoch [20/50] batch [80/2000] time 2.032 (2.034) data 0.000 (0.007) loss 3.1420 (1.1714) lr 1.6374e-03 eta 1 day, 10:59:15
epoch [20/50] batch [100/2000] time 1.998 (2.033) data 0.000 (0.006) loss 1.9244 (1.1984) lr 1.6374e-03 eta 1 day, 10:57:05
epoch [20/50] batch [120/2000] time 1.999 (2.032) data 0.000 (0.005) loss 2.6982 (1.2425) lr 1.6374e-03 eta 1 day, 10:55:19
epoch [20/50] batch [140/2000] time 1.972 (2.031) data 0.000 (0.004) loss 0.9391 (1.2689) lr 1.6374e-03 eta 1 day, 10:54:28
epoch [20/50] batch [160/2000] time 2.027 (2.030) data 0.000 (0.004) loss 1.8339 (1.2640) lr 1.6374e-03 eta 1 day, 10:52:35
epoch [20/50] batch [180/2000] time 2.051 (2.030) data 0.000 (0.003) loss 0.9969 (1.2619) lr 1.6374e-03 eta 1 day, 10:51:52
epoch [20/50] batch [200/2000] time 1.983 (2.029) data 0.000 (0.003) loss 0.5929 (1.2711) lr 1.6374e-03 eta 1 day, 10:50:10
epoch [20/50] batch [220/2000] time 2.027 (2.029) data 0.000 (0.003) loss 1.2021 (1.2862) lr 1.6374e-03 eta 1 day, 10:49:04
epoch [20/50] batch [240/2000] time 2.005 (2.028) data 0.000 (0.003) loss 0.3547 (1.2733) lr 1.6374e-03 eta 1 day, 10:47:43
epoch [20/50] batch [260/2000] time 2.053 (2.028) data 0.000 (0.002) loss 0.0798 (1.2511) lr 1.6374e-03 eta 1 day, 10:46:50
epoch [20/50] batch [280/2000] time 2.053 (2.028) data 0.000 (0.002) loss 0.4262 (1.2246) lr 1.6374e-03 eta 1 day, 10:46:15
epoch [20/50] batch [300/2000] time 2.050 (2.028) data 0.000 (0.002) loss 0.1671 (1.2208) lr 1.6374e-03 eta 1 day, 10:45:53
epoch [20/50] batch [320/2000] time 1.997 (2.028) data 0.000 (0.002) loss 0.7788 (1.2009) lr 1.6374e-03 eta 1 day, 10:45:14
epoch [20/50] batch [340/2000] time 2.000 (2.028) data 0.000 (0.002) loss 1.3264 (1.1993) lr 1.6374e-03 eta 1 day, 10:44:32
epoch [20/50] batch [360/2000] time 1.974 (2.028) data 0.000 (0.002) loss 3.9929 (1.2003) lr 1.6374e-03 eta 1 day, 10:43:31
epoch [20/50] batch [380/2000] time 2.075 (2.028) data 0.000 (0.002) loss 0.1483 (1.2037) lr 1.6374e-03 eta 1 day, 10:43:02
epoch [20/50] batch [400/2000] time 2.037 (2.029) data 0.000 (0.002) loss 0.8380 (1.1912) lr 1.6374e-03 eta 1 day, 10:42:57
epoch [20/50] batch [420/2000] time 2.015 (2.030) data 0.000 (0.002) loss 2.1763 (1.1875) lr 1.6374e-03 eta 1 day, 10:43:13
epoch [20/50] batch [440/2000] time 2.052 (2.030) data 0.000 (0.002) loss 2.8030 (1.1845) lr 1.6374e-03 eta 1 day, 10:43:09
epoch [20/50] batch [460/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.9540 (1.1763) lr 1.6374e-03 eta 1 day, 10:42:43
epoch [20/50] batch [480/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.9801 (1.1880) lr 1.6374e-03 eta 1 day, 10:41:43
epoch [20/50] batch [500/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.4060 (1.1847) lr 1.6374e-03 eta 1 day, 10:41:01
epoch [20/50] batch [520/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.9402 (1.1737) lr 1.6374e-03 eta 1 day, 10:40:22
epoch [20/50] batch [540/2000] time 2.000 (2.030) data 0.000 (0.001) loss 1.3169 (1.1671) lr 1.6374e-03 eta 1 day, 10:39:36
epoch [20/50] batch [560/2000] time 2.003 (2.030) data 0.000 (0.001) loss 1.4595 (1.1579) lr 1.6374e-03 eta 1 day, 10:39:03
epoch [20/50] batch [580/2000] time 1.978 (2.030) data 0.000 (0.001) loss 1.0796 (1.1635) lr 1.6374e-03 eta 1 day, 10:38:14
epoch [20/50] batch [600/2000] time 2.034 (2.030) data 0.001 (0.001) loss 1.2714 (1.1605) lr 1.6374e-03 eta 1 day, 10:37:43
epoch [20/50] batch [620/2000] time 2.033 (2.031) data 0.000 (0.001) loss 1.1785 (1.1619) lr 1.6374e-03 eta 1 day, 10:37:20
epoch [20/50] batch [640/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.3876 (1.1684) lr 1.6374e-03 eta 1 day, 10:36:47
epoch [20/50] batch [660/2000] time 2.030 (2.031) data 0.000 (0.001) loss 1.1028 (1.1706) lr 1.6374e-03 eta 1 day, 10:36:03
epoch [20/50] batch [680/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.9498 (1.1810) lr 1.6374e-03 eta 1 day, 10:35:28
epoch [20/50] batch [700/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.0754 (1.1781) lr 1.6374e-03 eta 1 day, 10:34:43
epoch [20/50] batch [720/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.8609 (1.1857) lr 1.6374e-03 eta 1 day, 10:34:04
epoch [20/50] batch [740/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.6250 (1.1800) lr 1.6374e-03 eta 1 day, 10:33:23
epoch [20/50] batch [760/2000] time 2.031 (2.031) data 0.000 (0.001) loss 2.4099 (1.1857) lr 1.6374e-03 eta 1 day, 10:32:53
epoch [20/50] batch [780/2000] time 2.032 (2.031) data 0.000 (0.001) loss 1.3538 (1.1833) lr 1.6374e-03 eta 1 day, 10:32:07
epoch [20/50] batch [800/2000] time 2.057 (2.031) data 0.000 (0.001) loss 0.7963 (1.1905) lr 1.6374e-03 eta 1 day, 10:31:37
epoch [20/50] batch [820/2000] time 1.999 (2.031) data 0.000 (0.001) loss 1.2491 (1.1966) lr 1.6374e-03 eta 1 day, 10:31:06
epoch [20/50] batch [840/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.6963 (1.1982) lr 1.6374e-03 eta 1 day, 10:30:18
epoch [20/50] batch [860/2000] time 2.028 (2.031) data 0.000 (0.001) loss 1.4606 (1.2009) lr 1.6374e-03 eta 1 day, 10:29:30
epoch [20/50] batch [880/2000] time 2.055 (2.031) data 0.000 (0.001) loss 1.3416 (1.1975) lr 1.6374e-03 eta 1 day, 10:28:49
epoch [20/50] batch [900/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.7823 (1.2039) lr 1.6374e-03 eta 1 day, 10:27:57
epoch [20/50] batch [920/2000] time 2.000 (2.031) data 0.000 (0.001) loss 2.2822 (1.2019) lr 1.6374e-03 eta 1 day, 10:27:09
epoch [20/50] batch [940/2000] time 2.000 (2.031) data 0.000 (0.001) loss 2.2835 (1.2016) lr 1.6374e-03 eta 1 day, 10:26:26
epoch [20/50] batch [960/2000] time 2.057 (2.031) data 0.000 (0.001) loss 1.8012 (1.2053) lr 1.6374e-03 eta 1 day, 10:25:48
epoch [20/50] batch [980/2000] time 2.058 (2.031) data 0.000 (0.001) loss 0.3129 (1.2094) lr 1.6374e-03 eta 1 day, 10:25:11
epoch [20/50] batch [1000/2000] time 2.005 (2.031) data 0.000 (0.001) loss 1.2639 (1.2053) lr 1.6374e-03 eta 1 day, 10:24:29
epoch [20/50] batch [1020/2000] time 2.057 (2.031) data 0.000 (0.001) loss 0.1635 (1.2074) lr 1.6374e-03 eta 1 day, 10:24:02
epoch [20/50] batch [1040/2000] time 2.047 (2.031) data 0.000 (0.001) loss 2.0769 (1.2082) lr 1.6374e-03 eta 1 day, 10:23:15
epoch [20/50] batch [1060/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.1968 (1.2042) lr 1.6374e-03 eta 1 day, 10:22:34
epoch [20/50] batch [1080/2000] time 1.973 (2.031) data 0.000 (0.001) loss 0.8586 (1.2037) lr 1.6374e-03 eta 1 day, 10:21:56
epoch [20/50] batch [1100/2000] time 2.002 (2.031) data 0.000 (0.001) loss 2.2452 (1.2032) lr 1.6374e-03 eta 1 day, 10:21:18
epoch [20/50] batch [1120/2000] time 2.031 (2.031) data 0.000 (0.001) loss 0.4573 (1.2023) lr 1.6374e-03 eta 1 day, 10:20:41
epoch [20/50] batch [1140/2000] time 2.000 (2.031) data 0.001 (0.001) loss 1.5829 (1.2034) lr 1.6374e-03 eta 1 day, 10:20:00
epoch [20/50] batch [1160/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.4297 (1.1983) lr 1.6374e-03 eta 1 day, 10:19:16
epoch [20/50] batch [1180/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.3094 (1.1982) lr 1.6374e-03 eta 1 day, 10:18:37
epoch [20/50] batch [1200/2000] time 2.057 (2.031) data 0.000 (0.001) loss 1.6615 (1.2031) lr 1.6374e-03 eta 1 day, 10:18:07
epoch [20/50] batch [1220/2000] time 2.058 (2.031) data 0.000 (0.001) loss 2.1436 (1.2052) lr 1.6374e-03 eta 1 day, 10:17:32
epoch [20/50] batch [1240/2000] time 2.035 (2.031) data 0.000 (0.001) loss 1.9091 (1.2082) lr 1.6374e-03 eta 1 day, 10:16:47
epoch [20/50] batch [1260/2000] time 2.060 (2.031) data 0.000 (0.001) loss 0.2913 (1.2084) lr 1.6374e-03 eta 1 day, 10:16:11
epoch [20/50] batch [1280/2000] time 2.003 (2.031) data 0.000 (0.001) loss 0.8696 (1.2088) lr 1.6374e-03 eta 1 day, 10:15:32
epoch [20/50] batch [1300/2000] time 2.058 (2.031) data 0.000 (0.001) loss 1.9647 (1.2138) lr 1.6374e-03 eta 1 day, 10:14:58
epoch [20/50] batch [1320/2000] time 2.001 (2.031) data 0.000 (0.001) loss 1.2467 (1.2151) lr 1.6374e-03 eta 1 day, 10:14:17
epoch [20/50] batch [1340/2000] time 2.055 (2.031) data 0.000 (0.001) loss 1.7980 (1.2199) lr 1.6374e-03 eta 1 day, 10:13:37
epoch [20/50] batch [1360/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.4050 (1.2201) lr 1.6374e-03 eta 1 day, 10:12:51
epoch [20/50] batch [1380/2000] time 1.976 (2.031) data 0.000 (0.001) loss 0.3204 (1.2178) lr 1.6374e-03 eta 1 day, 10:12:11
epoch [20/50] batch [1400/2000] time 2.000 (2.031) data 0.000 (0.001) loss 2.0204 (1.2182) lr 1.6374e-03 eta 1 day, 10:11:30
epoch [20/50] batch [1420/2000] time 2.035 (2.031) data 0.000 (0.001) loss 0.6913 (1.2139) lr 1.6374e-03 eta 1 day, 10:10:52
epoch [20/50] batch [1440/2000] time 2.004 (2.031) data 0.000 (0.001) loss 2.3887 (1.2129) lr 1.6374e-03 eta 1 day, 10:10:13
epoch [20/50] batch [1460/2000] time 2.054 (2.031) data 0.000 (0.001) loss 0.3484 (1.2121) lr 1.6374e-03 eta 1 day, 10:09:38
epoch [20/50] batch [1480/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.2725 (1.2079) lr 1.6374e-03 eta 1 day, 10:08:58
epoch [20/50] batch [1500/2000] time 2.032 (2.031) data 0.000 (0.001) loss 0.1062 (1.2095) lr 1.6374e-03 eta 1 day, 10:08:24
epoch [20/50] batch [1520/2000] time 2.031 (2.031) data 0.000 (0.001) loss 0.0265 (1.2125) lr 1.6374e-03 eta 1 day, 10:07:43
epoch [20/50] batch [1540/2000] time 2.037 (2.031) data 0.000 (0.001) loss 1.3230 (1.2107) lr 1.6374e-03 eta 1 day, 10:06:58
epoch [20/50] batch [1560/2000] time 2.000 (2.032) data 0.000 (0.001) loss 0.3342 (1.2103) lr 1.6374e-03 eta 1 day, 10:06:28
epoch [20/50] batch [1580/2000] time 2.032 (2.032) data 0.000 (0.001) loss 0.7775 (1.2146) lr 1.6374e-03 eta 1 day, 10:05:52
epoch [20/50] batch [1600/2000] time 2.031 (2.032) data 0.000 (0.001) loss 1.7531 (1.2190) lr 1.6374e-03 eta 1 day, 10:05:18
epoch [20/50] batch [1620/2000] time 2.056 (2.032) data 0.000 (0.001) loss 2.5981 (1.2200) lr 1.6374e-03 eta 1 day, 10:04:38
epoch [20/50] batch [1640/2000] time 2.029 (2.032) data 0.000 (0.001) loss 3.0495 (1.2229) lr 1.6374e-03 eta 1 day, 10:03:55
epoch [20/50] batch [1660/2000] time 2.053 (2.032) data 0.000 (0.001) loss 1.5662 (1.2237) lr 1.6374e-03 eta 1 day, 10:03:19
epoch [20/50] batch [1680/2000] time 2.031 (2.032) data 0.000 (0.001) loss 0.1556 (1.2228) lr 1.6374e-03 eta 1 day, 10:02:41
epoch [20/50] batch [1700/2000] time 2.058 (2.032) data 0.000 (0.001) loss 0.8409 (1.2187) lr 1.6374e-03 eta 1 day, 10:01:58
epoch [20/50] batch [1720/2000] time 2.053 (2.032) data 0.000 (0.001) loss 0.0871 (1.2186) lr 1.6374e-03 eta 1 day, 10:01:23
epoch [20/50] batch [1740/2000] time 2.054 (2.032) data 0.000 (0.001) loss 3.3581 (1.2181) lr 1.6374e-03 eta 1 day, 10:00:42
epoch [20/50] batch [1760/2000] time 1.999 (2.032) data 0.000 (0.001) loss 0.6533 (1.2196) lr 1.6374e-03 eta 1 day, 10:00:04
epoch [20/50] batch [1780/2000] time 2.005 (2.032) data 0.000 (0.001) loss 2.1316 (1.2186) lr 1.6374e-03 eta 1 day, 9:59:20
epoch [20/50] batch [1800/2000] time 2.001 (2.032) data 0.000 (0.001) loss 0.4189 (1.2232) lr 1.6374e-03 eta 1 day, 9:58:37
epoch [20/50] batch [1820/2000] time 2.056 (2.032) data 0.000 (0.001) loss 0.9227 (1.2230) lr 1.6374e-03 eta 1 day, 9:57:54
epoch [20/50] batch [1840/2000] time 2.032 (2.032) data 0.000 (0.001) loss 0.5783 (1.2233) lr 1.6374e-03 eta 1 day, 9:57:19
epoch [20/50] batch [1860/2000] time 2.032 (2.032) data 0.000 (0.001) loss 0.7563 (1.2248) lr 1.6374e-03 eta 1 day, 9:56:41
epoch [20/50] batch [1880/2000] time 2.058 (2.032) data 0.000 (0.001) loss 1.2604 (1.2215) lr 1.6374e-03 eta 1 day, 9:55:57
epoch [20/50] batch [1900/2000] time 2.029 (2.032) data 0.000 (0.001) loss 0.4047 (1.2211) lr 1.6374e-03 eta 1 day, 9:55:17
epoch [20/50] batch [1920/2000] time 2.055 (2.032) data 0.000 (0.001) loss 0.0989 (1.2234) lr 1.6374e-03 eta 1 day, 9:54:37
epoch [20/50] batch [1940/2000] time 2.055 (2.032) data 0.000 (0.001) loss 0.9398 (1.2233) lr 1.6374e-03 eta 1 day, 9:53:57
epoch [20/50] batch [1960/2000] time 2.001 (2.032) data 0.000 (0.001) loss 0.2743 (1.2223) lr 1.6374e-03 eta 1 day, 9:53:19
epoch [20/50] batch [1980/2000] time 2.058 (2.032) data 0.000 (0.001) loss 1.6163 (1.2251) lr 1.6374e-03 eta 1 day, 9:52:42
epoch [20/50] batch [2000/2000] time 2.029 (2.032) data 0.000 (0.001) loss 0.8698 (1.2276) lr 1.5878e-03 eta 1 day, 9:52:01
epoch [21/50] batch [20/2000] time 2.001 (2.059) data 0.000 (0.034) loss 1.5582 (1.5758) lr 1.5878e-03 eta 1 day, 10:17:58
epoch [21/50] batch [40/2000] time 2.053 (2.047) data 0.000 (0.017) loss 0.7704 (1.4216) lr 1.5878e-03 eta 1 day, 10:05:26
epoch [21/50] batch [60/2000] time 1.999 (2.040) data 0.001 (0.011) loss 1.5163 (1.3423) lr 1.5878e-03 eta 1 day, 9:57:33
epoch [21/50] batch [80/2000] time 2.052 (2.036) data 0.000 (0.009) loss 3.5772 (1.3197) lr 1.5878e-03 eta 1 day, 9:53:37
epoch [21/50] batch [100/2000] time 2.053 (2.035) data 0.000 (0.007) loss 0.4568 (1.2826) lr 1.5878e-03 eta 1 day, 9:52:00
epoch [21/50] batch [120/2000] time 2.056 (2.035) data 0.000 (0.006) loss 1.1141 (1.3136) lr 1.5878e-03 eta 1 day, 9:50:41
epoch [21/50] batch [140/2000] time 2.056 (2.034) data 0.000 (0.005) loss 0.2050 (1.3527) lr 1.5878e-03 eta 1 day, 9:49:06
epoch [21/50] batch [160/2000] time 2.030 (2.033) data 0.000 (0.004) loss 0.2752 (1.3175) lr 1.5878e-03 eta 1 day, 9:47:08
epoch [21/50] batch [180/2000] time 1.998 (2.033) data 0.000 (0.004) loss 0.8915 (1.2593) lr 1.5878e-03 eta 1 day, 9:46:30
epoch [21/50] batch [200/2000] time 2.031 (2.033) data 0.000 (0.004) loss 1.0042 (1.2603) lr 1.5878e-03 eta 1 day, 9:46:09
epoch [21/50] batch [220/2000] time 2.055 (2.033) data 0.000 (0.003) loss 3.3936 (1.2781) lr 1.5878e-03 eta 1 day, 9:45:10
epoch [21/50] batch [240/2000] time 1.978 (2.032) data 0.000 (0.003) loss 3.0912 (1.2754) lr 1.5878e-03 eta 1 day, 9:44:18
epoch [21/50] batch [260/2000] time 2.055 (2.032) data 0.000 (0.003) loss 0.4634 (1.2624) lr 1.5878e-03 eta 1 day, 9:43:35
epoch [21/50] batch [280/2000] time 2.053 (2.032) data 0.000 (0.003) loss 1.1929 (1.2492) lr 1.5878e-03 eta 1 day, 9:42:59
epoch [21/50] batch [300/2000] time 2.028 (2.032) data 0.000 (0.002) loss 0.6616 (1.2373) lr 1.5878e-03 eta 1 day, 9:42:10
epoch [21/50] batch [320/2000] time 2.055 (2.032) data 0.000 (0.002) loss 1.6865 (1.2342) lr 1.5878e-03 eta 1 day, 9:40:58
epoch [21/50] batch [340/2000] time 1.973 (2.032) data 0.000 (0.002) loss 3.2027 (1.2340) lr 1.5878e-03 eta 1 day, 9:40:21
epoch [21/50] batch [360/2000] time 2.053 (2.032) data 0.000 (0.002) loss 0.2294 (1.2332) lr 1.5878e-03 eta 1 day, 9:39:31
epoch [21/50] batch [380/2000] time 2.031 (2.032) data 0.000 (0.002) loss 0.8610 (1.2562) lr 1.5878e-03 eta 1 day, 9:38:56
epoch [21/50] batch [400/2000] time 2.052 (2.032) data 0.000 (0.002) loss 2.0925 (1.2646) lr 1.5878e-03 eta 1 day, 9:38:15
epoch [21/50] batch [420/2000] time 1.999 (2.032) data 0.000 (0.002) loss 1.8079 (1.2506) lr 1.5878e-03 eta 1 day, 9:37:28
epoch [21/50] batch [440/2000] time 2.052 (2.032) data 0.000 (0.002) loss 2.3577 (1.2580) lr 1.5878e-03 eta 1 day, 9:37:07
epoch [21/50] batch [460/2000] time 2.001 (2.032) data 0.000 (0.002) loss 0.9927 (1.2523) lr 1.5878e-03 eta 1 day, 9:36:25
epoch [21/50] batch [480/2000] time 2.052 (2.032) data 0.000 (0.002) loss 0.3493 (1.2428) lr 1.5878e-03 eta 1 day, 9:35:37
epoch [21/50] batch [500/2000] time 1.997 (2.032) data 0.000 (0.002) loss 0.1980 (1.2313) lr 1.5878e-03 eta 1 day, 9:34:56
epoch [21/50] batch [520/2000] time 2.053 (2.032) data 0.000 (0.002) loss 0.8626 (1.2208) lr 1.5878e-03 eta 1 day, 9:34:14
epoch [21/50] batch [540/2000] time 1.999 (2.032) data 0.000 (0.001) loss 0.4293 (1.2243) lr 1.5878e-03 eta 1 day, 9:33:13
epoch [21/50] batch [560/2000] time 2.050 (2.031) data 0.000 (0.001) loss 2.4375 (1.2199) lr 1.5878e-03 eta 1 day, 9:32:27
epoch [21/50] batch [580/2000] time 2.031 (2.031) data 0.000 (0.001) loss 1.2719 (1.2167) lr 1.5878e-03 eta 1 day, 9:31:36
epoch [21/50] batch [600/2000] time 2.046 (2.031) data 0.001 (0.001) loss 2.4286 (1.2258) lr 1.5878e-03 eta 1 day, 9:30:54
epoch [21/50] batch [620/2000] time 1.994 (2.031) data 0.000 (0.001) loss 1.1870 (1.2243) lr 1.5878e-03 eta 1 day, 9:29:42
epoch [21/50] batch [640/2000] time 2.046 (2.031) data 0.000 (0.001) loss 0.0907 (1.2119) lr 1.5878e-03 eta 1 day, 9:28:55
epoch [21/50] batch [660/2000] time 2.028 (2.030) data 0.000 (0.001) loss 1.3579 (1.2076) lr 1.5878e-03 eta 1 day, 9:27:46
epoch [21/50] batch [680/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.9832 (1.2079) lr 1.5878e-03 eta 1 day, 9:27:09
epoch [21/50] batch [700/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.5730 (1.2143) lr 1.5878e-03 eta 1 day, 9:26:15
epoch [21/50] batch [720/2000] time 2.048 (2.030) data 0.000 (0.001) loss 1.2222 (1.2181) lr 1.5878e-03 eta 1 day, 9:25:28
epoch [21/50] batch [740/2000] time 2.047 (2.030) data 0.000 (0.001) loss 0.3760 (1.2279) lr 1.5878e-03 eta 1 day, 9:24:40
epoch [21/50] batch [760/2000] time 2.048 (2.030) data 0.000 (0.001) loss 0.6294 (1.2313) lr 1.5878e-03 eta 1 day, 9:23:52
epoch [21/50] batch [780/2000] time 1.994 (2.029) data 0.000 (0.001) loss 0.1499 (1.2281) lr 1.5878e-03 eta 1 day, 9:23:04
epoch [21/50] batch [800/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.6729 (1.2336) lr 1.5878e-03 eta 1 day, 9:22:14
epoch [21/50] batch [820/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.5698 (1.2337) lr 1.5878e-03 eta 1 day, 9:21:32
epoch [21/50] batch [840/2000] time 2.048 (2.029) data 0.000 (0.001) loss 3.3719 (1.2376) lr 1.5878e-03 eta 1 day, 9:20:53
epoch [21/50] batch [860/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.4609 (1.2400) lr 1.5878e-03 eta 1 day, 9:20:08
epoch [21/50] batch [880/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.3118 (1.2428) lr 1.5878e-03 eta 1 day, 9:19:16
epoch [21/50] batch [900/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.3306 (1.2424) lr 1.5878e-03 eta 1 day, 9:18:30
epoch [21/50] batch [920/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.7317 (1.2408) lr 1.5878e-03 eta 1 day, 9:17:50
epoch [21/50] batch [940/2000] time 2.055 (2.029) data 0.000 (0.001) loss 2.1092 (1.2396) lr 1.5878e-03 eta 1 day, 9:17:12
epoch [21/50] batch [960/2000] time 1.994 (2.029) data 0.000 (0.001) loss 0.0670 (1.2384) lr 1.5878e-03 eta 1 day, 9:16:35
epoch [21/50] batch [980/2000] time 1.994 (2.029) data 0.000 (0.001) loss 0.3324 (1.2356) lr 1.5878e-03 eta 1 day, 9:15:51
epoch [21/50] batch [1000/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.3561 (1.2322) lr 1.5878e-03 eta 1 day, 9:15:15
epoch [21/50] batch [1020/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.4526 (1.2223) lr 1.5878e-03 eta 1 day, 9:14:37
epoch [21/50] batch [1040/2000] time 2.047 (2.029) data 0.000 (0.001) loss 0.5265 (1.2261) lr 1.5878e-03 eta 1 day, 9:13:55
epoch [21/50] batch [1060/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.4180 (1.2206) lr 1.5878e-03 eta 1 day, 9:13:15
epoch [21/50] batch [1080/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.3531 (1.2183) lr 1.5878e-03 eta 1 day, 9:12:33
epoch [21/50] batch [1100/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.8161 (1.2152) lr 1.5878e-03 eta 1 day, 9:12:00
epoch [21/50] batch [1120/2000] time 2.024 (2.029) data 0.000 (0.001) loss 2.7578 (1.2137) lr 1.5878e-03 eta 1 day, 9:11:24
epoch [21/50] batch [1140/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.2970 (1.2125) lr 1.5878e-03 eta 1 day, 9:10:48
epoch [21/50] batch [1160/2000] time 1.994 (2.029) data 0.000 (0.001) loss 0.8288 (1.2199) lr 1.5878e-03 eta 1 day, 9:09:57
epoch [21/50] batch [1180/2000] time 1.996 (2.029) data 0.000 (0.001) loss 2.2358 (1.2186) lr 1.5878e-03 eta 1 day, 9:09:08
epoch [21/50] batch [1200/2000] time 1.993 (2.029) data 0.000 (0.001) loss 0.2979 (1.2206) lr 1.5878e-03 eta 1 day, 9:08:31
epoch [21/50] batch [1220/2000] time 1.971 (2.029) data 0.000 (0.001) loss 0.3826 (1.2158) lr 1.5878e-03 eta 1 day, 9:07:45
epoch [21/50] batch [1240/2000] time 2.050 (2.029) data 0.000 (0.001) loss 3.9560 (1.2171) lr 1.5878e-03 eta 1 day, 9:07:05
epoch [21/50] batch [1260/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.1634 (1.2115) lr 1.5878e-03 eta 1 day, 9:06:23
epoch [21/50] batch [1280/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.2598 (1.2123) lr 1.5878e-03 eta 1 day, 9:05:38
epoch [21/50] batch [1300/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.4153 (1.2094) lr 1.5878e-03 eta 1 day, 9:04:50
epoch [21/50] batch [1320/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.5038 (1.2065) lr 1.5878e-03 eta 1 day, 9:04:10
epoch [21/50] batch [1340/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.0508 (1.2058) lr 1.5878e-03 eta 1 day, 9:03:31
epoch [21/50] batch [1360/2000] time 2.047 (2.029) data 0.000 (0.001) loss 1.0165 (1.2053) lr 1.5878e-03 eta 1 day, 9:02:48
epoch [21/50] batch [1380/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.4328 (1.2061) lr 1.5878e-03 eta 1 day, 9:02:11
epoch [21/50] batch [1400/2000] time 2.024 (2.029) data 0.000 (0.001) loss 1.3357 (1.2079) lr 1.5878e-03 eta 1 day, 9:01:24
epoch [21/50] batch [1420/2000] time 1.994 (2.029) data 0.000 (0.001) loss 2.9624 (1.2108) lr 1.5878e-03 eta 1 day, 9:00:37
epoch [21/50] batch [1440/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.2136 (1.2112) lr 1.5878e-03 eta 1 day, 8:59:53
epoch [21/50] batch [1460/2000] time 2.047 (2.029) data 0.000 (0.001) loss 0.1122 (1.2086) lr 1.5878e-03 eta 1 day, 8:59:17
epoch [21/50] batch [1480/2000] time 1.992 (2.029) data 0.000 (0.001) loss 1.1012 (1.2133) lr 1.5878e-03 eta 1 day, 8:58:28
epoch [21/50] batch [1500/2000] time 2.025 (2.029) data 0.000 (0.001) loss 0.7989 (1.2087) lr 1.5878e-03 eta 1 day, 8:57:48
epoch [21/50] batch [1520/2000] time 2.026 (2.028) data 0.000 (0.001) loss 3.1165 (1.2092) lr 1.5878e-03 eta 1 day, 8:57:01
epoch [21/50] batch [1540/2000] time 1.973 (2.028) data 0.000 (0.001) loss 1.2515 (1.2090) lr 1.5878e-03 eta 1 day, 8:56:23
epoch [21/50] batch [1560/2000] time 2.025 (2.028) data 0.000 (0.001) loss 1.1553 (1.2049) lr 1.5878e-03 eta 1 day, 8:55:42
epoch [21/50] batch [1580/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.6038 (1.2040) lr 1.5878e-03 eta 1 day, 8:55:01
epoch [21/50] batch [1600/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.8042 (1.2006) lr 1.5878e-03 eta 1 day, 8:54:23
epoch [21/50] batch [1620/2000] time 2.056 (2.028) data 0.000 (0.001) loss 0.3353 (1.2020) lr 1.5878e-03 eta 1 day, 8:53:35
epoch [21/50] batch [1640/2000] time 1.994 (2.028) data 0.000 (0.001) loss 0.1708 (1.2011) lr 1.5878e-03 eta 1 day, 8:52:52
epoch [21/50] batch [1660/2000] time 2.049 (2.028) data 0.000 (0.001) loss 3.8897 (1.2080) lr 1.5878e-03 eta 1 day, 8:52:05
epoch [21/50] batch [1680/2000] time 2.028 (2.028) data 0.001 (0.001) loss 0.0465 (1.2102) lr 1.5878e-03 eta 1 day, 8:51:25
epoch [21/50] batch [1700/2000] time 2.028 (2.028) data 0.000 (0.001) loss 1.3262 (1.2089) lr 1.5878e-03 eta 1 day, 8:50:48
epoch [21/50] batch [1720/2000] time 1.995 (2.028) data 0.000 (0.001) loss 0.6069 (1.2090) lr 1.5878e-03 eta 1 day, 8:50:05
epoch [21/50] batch [1740/2000] time 1.998 (2.028) data 0.000 (0.001) loss 3.2478 (1.2133) lr 1.5878e-03 eta 1 day, 8:49:22
epoch [21/50] batch [1760/2000] time 2.051 (2.028) data 0.000 (0.001) loss 3.1242 (1.2133) lr 1.5878e-03 eta 1 day, 8:48:44
epoch [21/50] batch [1780/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.1310 (1.2118) lr 1.5878e-03 eta 1 day, 8:48:03
epoch [21/50] batch [1800/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.0914 (1.2114) lr 1.5878e-03 eta 1 day, 8:47:16
epoch [21/50] batch [1820/2000] time 2.027 (2.028) data 0.000 (0.001) loss 1.8796 (1.2120) lr 1.5878e-03 eta 1 day, 8:46:38
epoch [21/50] batch [1840/2000] time 2.046 (2.028) data 0.000 (0.001) loss 0.2557 (1.2080) lr 1.5878e-03 eta 1 day, 8:45:57
epoch [21/50] batch [1860/2000] time 2.027 (2.028) data 0.000 (0.001) loss 1.3695 (1.2109) lr 1.5878e-03 eta 1 day, 8:45:16
epoch [21/50] batch [1880/2000] time 2.031 (2.028) data 0.000 (0.001) loss 3.8688 (1.2111) lr 1.5878e-03 eta 1 day, 8:44:37
epoch [21/50] batch [1900/2000] time 1.977 (2.028) data 0.000 (0.001) loss 1.3065 (1.2112) lr 1.5878e-03 eta 1 day, 8:44:01
epoch [21/50] batch [1920/2000] time 2.055 (2.028) data 0.000 (0.001) loss 0.7196 (1.2121) lr 1.5878e-03 eta 1 day, 8:43:27
epoch [21/50] batch [1940/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.0447 (1.2141) lr 1.5878e-03 eta 1 day, 8:42:47
epoch [21/50] batch [1960/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.6687 (1.2120) lr 1.5878e-03 eta 1 day, 8:42:01
epoch [21/50] batch [1980/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.0305 (1.2099) lr 1.5878e-03 eta 1 day, 8:41:20
epoch [21/50] batch [2000/2000] time 1.993 (2.028) data 0.000 (0.001) loss 0.8154 (1.2101) lr 1.5358e-03 eta 1 day, 8:40:35
epoch [22/50] batch [20/2000] time 2.052 (2.053) data 0.000 (0.027) loss 0.7541 (0.6902) lr 1.5358e-03 eta 1 day, 9:04:00
epoch [22/50] batch [40/2000] time 2.027 (2.037) data 0.000 (0.014) loss 1.0364 (0.7952) lr 1.5358e-03 eta 1 day, 8:47:38
epoch [22/50] batch [60/2000] time 2.049 (2.036) data 0.001 (0.009) loss 0.6506 (0.9979) lr 1.5358e-03 eta 1 day, 8:45:42
epoch [22/50] batch [80/2000] time 2.003 (2.032) data 0.000 (0.007) loss 1.0197 (1.0288) lr 1.5358e-03 eta 1 day, 8:41:44
epoch [22/50] batch [100/2000] time 2.059 (2.033) data 0.000 (0.006) loss 2.1201 (1.0207) lr 1.5358e-03 eta 1 day, 8:41:33
epoch [22/50] batch [120/2000] time 2.001 (2.032) data 0.000 (0.005) loss 0.9536 (1.0360) lr 1.5358e-03 eta 1 day, 8:40:38
epoch [22/50] batch [140/2000] time 2.003 (2.032) data 0.000 (0.004) loss 0.6944 (1.0736) lr 1.5358e-03 eta 1 day, 8:39:36
epoch [22/50] batch [160/2000] time 2.031 (2.032) data 0.000 (0.004) loss 1.1680 (1.1001) lr 1.5358e-03 eta 1 day, 8:38:40
epoch [22/50] batch [180/2000] time 2.047 (2.031) data 0.000 (0.003) loss 1.6470 (1.0851) lr 1.5358e-03 eta 1 day, 8:37:36
epoch [22/50] batch [200/2000] time 2.025 (2.031) data 0.000 (0.003) loss 0.7180 (1.1128) lr 1.5358e-03 eta 1 day, 8:36:25
epoch [22/50] batch [220/2000] time 2.049 (2.030) data 0.000 (0.003) loss 1.0446 (1.1311) lr 1.5358e-03 eta 1 day, 8:35:16
epoch [22/50] batch [240/2000] time 2.047 (2.030) data 0.000 (0.002) loss 0.4070 (1.1327) lr 1.5358e-03 eta 1 day, 8:34:22
epoch [22/50] batch [260/2000] time 1.998 (2.030) data 0.000 (0.002) loss 0.2553 (1.1296) lr 1.5358e-03 eta 1 day, 8:33:31
epoch [22/50] batch [280/2000] time 2.050 (2.030) data 0.000 (0.002) loss 2.3471 (1.1462) lr 1.5358e-03 eta 1 day, 8:32:40
epoch [22/50] batch [300/2000] time 2.047 (2.030) data 0.000 (0.002) loss 1.6241 (1.1267) lr 1.5358e-03 eta 1 day, 8:31:50
epoch [22/50] batch [320/2000] time 2.049 (2.030) data 0.000 (0.002) loss 2.2282 (1.1634) lr 1.5358e-03 eta 1 day, 8:31:26
epoch [22/50] batch [340/2000] time 2.024 (2.030) data 0.000 (0.002) loss 0.2473 (1.1595) lr 1.5358e-03 eta 1 day, 8:30:49
epoch [22/50] batch [360/2000] time 2.052 (2.030) data 0.000 (0.002) loss 0.9099 (1.1572) lr 1.5358e-03 eta 1 day, 8:30:12
epoch [22/50] batch [380/2000] time 1.995 (2.030) data 0.000 (0.002) loss 0.0354 (1.1535) lr 1.5358e-03 eta 1 day, 8:29:18
epoch [22/50] batch [400/2000] time 2.047 (2.030) data 0.000 (0.002) loss 1.1063 (1.1609) lr 1.5358e-03 eta 1 day, 8:28:19
epoch [22/50] batch [420/2000] time 2.047 (2.029) data 0.000 (0.001) loss 1.2287 (1.1617) lr 1.5358e-03 eta 1 day, 8:27:17
epoch [22/50] batch [440/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.6027 (1.1447) lr 1.5358e-03 eta 1 day, 8:26:27
epoch [22/50] batch [460/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.7264 (1.1358) lr 1.5358e-03 eta 1 day, 8:25:57
epoch [22/50] batch [480/2000] time 2.033 (2.029) data 0.000 (0.001) loss 2.4460 (1.1378) lr 1.5358e-03 eta 1 day, 8:25:14
epoch [22/50] batch [500/2000] time 2.000 (2.029) data 0.000 (0.001) loss 1.7041 (1.1375) lr 1.5358e-03 eta 1 day, 8:24:41
epoch [22/50] batch [520/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.0108 (1.1399) lr 1.5358e-03 eta 1 day, 8:24:00
epoch [22/50] batch [540/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.2647 (1.1443) lr 1.5358e-03 eta 1 day, 8:23:30
epoch [22/50] batch [560/2000] time 1.994 (2.029) data 0.000 (0.001) loss 0.3888 (1.1464) lr 1.5358e-03 eta 1 day, 8:22:43
epoch [22/50] batch [580/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.2523 (1.1386) lr 1.5358e-03 eta 1 day, 8:22:21
epoch [22/50] batch [600/2000] time 1.996 (2.030) data 0.001 (0.001) loss 0.2995 (1.1304) lr 1.5358e-03 eta 1 day, 8:21:34
epoch [22/50] batch [620/2000] time 2.033 (2.030) data 0.000 (0.001) loss 2.5185 (1.1349) lr 1.5358e-03 eta 1 day, 8:21:05
epoch [22/50] batch [640/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.9333 (1.1444) lr 1.5358e-03 eta 1 day, 8:20:39
epoch [22/50] batch [660/2000] time 2.056 (2.030) data 0.000 (0.001) loss 0.2824 (1.1611) lr 1.5358e-03 eta 1 day, 8:20:11
epoch [22/50] batch [680/2000] time 2.057 (2.030) data 0.000 (0.001) loss 0.2297 (1.1557) lr 1.5358e-03 eta 1 day, 8:19:37
epoch [22/50] batch [700/2000] time 1.996 (2.031) data 0.000 (0.001) loss 0.4556 (1.1553) lr 1.5358e-03 eta 1 day, 8:19:08
epoch [22/50] batch [720/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.3991 (1.1512) lr 1.5358e-03 eta 1 day, 8:18:22
epoch [22/50] batch [740/2000] time 1.998 (2.030) data 0.000 (0.001) loss 3.0300 (1.1537) lr 1.5358e-03 eta 1 day, 8:17:30
epoch [22/50] batch [760/2000] time 2.056 (2.030) data 0.000 (0.001) loss 0.3292 (1.1569) lr 1.5358e-03 eta 1 day, 8:16:57
epoch [22/50] batch [780/2000] time 2.056 (2.030) data 0.000 (0.001) loss 1.1113 (1.1571) lr 1.5358e-03 eta 1 day, 8:16:15
epoch [22/50] batch [800/2000] time 1.972 (2.030) data 0.000 (0.001) loss 1.2626 (1.1588) lr 1.5358e-03 eta 1 day, 8:15:44
epoch [22/50] batch [820/2000] time 1.995 (2.031) data 0.000 (0.001) loss 1.1984 (1.1674) lr 1.5358e-03 eta 1 day, 8:15:06
epoch [22/50] batch [840/2000] time 2.054 (2.031) data 0.000 (0.001) loss 2.0267 (1.1749) lr 1.5358e-03 eta 1 day, 8:14:28
epoch [22/50] batch [860/2000] time 1.995 (2.030) data 0.000 (0.001) loss 1.4193 (1.1770) lr 1.5358e-03 eta 1 day, 8:13:37
epoch [22/50] batch [880/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.8822 (1.1705) lr 1.5358e-03 eta 1 day, 8:13:01
epoch [22/50] batch [900/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.4012 (1.1763) lr 1.5358e-03 eta 1 day, 8:12:10
epoch [22/50] batch [920/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.2083 (1.1787) lr 1.5358e-03 eta 1 day, 8:11:33
epoch [22/50] batch [940/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.7394 (1.1830) lr 1.5358e-03 eta 1 day, 8:10:54
epoch [22/50] batch [960/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.3155 (1.1861) lr 1.5358e-03 eta 1 day, 8:10:11
epoch [22/50] batch [980/2000] time 2.059 (2.031) data 0.000 (0.001) loss 0.4101 (1.1882) lr 1.5358e-03 eta 1 day, 8:09:42
epoch [22/50] batch [1000/2000] time 2.054 (2.031) data 0.000 (0.001) loss 1.5130 (1.1874) lr 1.5358e-03 eta 1 day, 8:09:06
epoch [22/50] batch [1020/2000] time 1.997 (2.031) data 0.000 (0.001) loss 0.4969 (1.1901) lr 1.5358e-03 eta 1 day, 8:08:25
epoch [22/50] batch [1040/2000] time 2.000 (2.031) data 0.000 (0.001) loss 0.5907 (1.1978) lr 1.5358e-03 eta 1 day, 8:07:46
epoch [22/50] batch [1060/2000] time 2.054 (2.031) data 0.000 (0.001) loss 0.6842 (1.1992) lr 1.5358e-03 eta 1 day, 8:07:09
epoch [22/50] batch [1080/2000] time 2.058 (2.031) data 0.000 (0.001) loss 1.0248 (1.2011) lr 1.5358e-03 eta 1 day, 8:06:33
epoch [22/50] batch [1100/2000] time 2.001 (2.031) data 0.000 (0.001) loss 2.8497 (1.2070) lr 1.5358e-03 eta 1 day, 8:05:48
epoch [22/50] batch [1120/2000] time 2.000 (2.031) data 0.000 (0.001) loss 2.2017 (1.2129) lr 1.5358e-03 eta 1 day, 8:05:11
epoch [22/50] batch [1140/2000] time 2.030 (2.031) data 0.001 (0.001) loss 0.5839 (1.2143) lr 1.5358e-03 eta 1 day, 8:04:25
epoch [22/50] batch [1160/2000] time 2.054 (2.031) data 0.000 (0.001) loss 1.2728 (1.2126) lr 1.5358e-03 eta 1 day, 8:03:43
epoch [22/50] batch [1180/2000] time 2.055 (2.031) data 0.000 (0.001) loss 0.3834 (1.2088) lr 1.5358e-03 eta 1 day, 8:03:02
epoch [22/50] batch [1200/2000] time 2.050 (2.031) data 0.000 (0.001) loss 1.2833 (1.2121) lr 1.5358e-03 eta 1 day, 8:02:25
epoch [22/50] batch [1220/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.3931 (1.2090) lr 1.5358e-03 eta 1 day, 8:01:41
epoch [22/50] batch [1240/2000] time 1.995 (2.031) data 0.000 (0.001) loss 0.6283 (1.2050) lr 1.5358e-03 eta 1 day, 8:01:03
epoch [22/50] batch [1260/2000] time 1.972 (2.031) data 0.000 (0.001) loss 0.7062 (1.2037) lr 1.5358e-03 eta 1 day, 8:00:24
epoch [22/50] batch [1280/2000] time 2.055 (2.031) data 0.000 (0.001) loss 0.8961 (1.1998) lr 1.5358e-03 eta 1 day, 7:59:47
epoch [22/50] batch [1300/2000] time 1.994 (2.031) data 0.000 (0.001) loss 0.2368 (1.2008) lr 1.5358e-03 eta 1 day, 7:59:01
epoch [22/50] batch [1320/2000] time 1.993 (2.031) data 0.000 (0.001) loss 1.0888 (1.1968) lr 1.5358e-03 eta 1 day, 7:58:18
epoch [22/50] batch [1340/2000] time 1.998 (2.031) data 0.000 (0.001) loss 3.6869 (1.2017) lr 1.5358e-03 eta 1 day, 7:57:37
epoch [22/50] batch [1360/2000] time 1.996 (2.031) data 0.000 (0.001) loss 0.7434 (1.1963) lr 1.5358e-03 eta 1 day, 7:56:51
epoch [22/50] batch [1380/2000] time 2.049 (2.031) data 0.000 (0.001) loss 0.1585 (1.1966) lr 1.5358e-03 eta 1 day, 7:56:12
epoch [22/50] batch [1400/2000] time 2.049 (2.031) data 0.000 (0.001) loss 0.9501 (1.1960) lr 1.5358e-03 eta 1 day, 7:55:28
epoch [22/50] batch [1420/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.2654 (1.1970) lr 1.5358e-03 eta 1 day, 7:54:47
epoch [22/50] batch [1440/2000] time 2.048 (2.030) data 0.000 (0.001) loss 1.6223 (1.2003) lr 1.5358e-03 eta 1 day, 7:54:03
epoch [22/50] batch [1460/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.1309 (1.2007) lr 1.5358e-03 eta 1 day, 7:53:18
epoch [22/50] batch [1480/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.3135 (1.2008) lr 1.5358e-03 eta 1 day, 7:52:36
epoch [22/50] batch [1500/2000] time 2.023 (2.030) data 0.000 (0.001) loss 1.2400 (1.2030) lr 1.5358e-03 eta 1 day, 7:51:56
epoch [22/50] batch [1520/2000] time 1.993 (2.030) data 0.000 (0.001) loss 0.7556 (1.2090) lr 1.5358e-03 eta 1 day, 7:51:14
epoch [22/50] batch [1540/2000] time 2.047 (2.030) data 0.000 (0.001) loss 1.5198 (1.2067) lr 1.5358e-03 eta 1 day, 7:50:30
epoch [22/50] batch [1560/2000] time 2.048 (2.030) data 0.000 (0.001) loss 0.9087 (1.2058) lr 1.5358e-03 eta 1 day, 7:49:47
epoch [22/50] batch [1580/2000] time 1.993 (2.030) data 0.000 (0.001) loss 0.4314 (1.2020) lr 1.5358e-03 eta 1 day, 7:49:03
epoch [22/50] batch [1600/2000] time 2.026 (2.030) data 0.000 (0.001) loss 1.0913 (1.2030) lr 1.5358e-03 eta 1 day, 7:48:17
epoch [22/50] batch [1620/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.6560 (1.1994) lr 1.5358e-03 eta 1 day, 7:47:33
epoch [22/50] batch [1640/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.8921 (1.1980) lr 1.5358e-03 eta 1 day, 7:46:47
epoch [22/50] batch [1660/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.0537 (1.1965) lr 1.5358e-03 eta 1 day, 7:46:10
epoch [22/50] batch [1680/2000] time 1.995 (2.030) data 0.001 (0.001) loss 1.5431 (1.2011) lr 1.5358e-03 eta 1 day, 7:45:27
epoch [22/50] batch [1700/2000] time 1.973 (2.030) data 0.000 (0.001) loss 0.2185 (1.1989) lr 1.5358e-03 eta 1 day, 7:44:41
epoch [22/50] batch [1720/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.6378 (1.1993) lr 1.5358e-03 eta 1 day, 7:44:02
epoch [22/50] batch [1740/2000] time 2.001 (2.030) data 0.000 (0.001) loss 2.0539 (1.2018) lr 1.5358e-03 eta 1 day, 7:43:21
epoch [22/50] batch [1760/2000] time 2.057 (2.030) data 0.000 (0.001) loss 1.1212 (1.2002) lr 1.5358e-03 eta 1 day, 7:42:41
epoch [22/50] batch [1780/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.5905 (1.2000) lr 1.5358e-03 eta 1 day, 7:42:05
epoch [22/50] batch [1800/2000] time 2.046 (2.030) data 0.000 (0.001) loss 0.1379 (1.2000) lr 1.5358e-03 eta 1 day, 7:41:27
epoch [22/50] batch [1820/2000] time 1.995 (2.030) data 0.000 (0.001) loss 0.2729 (1.2041) lr 1.5358e-03 eta 1 day, 7:40:39
epoch [22/50] batch [1840/2000] time 2.027 (2.030) data 0.000 (0.000) loss 1.3865 (1.2046) lr 1.5358e-03 eta 1 day, 7:39:53
epoch [22/50] batch [1860/2000] time 2.053 (2.030) data 0.000 (0.000) loss 1.2332 (1.2051) lr 1.5358e-03 eta 1 day, 7:39:11
epoch [22/50] batch [1880/2000] time 2.048 (2.030) data 0.000 (0.000) loss 3.4357 (1.2067) lr 1.5358e-03 eta 1 day, 7:38:28
epoch [22/50] batch [1900/2000] time 1.973 (2.030) data 0.000 (0.000) loss 1.2050 (1.2087) lr 1.5358e-03 eta 1 day, 7:37:45
epoch [22/50] batch [1920/2000] time 2.050 (2.030) data 0.000 (0.000) loss 1.9956 (1.2097) lr 1.5358e-03 eta 1 day, 7:37:03
epoch [22/50] batch [1940/2000] time 1.997 (2.030) data 0.000 (0.000) loss 0.2396 (1.2066) lr 1.5358e-03 eta 1 day, 7:36:21
epoch [22/50] batch [1960/2000] time 1.998 (2.030) data 0.000 (0.000) loss 1.4589 (1.2102) lr 1.5358e-03 eta 1 day, 7:35:38
epoch [22/50] batch [1980/2000] time 2.050 (2.030) data 0.000 (0.000) loss 0.9208 (1.2090) lr 1.5358e-03 eta 1 day, 7:35:00
epoch [22/50] batch [2000/2000] time 2.047 (2.030) data 0.000 (0.000) loss 0.0330 (1.2045) lr 1.4818e-03 eta 1 day, 7:34:22
epoch [23/50] batch [20/2000] time 2.049 (2.058) data 0.000 (0.027) loss 1.6667 (1.1292) lr 1.4818e-03 eta 1 day, 7:59:46
epoch [23/50] batch [40/2000] time 2.026 (2.044) data 0.000 (0.014) loss 1.8495 (1.2079) lr 1.4818e-03 eta 1 day, 7:46:09
epoch [23/50] batch [60/2000] time 1.997 (2.036) data 0.001 (0.009) loss 2.5266 (1.2550) lr 1.4818e-03 eta 1 day, 7:38:27
epoch [23/50] batch [80/2000] time 1.997 (2.033) data 0.000 (0.007) loss 0.7245 (1.2219) lr 1.4818e-03 eta 1 day, 7:34:34
epoch [23/50] batch [100/2000] time 1.994 (2.031) data 0.000 (0.006) loss 1.2302 (1.2232) lr 1.4818e-03 eta 1 day, 7:32:11
epoch [23/50] batch [120/2000] time 2.026 (2.031) data 0.000 (0.005) loss 0.2481 (1.1621) lr 1.4818e-03 eta 1 day, 7:31:19
epoch [23/50] batch [140/2000] time 2.049 (2.029) data 0.000 (0.004) loss 0.9065 (1.1658) lr 1.4818e-03 eta 1 day, 7:28:58
epoch [23/50] batch [160/2000] time 2.051 (2.029) data 0.000 (0.004) loss 0.7775 (1.1979) lr 1.4818e-03 eta 1 day, 7:28:44
epoch [23/50] batch [180/2000] time 2.029 (2.030) data 0.000 (0.003) loss 2.7697 (1.2202) lr 1.4818e-03 eta 1 day, 7:28:26
epoch [23/50] batch [200/2000] time 1.996 (2.030) data 0.000 (0.003) loss 2.4401 (1.2062) lr 1.4818e-03 eta 1 day, 7:27:36
epoch [23/50] batch [220/2000] time 1.999 (2.030) data 0.000 (0.003) loss 0.7708 (1.1929) lr 1.4818e-03 eta 1 day, 7:27:09
epoch [23/50] batch [240/2000] time 1.995 (2.030) data 0.000 (0.002) loss 0.7079 (1.1919) lr 1.4818e-03 eta 1 day, 7:26:44
epoch [23/50] batch [260/2000] time 2.031 (2.031) data 0.000 (0.002) loss 0.1231 (1.1787) lr 1.4818e-03 eta 1 day, 7:26:28
epoch [23/50] batch [280/2000] time 1.996 (2.030) data 0.000 (0.002) loss 0.3371 (1.1670) lr 1.4818e-03 eta 1 day, 7:25:10
epoch [23/50] batch [300/2000] time 2.053 (2.030) data 0.000 (0.002) loss 2.4741 (1.1704) lr 1.4818e-03 eta 1 day, 7:24:36
epoch [23/50] batch [320/2000] time 2.047 (2.030) data 0.000 (0.002) loss 0.6800 (1.1864) lr 1.4818e-03 eta 1 day, 7:23:44
epoch [23/50] batch [340/2000] time 2.053 (2.030) data 0.000 (0.002) loss 1.9755 (1.1864) lr 1.4818e-03 eta 1 day, 7:23:20
epoch [23/50] batch [360/2000] time 2.026 (2.030) data 0.000 (0.002) loss 1.4831 (1.1812) lr 1.4818e-03 eta 1 day, 7:22:28
epoch [23/50] batch [380/2000] time 2.048 (2.029) data 0.000 (0.002) loss 2.1926 (1.1696) lr 1.4818e-03 eta 1 day, 7:21:17
epoch [23/50] batch [400/2000] time 1.997 (2.029) data 0.000 (0.002) loss 0.8035 (1.1786) lr 1.4818e-03 eta 1 day, 7:20:03
epoch [23/50] batch [420/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.8939 (1.1766) lr 1.4818e-03 eta 1 day, 7:19:13
epoch [23/50] batch [440/2000] time 1.994 (2.028) data 0.000 (0.001) loss 0.4798 (1.1688) lr 1.4818e-03 eta 1 day, 7:18:12
epoch [23/50] batch [460/2000] time 2.050 (2.028) data 0.000 (0.001) loss 3.4606 (1.1734) lr 1.4818e-03 eta 1 day, 7:17:27
epoch [23/50] batch [480/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.9577 (1.1815) lr 1.4818e-03 eta 1 day, 7:16:49
epoch [23/50] batch [500/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.0977 (1.1708) lr 1.4818e-03 eta 1 day, 7:16:02
epoch [23/50] batch [520/2000] time 2.058 (2.028) data 0.000 (0.001) loss 1.0269 (1.1780) lr 1.4818e-03 eta 1 day, 7:15:17
epoch [23/50] batch [540/2000] time 2.055 (2.028) data 0.000 (0.001) loss 0.2205 (1.1786) lr 1.4818e-03 eta 1 day, 7:14:44
epoch [23/50] batch [560/2000] time 2.003 (2.028) data 0.000 (0.001) loss 0.5382 (1.1698) lr 1.4818e-03 eta 1 day, 7:14:14
epoch [23/50] batch [580/2000] time 1.978 (2.029) data 0.000 (0.001) loss 0.7375 (1.1727) lr 1.4818e-03 eta 1 day, 7:13:50
epoch [23/50] batch [600/2000] time 2.055 (2.029) data 0.001 (0.001) loss 0.4197 (1.1700) lr 1.4818e-03 eta 1 day, 7:13:10
epoch [23/50] batch [620/2000] time 2.055 (2.029) data 0.000 (0.001) loss 1.0099 (1.1687) lr 1.4818e-03 eta 1 day, 7:12:38
epoch [23/50] batch [640/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.9277 (1.1665) lr 1.4818e-03 eta 1 day, 7:12:07
epoch [23/50] batch [660/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.6299 (1.1691) lr 1.4818e-03 eta 1 day, 7:11:26
epoch [23/50] batch [680/2000] time 1.971 (2.029) data 0.000 (0.001) loss 0.2451 (1.1713) lr 1.4818e-03 eta 1 day, 7:10:47
epoch [23/50] batch [700/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.6204 (1.1729) lr 1.4818e-03 eta 1 day, 7:10:13
epoch [23/50] batch [720/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.4984 (1.1751) lr 1.4818e-03 eta 1 day, 7:09:37
epoch [23/50] batch [740/2000] time 2.059 (2.029) data 0.000 (0.001) loss 0.1758 (1.1674) lr 1.4818e-03 eta 1 day, 7:09:01
epoch [23/50] batch [760/2000] time 2.030 (2.029) data 0.000 (0.001) loss 2.1728 (1.1628) lr 1.4818e-03 eta 1 day, 7:08:22
epoch [23/50] batch [780/2000] time 1.974 (2.029) data 0.000 (0.001) loss 1.7195 (1.1702) lr 1.4818e-03 eta 1 day, 7:07:37
epoch [23/50] batch [800/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.3077 (1.1693) lr 1.4818e-03 eta 1 day, 7:07:00
epoch [23/50] batch [820/2000] time 2.029 (2.029) data 0.000 (0.001) loss 1.6579 (1.1709) lr 1.4818e-03 eta 1 day, 7:06:26
epoch [23/50] batch [840/2000] time 2.031 (2.030) data 0.000 (0.001) loss 1.8347 (1.1705) lr 1.4818e-03 eta 1 day, 7:05:54
epoch [23/50] batch [860/2000] time 2.030 (2.030) data 0.000 (0.001) loss 2.1488 (1.1646) lr 1.4818e-03 eta 1 day, 7:05:09
epoch [23/50] batch [880/2000] time 2.055 (2.029) data 0.000 (0.001) loss 0.9109 (1.1674) lr 1.4818e-03 eta 1 day, 7:04:22
epoch [23/50] batch [900/2000] time 2.055 (2.030) data 0.000 (0.001) loss 2.1091 (1.1751) lr 1.4818e-03 eta 1 day, 7:03:49
epoch [23/50] batch [920/2000] time 2.033 (2.029) data 0.000 (0.001) loss 0.8415 (1.1780) lr 1.4818e-03 eta 1 day, 7:02:59
epoch [23/50] batch [940/2000] time 2.038 (2.030) data 0.000 (0.001) loss 0.6386 (1.1838) lr 1.4818e-03 eta 1 day, 7:02:24
epoch [23/50] batch [960/2000] time 2.031 (2.029) data 0.000 (0.001) loss 1.0862 (1.1789) lr 1.4818e-03 eta 1 day, 7:01:37
epoch [23/50] batch [980/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.6521 (1.1707) lr 1.4818e-03 eta 1 day, 7:01:01
epoch [23/50] batch [1000/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.5095 (1.1708) lr 1.4818e-03 eta 1 day, 7:00:19
epoch [23/50] batch [1020/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.5653 (1.1648) lr 1.4818e-03 eta 1 day, 6:59:46
epoch [23/50] batch [1040/2000] time 2.026 (2.030) data 0.000 (0.001) loss 1.0921 (1.1623) lr 1.4818e-03 eta 1 day, 6:59:05
epoch [23/50] batch [1060/2000] time 2.055 (2.030) data 0.000 (0.001) loss 2.8116 (1.1704) lr 1.4818e-03 eta 1 day, 6:58:25
epoch [23/50] batch [1080/2000] time 2.046 (2.030) data 0.000 (0.001) loss 0.0763 (1.1758) lr 1.4818e-03 eta 1 day, 6:57:48
epoch [23/50] batch [1100/2000] time 2.030 (2.030) data 0.000 (0.001) loss 1.1228 (1.1760) lr 1.4818e-03 eta 1 day, 6:57:03
epoch [23/50] batch [1120/2000] time 2.023 (2.030) data 0.000 (0.001) loss 1.5990 (1.1782) lr 1.4818e-03 eta 1 day, 6:56:21
epoch [23/50] batch [1140/2000] time 1.995 (2.029) data 0.001 (0.001) loss 1.4472 (1.1828) lr 1.4818e-03 eta 1 day, 6:55:28
epoch [23/50] batch [1160/2000] time 2.055 (2.029) data 0.000 (0.001) loss 0.6050 (1.1873) lr 1.4818e-03 eta 1 day, 6:54:50
epoch [23/50] batch [1180/2000] time 2.025 (2.029) data 0.000 (0.001) loss 0.1745 (1.1868) lr 1.4818e-03 eta 1 day, 6:54:08
epoch [23/50] batch [1200/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.1961 (1.1897) lr 1.4818e-03 eta 1 day, 6:53:34
epoch [23/50] batch [1220/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.5379 (1.1943) lr 1.4818e-03 eta 1 day, 6:52:50
epoch [23/50] batch [1240/2000] time 2.032 (2.029) data 0.000 (0.001) loss 2.8871 (1.1914) lr 1.4818e-03 eta 1 day, 6:52:09
epoch [23/50] batch [1260/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.7022 (1.1880) lr 1.4818e-03 eta 1 day, 6:51:25
epoch [23/50] batch [1280/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.8336 (1.1899) lr 1.4818e-03 eta 1 day, 6:50:34
epoch [23/50] batch [1300/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.3102 (1.1867) lr 1.4818e-03 eta 1 day, 6:49:52
epoch [23/50] batch [1320/2000] time 2.047 (2.029) data 0.000 (0.001) loss 0.2521 (1.1883) lr 1.4818e-03 eta 1 day, 6:49:16
epoch [23/50] batch [1340/2000] time 2.024 (2.029) data 0.000 (0.001) loss 3.3200 (1.1940) lr 1.4818e-03 eta 1 day, 6:48:35
epoch [23/50] batch [1360/2000] time 2.024 (2.029) data 0.000 (0.001) loss 0.6502 (1.1944) lr 1.4818e-03 eta 1 day, 6:47:44
epoch [23/50] batch [1380/2000] time 2.047 (2.029) data 0.000 (0.001) loss 1.4076 (1.1923) lr 1.4818e-03 eta 1 day, 6:47:02
epoch [23/50] batch [1400/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.1721 (1.1909) lr 1.4818e-03 eta 1 day, 6:46:18
epoch [23/50] batch [1420/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.2042 (1.1936) lr 1.4818e-03 eta 1 day, 6:45:25
epoch [23/50] batch [1440/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.2050 (1.1930) lr 1.4818e-03 eta 1 day, 6:44:43
epoch [23/50] batch [1460/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.8209 (1.1920) lr 1.4818e-03 eta 1 day, 6:44:05
epoch [23/50] batch [1480/2000] time 1.995 (2.029) data 0.000 (0.001) loss 0.9292 (1.1918) lr 1.4818e-03 eta 1 day, 6:43:25
epoch [23/50] batch [1500/2000] time 2.050 (2.029) data 0.000 (0.001) loss 3.2330 (1.1935) lr 1.4818e-03 eta 1 day, 6:42:41
epoch [23/50] batch [1520/2000] time 2.002 (2.029) data 0.000 (0.001) loss 1.4908 (1.1950) lr 1.4818e-03 eta 1 day, 6:41:57
epoch [23/50] batch [1540/2000] time 1.995 (2.029) data 0.000 (0.001) loss 4.5453 (1.1980) lr 1.4818e-03 eta 1 day, 6:41:12
epoch [23/50] batch [1560/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.7640 (1.1965) lr 1.4818e-03 eta 1 day, 6:40:36
epoch [23/50] batch [1580/2000] time 2.047 (2.029) data 0.000 (0.001) loss 0.1194 (1.1921) lr 1.4818e-03 eta 1 day, 6:39:55
epoch [23/50] batch [1600/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.5002 (1.1907) lr 1.4818e-03 eta 1 day, 6:39:15
epoch [23/50] batch [1620/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.1214 (1.1903) lr 1.4818e-03 eta 1 day, 6:38:31
epoch [23/50] batch [1640/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.0591 (1.1882) lr 1.4818e-03 eta 1 day, 6:37:46
epoch [23/50] batch [1660/2000] time 2.046 (2.028) data 0.000 (0.001) loss 0.7567 (1.1878) lr 1.4818e-03 eta 1 day, 6:37:05
epoch [23/50] batch [1680/2000] time 2.026 (2.028) data 0.001 (0.001) loss 1.1977 (1.1857) lr 1.4818e-03 eta 1 day, 6:36:24
epoch [23/50] batch [1700/2000] time 2.026 (2.028) data 0.000 (0.001) loss 2.7127 (1.1869) lr 1.4818e-03 eta 1 day, 6:35:44
epoch [23/50] batch [1720/2000] time 2.047 (2.028) data 0.000 (0.001) loss 0.1145 (1.1865) lr 1.4818e-03 eta 1 day, 6:35:03
epoch [23/50] batch [1740/2000] time 1.994 (2.028) data 0.000 (0.001) loss 0.7142 (1.1901) lr 1.4818e-03 eta 1 day, 6:34:18
epoch [23/50] batch [1760/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.2729 (1.1899) lr 1.4818e-03 eta 1 day, 6:33:43
epoch [23/50] batch [1780/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.7238 (1.1935) lr 1.4818e-03 eta 1 day, 6:33:08
epoch [23/50] batch [1800/2000] time 2.045 (2.028) data 0.000 (0.001) loss 0.9846 (1.1931) lr 1.4818e-03 eta 1 day, 6:32:23
epoch [23/50] batch [1820/2000] time 1.995 (2.029) data 0.000 (0.001) loss 0.3935 (1.1921) lr 1.4818e-03 eta 1 day, 6:31:44
epoch [23/50] batch [1840/2000] time 1.993 (2.029) data 0.000 (0.001) loss 0.4837 (1.1911) lr 1.4818e-03 eta 1 day, 6:31:05
epoch [23/50] batch [1860/2000] time 2.026 (2.029) data 0.000 (0.000) loss 1.6132 (1.1932) lr 1.4818e-03 eta 1 day, 6:30:25
epoch [23/50] batch [1880/2000] time 2.049 (2.029) data 0.000 (0.000) loss 0.2922 (1.1950) lr 1.4818e-03 eta 1 day, 6:29:46
epoch [23/50] batch [1900/2000] time 1.994 (2.029) data 0.000 (0.000) loss 0.0531 (1.1939) lr 1.4818e-03 eta 1 day, 6:29:02
epoch [23/50] batch [1920/2000] time 2.049 (2.028) data 0.000 (0.000) loss 1.6840 (1.1938) lr 1.4818e-03 eta 1 day, 6:28:21
epoch [23/50] batch [1940/2000] time 2.055 (2.029) data 0.000 (0.000) loss 0.6811 (1.1939) lr 1.4818e-03 eta 1 day, 6:27:41
epoch [23/50] batch [1960/2000] time 2.000 (2.029) data 0.000 (0.000) loss 0.2167 (1.1910) lr 1.4818e-03 eta 1 day, 6:27:03
epoch [23/50] batch [1980/2000] time 2.029 (2.029) data 0.000 (0.000) loss 0.9785 (1.1945) lr 1.4818e-03 eta 1 day, 6:26:23
epoch [23/50] batch [2000/2000] time 1.999 (2.029) data 0.000 (0.000) loss 0.4406 (1.1945) lr 1.4258e-03 eta 1 day, 6:25:43
epoch [24/50] batch [20/2000] time 2.027 (2.057) data 0.000 (0.028) loss 0.7929 (1.1609) lr 1.4258e-03 eta 1 day, 6:50:17
epoch [24/50] batch [40/2000] time 2.049 (2.039) data 0.000 (0.014) loss 0.5362 (1.0365) lr 1.4258e-03 eta 1 day, 6:33:49
epoch [24/50] batch [60/2000] time 2.049 (2.035) data 0.000 (0.009) loss 0.8891 (0.9456) lr 1.4258e-03 eta 1 day, 6:29:18
epoch [24/50] batch [80/2000] time 1.997 (2.032) data 0.000 (0.007) loss 1.9750 (1.0551) lr 1.4258e-03 eta 1 day, 6:25:55
epoch [24/50] batch [100/2000] time 2.050 (2.033) data 0.000 (0.006) loss 1.0973 (1.0486) lr 1.4258e-03 eta 1 day, 6:26:10
epoch [24/50] batch [120/2000] time 2.028 (2.031) data 0.000 (0.005) loss 1.5739 (1.0535) lr 1.4258e-03 eta 1 day, 6:24:13
epoch [24/50] batch [140/2000] time 2.000 (2.032) data 0.000 (0.004) loss 0.3728 (1.0591) lr 1.4258e-03 eta 1 day, 6:23:36
epoch [24/50] batch [160/2000] time 2.048 (2.031) data 0.000 (0.004) loss 0.6992 (1.0849) lr 1.4258e-03 eta 1 day, 6:22:34
epoch [24/50] batch [180/2000] time 2.026 (2.031) data 0.000 (0.003) loss 1.4392 (1.0808) lr 1.4258e-03 eta 1 day, 6:21:22
epoch [24/50] batch [200/2000] time 1.972 (2.030) data 0.000 (0.003) loss 1.7872 (1.0926) lr 1.4258e-03 eta 1 day, 6:20:05
epoch [24/50] batch [220/2000] time 2.028 (2.030) data 0.000 (0.003) loss 2.6644 (1.0795) lr 1.4258e-03 eta 1 day, 6:19:09
epoch [24/50] batch [240/2000] time 1.993 (2.030) data 0.000 (0.002) loss 0.0218 (1.0906) lr 1.4258e-03 eta 1 day, 6:18:28
epoch [24/50] batch [260/2000] time 2.049 (2.029) data 0.000 (0.002) loss 3.6510 (1.0877) lr 1.4258e-03 eta 1 day, 6:17:24
epoch [24/50] batch [280/2000] time 1.998 (2.029) data 0.000 (0.002) loss 1.3059 (1.0893) lr 1.4258e-03 eta 1 day, 6:16:47
epoch [24/50] batch [300/2000] time 2.025 (2.029) data 0.000 (0.002) loss 2.0618 (1.0888) lr 1.4258e-03 eta 1 day, 6:15:40
epoch [24/50] batch [320/2000] time 2.024 (2.029) data 0.000 (0.002) loss 0.9753 (1.0912) lr 1.4258e-03 eta 1 day, 6:15:06
epoch [24/50] batch [340/2000] time 1.993 (2.028) data 0.000 (0.002) loss 2.3766 (1.0804) lr 1.4258e-03 eta 1 day, 6:13:44
epoch [24/50] batch [360/2000] time 2.022 (2.028) data 0.000 (0.002) loss 0.2415 (1.1083) lr 1.4258e-03 eta 1 day, 6:12:59
epoch [24/50] batch [380/2000] time 2.050 (2.028) data 0.000 (0.002) loss 1.0204 (1.1358) lr 1.4258e-03 eta 1 day, 6:12:12
epoch [24/50] batch [400/2000] time 2.026 (2.028) data 0.000 (0.002) loss 0.0369 (1.1361) lr 1.4258e-03 eta 1 day, 6:11:30
epoch [24/50] batch [420/2000] time 2.026 (2.028) data 0.000 (0.002) loss 0.3443 (1.1373) lr 1.4258e-03 eta 1 day, 6:10:56
epoch [24/50] batch [440/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.9480 (1.1372) lr 1.4258e-03 eta 1 day, 6:10:04
epoch [24/50] batch [460/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.7272 (1.1272) lr 1.4258e-03 eta 1 day, 6:09:17
epoch [24/50] batch [480/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.2836 (1.1366) lr 1.4258e-03 eta 1 day, 6:08:46
epoch [24/50] batch [500/2000] time 2.047 (2.028) data 0.000 (0.001) loss 0.2271 (1.1341) lr 1.4258e-03 eta 1 day, 6:08:11
epoch [24/50] batch [520/2000] time 1.998 (2.028) data 0.000 (0.001) loss 1.1964 (1.1243) lr 1.4258e-03 eta 1 day, 6:07:19
epoch [24/50] batch [540/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.8066 (1.1217) lr 1.4258e-03 eta 1 day, 6:06:39
epoch [24/50] batch [560/2000] time 1.996 (2.028) data 0.000 (0.001) loss 1.9163 (1.1363) lr 1.4258e-03 eta 1 day, 6:05:53
epoch [24/50] batch [580/2000] time 2.051 (2.027) data 0.000 (0.001) loss 1.3537 (1.1380) lr 1.4258e-03 eta 1 day, 6:05:07
epoch [24/50] batch [600/2000] time 1.976 (2.027) data 0.001 (0.001) loss 0.4685 (1.1630) lr 1.4258e-03 eta 1 day, 6:04:24
epoch [24/50] batch [620/2000] time 2.023 (2.028) data 0.000 (0.001) loss 0.1686 (1.1608) lr 1.4258e-03 eta 1 day, 6:03:54
epoch [24/50] batch [640/2000] time 2.027 (2.027) data 0.000 (0.001) loss 2.0117 (1.1694) lr 1.4258e-03 eta 1 day, 6:03:00
epoch [24/50] batch [660/2000] time 2.028 (2.027) data 0.000 (0.001) loss 1.1257 (1.1699) lr 1.4258e-03 eta 1 day, 6:02:19
epoch [24/50] batch [680/2000] time 2.027 (2.027) data 0.000 (0.001) loss 0.0510 (1.1625) lr 1.4258e-03 eta 1 day, 6:01:45
epoch [24/50] batch [700/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.1171 (1.1589) lr 1.4258e-03 eta 1 day, 6:01:09
epoch [24/50] batch [720/2000] time 2.051 (2.027) data 0.000 (0.001) loss 0.9477 (1.1579) lr 1.4258e-03 eta 1 day, 6:00:20
epoch [24/50] batch [740/2000] time 1.994 (2.027) data 0.000 (0.001) loss 1.5277 (1.1570) lr 1.4258e-03 eta 1 day, 5:59:35
epoch [24/50] batch [760/2000] time 2.046 (2.027) data 0.000 (0.001) loss 0.2961 (1.1594) lr 1.4258e-03 eta 1 day, 5:58:54
epoch [24/50] batch [780/2000] time 2.050 (2.027) data 0.000 (0.001) loss 0.7261 (1.1643) lr 1.4258e-03 eta 1 day, 5:58:14
epoch [24/50] batch [800/2000] time 2.054 (2.028) data 0.000 (0.001) loss 0.0479 (1.1579) lr 1.4258e-03 eta 1 day, 5:57:50
epoch [24/50] batch [820/2000] time 2.032 (2.028) data 0.000 (0.001) loss 0.5143 (1.1585) lr 1.4258e-03 eta 1 day, 5:57:09
epoch [24/50] batch [840/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.3822 (1.1530) lr 1.4258e-03 eta 1 day, 5:56:36
epoch [24/50] batch [860/2000] time 2.053 (2.028) data 0.000 (0.001) loss 1.5524 (1.1605) lr 1.4258e-03 eta 1 day, 5:56:02
epoch [24/50] batch [880/2000] time 2.056 (2.028) data 0.000 (0.001) loss 2.0202 (1.1599) lr 1.4258e-03 eta 1 day, 5:55:27
epoch [24/50] batch [900/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.7568 (1.1619) lr 1.4258e-03 eta 1 day, 5:54:55
epoch [24/50] batch [920/2000] time 2.024 (2.028) data 0.000 (0.001) loss 1.8993 (1.1686) lr 1.4258e-03 eta 1 day, 5:54:16
epoch [24/50] batch [940/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.8221 (1.1737) lr 1.4258e-03 eta 1 day, 5:53:26
epoch [24/50] batch [960/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.5376 (1.1738) lr 1.4258e-03 eta 1 day, 5:52:40
epoch [24/50] batch [980/2000] time 2.029 (2.028) data 0.000 (0.001) loss 1.2633 (1.1727) lr 1.4258e-03 eta 1 day, 5:51:58
epoch [24/50] batch [1000/2000] time 1.994 (2.028) data 0.000 (0.001) loss 0.6879 (1.1662) lr 1.4258e-03 eta 1 day, 5:51:17
epoch [24/50] batch [1020/2000] time 1.996 (2.028) data 0.000 (0.001) loss 0.1211 (1.1631) lr 1.4258e-03 eta 1 day, 5:50:37
epoch [24/50] batch [1040/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.1632 (1.1606) lr 1.4258e-03 eta 1 day, 5:49:54
epoch [24/50] batch [1060/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.7807 (1.1666) lr 1.4258e-03 eta 1 day, 5:49:13
epoch [24/50] batch [1080/2000] time 2.045 (2.028) data 0.000 (0.001) loss 0.5357 (1.1665) lr 1.4258e-03 eta 1 day, 5:48:32
epoch [24/50] batch [1100/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.8438 (1.1710) lr 1.4258e-03 eta 1 day, 5:47:47
epoch [24/50] batch [1120/2000] time 2.028 (2.028) data 0.000 (0.001) loss 2.9562 (1.1735) lr 1.4258e-03 eta 1 day, 5:47:05
epoch [24/50] batch [1140/2000] time 2.051 (2.028) data 0.001 (0.001) loss 0.4620 (1.1684) lr 1.4258e-03 eta 1 day, 5:46:27
epoch [24/50] batch [1160/2000] time 1.972 (2.028) data 0.000 (0.001) loss 1.4145 (1.1716) lr 1.4258e-03 eta 1 day, 5:45:43
epoch [24/50] batch [1180/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.1497 (1.1717) lr 1.4258e-03 eta 1 day, 5:45:03
epoch [24/50] batch [1200/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.2445 (1.1816) lr 1.4258e-03 eta 1 day, 5:44:23
epoch [24/50] batch [1220/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.0741 (1.1808) lr 1.4258e-03 eta 1 day, 5:43:41
epoch [24/50] batch [1240/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.5430 (1.1782) lr 1.4258e-03 eta 1 day, 5:42:55
epoch [24/50] batch [1260/2000] time 2.050 (2.028) data 0.000 (0.001) loss 3.3082 (1.1775) lr 1.4258e-03 eta 1 day, 5:42:17
epoch [24/50] batch [1280/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.4391 (1.1783) lr 1.4258e-03 eta 1 day, 5:41:40
epoch [24/50] batch [1300/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.7738 (1.1780) lr 1.4258e-03 eta 1 day, 5:41:01
epoch [24/50] batch [1320/2000] time 2.048 (2.028) data 0.000 (0.001) loss 2.1535 (1.1859) lr 1.4258e-03 eta 1 day, 5:40:24
epoch [24/50] batch [1340/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.9590 (1.1831) lr 1.4258e-03 eta 1 day, 5:39:45
epoch [24/50] batch [1360/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.6353 (1.1870) lr 1.4258e-03 eta 1 day, 5:39:02
epoch [24/50] batch [1380/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.9378 (1.1865) lr 1.4258e-03 eta 1 day, 5:38:25
epoch [24/50] batch [1400/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.7530 (1.1903) lr 1.4258e-03 eta 1 day, 5:37:47
epoch [24/50] batch [1420/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.6902 (1.1991) lr 1.4258e-03 eta 1 day, 5:37:07
epoch [24/50] batch [1440/2000] time 2.053 (2.028) data 0.000 (0.001) loss 1.5343 (1.1941) lr 1.4258e-03 eta 1 day, 5:36:28
epoch [24/50] batch [1460/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.5234 (1.1904) lr 1.4258e-03 eta 1 day, 5:35:50
epoch [24/50] batch [1480/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.3092 (1.1957) lr 1.4258e-03 eta 1 day, 5:35:08
epoch [24/50] batch [1500/2000] time 2.027 (2.028) data 0.000 (0.001) loss 1.4757 (1.1996) lr 1.4258e-03 eta 1 day, 5:34:27
epoch [24/50] batch [1520/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.3778 (1.1984) lr 1.4258e-03 eta 1 day, 5:33:45
epoch [24/50] batch [1540/2000] time 2.025 (2.028) data 0.000 (0.001) loss 0.9224 (1.1981) lr 1.4258e-03 eta 1 day, 5:33:04
epoch [24/50] batch [1560/2000] time 2.046 (2.028) data 0.000 (0.001) loss 0.9385 (1.1970) lr 1.4258e-03 eta 1 day, 5:32:28
epoch [24/50] batch [1580/2000] time 2.031 (2.028) data 0.000 (0.001) loss 0.2816 (1.1964) lr 1.4258e-03 eta 1 day, 5:31:46
epoch [24/50] batch [1600/2000] time 1.997 (2.028) data 0.000 (0.001) loss 0.7224 (1.1947) lr 1.4258e-03 eta 1 day, 5:31:05
epoch [24/50] batch [1620/2000] time 2.046 (2.028) data 0.000 (0.001) loss 1.3561 (1.1970) lr 1.4258e-03 eta 1 day, 5:30:21
epoch [24/50] batch [1640/2000] time 1.993 (2.028) data 0.000 (0.001) loss 2.7612 (1.1981) lr 1.4258e-03 eta 1 day, 5:29:42
epoch [24/50] batch [1660/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.3422 (1.2043) lr 1.4258e-03 eta 1 day, 5:29:07
epoch [24/50] batch [1680/2000] time 2.051 (2.028) data 0.001 (0.001) loss 1.4938 (1.2039) lr 1.4258e-03 eta 1 day, 5:28:25
epoch [24/50] batch [1700/2000] time 1.971 (2.028) data 0.000 (0.001) loss 1.2273 (1.2042) lr 1.4258e-03 eta 1 day, 5:27:42
epoch [24/50] batch [1720/2000] time 2.047 (2.028) data 0.000 (0.001) loss 3.7510 (1.2060) lr 1.4258e-03 eta 1 day, 5:27:04
epoch [24/50] batch [1740/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.6834 (1.2066) lr 1.4258e-03 eta 1 day, 5:26:22
epoch [24/50] batch [1760/2000] time 2.050 (2.028) data 0.000 (0.001) loss 2.6753 (1.2040) lr 1.4258e-03 eta 1 day, 5:25:40
epoch [24/50] batch [1780/2000] time 2.047 (2.028) data 0.000 (0.001) loss 0.0660 (1.2028) lr 1.4258e-03 eta 1 day, 5:25:02
epoch [24/50] batch [1800/2000] time 2.025 (2.028) data 0.000 (0.000) loss 0.2264 (1.2010) lr 1.4258e-03 eta 1 day, 5:24:19
epoch [24/50] batch [1820/2000] time 1.997 (2.028) data 0.000 (0.000) loss 0.4553 (1.2000) lr 1.4258e-03 eta 1 day, 5:23:35
epoch [24/50] batch [1840/2000] time 2.048 (2.028) data 0.000 (0.000) loss 2.4979 (1.2039) lr 1.4258e-03 eta 1 day, 5:22:57
epoch [24/50] batch [1860/2000] time 1.974 (2.028) data 0.000 (0.000) loss 0.2436 (1.2066) lr 1.4258e-03 eta 1 day, 5:22:13
epoch [24/50] batch [1880/2000] time 2.049 (2.028) data 0.000 (0.000) loss 3.6151 (1.2080) lr 1.4258e-03 eta 1 day, 5:21:33
epoch [24/50] batch [1900/2000] time 2.048 (2.028) data 0.000 (0.000) loss 0.0515 (1.2084) lr 1.4258e-03 eta 1 day, 5:20:55
epoch [24/50] batch [1920/2000] time 1.971 (2.028) data 0.000 (0.000) loss 3.4483 (1.2078) lr 1.4258e-03 eta 1 day, 5:20:12
epoch [24/50] batch [1940/2000] time 2.026 (2.028) data 0.000 (0.000) loss 1.0615 (1.2090) lr 1.4258e-03 eta 1 day, 5:19:29
epoch [24/50] batch [1960/2000] time 2.024 (2.028) data 0.000 (0.000) loss 0.3539 (1.2075) lr 1.4258e-03 eta 1 day, 5:18:47
epoch [24/50] batch [1980/2000] time 2.048 (2.028) data 0.000 (0.000) loss 0.4637 (1.2112) lr 1.4258e-03 eta 1 day, 5:18:03
epoch [24/50] batch [2000/2000] time 2.047 (2.028) data 0.000 (0.000) loss 0.6312 (1.2092) lr 1.3681e-03 eta 1 day, 5:17:24
epoch [25/50] batch [20/2000] time 2.035 (2.061) data 0.000 (0.028) loss 0.1803 (1.1945) lr 1.3681e-03 eta 1 day, 5:45:42
epoch [25/50] batch [40/2000] time 2.028 (2.045) data 0.000 (0.014) loss 1.4322 (1.0937) lr 1.3681e-03 eta 1 day, 5:31:20
epoch [25/50] batch [60/2000] time 2.053 (2.039) data 0.001 (0.009) loss 1.8939 (1.1701) lr 1.3681e-03 eta 1 day, 5:25:01
epoch [25/50] batch [80/2000] time 1.996 (2.036) data 0.000 (0.007) loss 1.7660 (1.1859) lr 1.3681e-03 eta 1 day, 5:21:31
epoch [25/50] batch [100/2000] time 2.024 (2.034) data 0.000 (0.006) loss 0.3233 (1.1905) lr 1.3681e-03 eta 1 day, 5:19:08
epoch [25/50] batch [120/2000] time 2.029 (2.033) data 0.000 (0.005) loss 1.1811 (1.2416) lr 1.3681e-03 eta 1 day, 5:17:31
epoch [25/50] batch [140/2000] time 1.996 (2.032) data 0.000 (0.004) loss 0.8298 (1.2176) lr 1.3681e-03 eta 1 day, 5:16:16
epoch [25/50] batch [160/2000] time 2.053 (2.031) data 0.000 (0.004) loss 0.1815 (1.2359) lr 1.3681e-03 eta 1 day, 5:14:44
epoch [25/50] batch [180/2000] time 2.050 (2.031) data 0.000 (0.003) loss 0.8790 (1.2048) lr 1.3681e-03 eta 1 day, 5:13:48
epoch [25/50] batch [200/2000] time 2.030 (2.031) data 0.000 (0.003) loss 4.7361 (1.2072) lr 1.3681e-03 eta 1 day, 5:13:07
epoch [25/50] batch [220/2000] time 2.049 (2.031) data 0.000 (0.003) loss 0.2366 (1.2064) lr 1.3681e-03 eta 1 day, 5:12:25
epoch [25/50] batch [240/2000] time 2.049 (2.030) data 0.000 (0.002) loss 1.8873 (1.2044) lr 1.3681e-03 eta 1 day, 5:11:19
epoch [25/50] batch [260/2000] time 1.995 (2.030) data 0.000 (0.002) loss 0.0813 (1.2006) lr 1.3681e-03 eta 1 day, 5:10:27
epoch [25/50] batch [280/2000] time 2.027 (2.030) data 0.000 (0.002) loss 1.9988 (1.1979) lr 1.3681e-03 eta 1 day, 5:09:39
epoch [25/50] batch [300/2000] time 2.030 (2.030) data 0.000 (0.002) loss 0.3983 (1.1953) lr 1.3681e-03 eta 1 day, 5:09:21
epoch [25/50] batch [320/2000] time 2.030 (2.030) data 0.000 (0.002) loss 0.6487 (1.1857) lr 1.3681e-03 eta 1 day, 5:08:34
epoch [25/50] batch [340/2000] time 1.973 (2.030) data 0.000 (0.002) loss 0.8837 (1.1792) lr 1.3681e-03 eta 1 day, 5:07:30
epoch [25/50] batch [360/2000] time 1.997 (2.029) data 0.000 (0.002) loss 4.2751 (1.1944) lr 1.3681e-03 eta 1 day, 5:06:42
epoch [25/50] batch [380/2000] time 2.051 (2.029) data 0.000 (0.002) loss 0.5103 (1.1765) lr 1.3681e-03 eta 1 day, 5:05:55
epoch [25/50] batch [400/2000] time 2.050 (2.030) data 0.000 (0.002) loss 0.8181 (1.1802) lr 1.3681e-03 eta 1 day, 5:05:24
epoch [25/50] batch [420/2000] time 2.026 (2.030) data 0.000 (0.002) loss 1.1455 (1.1620) lr 1.3681e-03 eta 1 day, 5:04:43
epoch [25/50] batch [440/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.0234 (1.1597) lr 1.3681e-03 eta 1 day, 5:04:18
epoch [25/50] batch [460/2000] time 1.973 (2.030) data 0.000 (0.001) loss 3.6819 (1.1717) lr 1.3681e-03 eta 1 day, 5:03:44
epoch [25/50] batch [480/2000] time 1.995 (2.030) data 0.000 (0.001) loss 3.4760 (1.1709) lr 1.3681e-03 eta 1 day, 5:03:09
epoch [25/50] batch [500/2000] time 2.026 (2.030) data 0.000 (0.001) loss 0.3478 (1.1854) lr 1.3681e-03 eta 1 day, 5:02:16
epoch [25/50] batch [520/2000] time 2.048 (2.030) data 0.000 (0.001) loss 0.0168 (1.1963) lr 1.3681e-03 eta 1 day, 5:01:33
epoch [25/50] batch [540/2000] time 2.048 (2.030) data 0.000 (0.001) loss 0.6848 (1.2033) lr 1.3681e-03 eta 1 day, 5:00:51
epoch [25/50] batch [560/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.4445 (1.2195) lr 1.3681e-03 eta 1 day, 5:00:04
epoch [25/50] batch [580/2000] time 1.998 (2.030) data 0.000 (0.001) loss 2.2399 (1.2229) lr 1.3681e-03 eta 1 day, 4:59:19
epoch [25/50] batch [600/2000] time 2.049 (2.029) data 0.001 (0.001) loss 0.0607 (1.2039) lr 1.3681e-03 eta 1 day, 4:58:34
epoch [25/50] batch [620/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.8104 (1.1916) lr 1.3681e-03 eta 1 day, 4:57:51
epoch [25/50] batch [640/2000] time 2.031 (2.029) data 0.000 (0.001) loss 2.3124 (1.1886) lr 1.3681e-03 eta 1 day, 4:57:13
epoch [25/50] batch [660/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.8666 (1.1752) lr 1.3681e-03 eta 1 day, 4:56:20
epoch [25/50] batch [680/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.2437 (1.1712) lr 1.3681e-03 eta 1 day, 4:55:41
epoch [25/50] batch [700/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.2147 (1.1680) lr 1.3681e-03 eta 1 day, 4:55:09
epoch [25/50] batch [720/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.6275 (1.1676) lr 1.3681e-03 eta 1 day, 4:54:22
epoch [25/50] batch [740/2000] time 1.994 (2.029) data 0.000 (0.001) loss 0.4253 (1.1829) lr 1.3681e-03 eta 1 day, 4:53:34
epoch [25/50] batch [760/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.3652 (1.1822) lr 1.3681e-03 eta 1 day, 4:52:57
epoch [25/50] batch [780/2000] time 2.025 (2.029) data 0.000 (0.001) loss 2.0450 (1.1745) lr 1.3681e-03 eta 1 day, 4:52:22
epoch [25/50] batch [800/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.7657 (1.1719) lr 1.3681e-03 eta 1 day, 4:51:35
epoch [25/50] batch [820/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.1809 (1.1788) lr 1.3681e-03 eta 1 day, 4:50:46
epoch [25/50] batch [840/2000] time 1.973 (2.029) data 0.000 (0.001) loss 1.3470 (1.1820) lr 1.3681e-03 eta 1 day, 4:50:02
epoch [25/50] batch [860/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.2302 (1.1827) lr 1.3681e-03 eta 1 day, 4:49:22
epoch [25/50] batch [880/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.2161 (1.1758) lr 1.3681e-03 eta 1 day, 4:48:40
epoch [25/50] batch [900/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.4817 (1.1770) lr 1.3681e-03 eta 1 day, 4:47:59
epoch [25/50] batch [920/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.6387 (1.1811) lr 1.3681e-03 eta 1 day, 4:47:17
epoch [25/50] batch [940/2000] time 1.970 (2.029) data 0.000 (0.001) loss 1.7400 (1.1799) lr 1.3681e-03 eta 1 day, 4:46:41
epoch [25/50] batch [960/2000] time 1.993 (2.029) data 0.000 (0.001) loss 0.7583 (1.1730) lr 1.3681e-03 eta 1 day, 4:45:54
epoch [25/50] batch [980/2000] time 2.026 (2.029) data 0.000 (0.001) loss 2.7853 (1.1725) lr 1.3681e-03 eta 1 day, 4:45:02
epoch [25/50] batch [1000/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.1169 (1.1687) lr 1.3681e-03 eta 1 day, 4:44:18
epoch [25/50] batch [1020/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.6261 (1.1643) lr 1.3681e-03 eta 1 day, 4:43:40
epoch [25/50] batch [1040/2000] time 2.050 (2.029) data 0.000 (0.001) loss 4.2193 (1.1764) lr 1.3681e-03 eta 1 day, 4:42:53
epoch [25/50] batch [1060/2000] time 2.028 (2.028) data 0.000 (0.001) loss 1.1681 (1.1829) lr 1.3681e-03 eta 1 day, 4:42:09
epoch [25/50] batch [1080/2000] time 2.046 (2.029) data 0.000 (0.001) loss 0.0841 (1.1762) lr 1.3681e-03 eta 1 day, 4:41:34
epoch [25/50] batch [1100/2000] time 2.025 (2.029) data 0.000 (0.001) loss 0.3935 (1.1720) lr 1.3681e-03 eta 1 day, 4:40:51
epoch [25/50] batch [1120/2000] time 2.030 (2.029) data 0.000 (0.001) loss 1.8010 (1.1732) lr 1.3681e-03 eta 1 day, 4:40:10
epoch [25/50] batch [1140/2000] time 1.972 (2.028) data 0.001 (0.001) loss 0.4573 (1.1717) lr 1.3681e-03 eta 1 day, 4:39:22
epoch [25/50] batch [1160/2000] time 2.047 (2.028) data 0.000 (0.001) loss 0.2458 (1.1789) lr 1.3681e-03 eta 1 day, 4:38:36
epoch [25/50] batch [1180/2000] time 2.046 (2.028) data 0.000 (0.001) loss 0.2359 (1.1772) lr 1.3681e-03 eta 1 day, 4:37:48
epoch [25/50] batch [1200/2000] time 1.975 (2.028) data 0.000 (0.001) loss 0.5699 (1.1720) lr 1.3681e-03 eta 1 day, 4:37:02
epoch [25/50] batch [1220/2000] time 1.996 (2.028) data 0.000 (0.001) loss 2.0750 (1.1727) lr 1.3681e-03 eta 1 day, 4:36:13
epoch [25/50] batch [1240/2000] time 1.994 (2.028) data 0.000 (0.001) loss 1.2401 (1.1738) lr 1.3681e-03 eta 1 day, 4:35:37
epoch [25/50] batch [1260/2000] time 1.996 (2.028) data 0.000 (0.001) loss 0.6616 (1.1761) lr 1.3681e-03 eta 1 day, 4:34:52
epoch [25/50] batch [1280/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.1821 (1.1763) lr 1.3681e-03 eta 1 day, 4:34:14
epoch [25/50] batch [1300/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.3491 (1.1766) lr 1.3681e-03 eta 1 day, 4:33:30
epoch [25/50] batch [1320/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.3269 (1.1775) lr 1.3681e-03 eta 1 day, 4:32:49
epoch [25/50] batch [1340/2000] time 1.995 (2.028) data 0.000 (0.001) loss 0.9212 (1.1786) lr 1.3681e-03 eta 1 day, 4:32:07
epoch [25/50] batch [1360/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.6322 (1.1755) lr 1.3681e-03 eta 1 day, 4:31:27
epoch [25/50] batch [1380/2000] time 2.055 (2.028) data 0.000 (0.001) loss 1.3174 (1.1739) lr 1.3681e-03 eta 1 day, 4:30:51
epoch [25/50] batch [1400/2000] time 1.977 (2.028) data 0.000 (0.001) loss 0.1035 (1.1702) lr 1.3681e-03 eta 1 day, 4:30:08
epoch [25/50] batch [1420/2000] time 2.028 (2.028) data 0.000 (0.001) loss 1.6566 (1.1725) lr 1.3681e-03 eta 1 day, 4:29:29
epoch [25/50] batch [1440/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.9293 (1.1718) lr 1.3681e-03 eta 1 day, 4:28:50
epoch [25/50] batch [1460/2000] time 1.996 (2.028) data 0.000 (0.001) loss 0.4628 (1.1682) lr 1.3681e-03 eta 1 day, 4:28:05
epoch [25/50] batch [1480/2000] time 2.045 (2.028) data 0.000 (0.001) loss 1.1723 (1.1702) lr 1.3681e-03 eta 1 day, 4:27:25
epoch [25/50] batch [1500/2000] time 2.046 (2.028) data 0.000 (0.001) loss 0.3251 (1.1704) lr 1.3681e-03 eta 1 day, 4:26:45
epoch [25/50] batch [1520/2000] time 1.997 (2.028) data 0.000 (0.001) loss 0.1130 (1.1704) lr 1.3681e-03 eta 1 day, 4:26:08
epoch [25/50] batch [1540/2000] time 1.973 (2.028) data 0.000 (0.001) loss 0.6615 (1.1734) lr 1.3681e-03 eta 1 day, 4:25:27
epoch [25/50] batch [1560/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.3173 (1.1748) lr 1.3681e-03 eta 1 day, 4:24:43
epoch [25/50] batch [1580/2000] time 1.994 (2.028) data 0.000 (0.001) loss 1.1100 (1.1759) lr 1.3681e-03 eta 1 day, 4:24:02
epoch [25/50] batch [1600/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.6613 (1.1806) lr 1.3681e-03 eta 1 day, 4:23:24
epoch [25/50] batch [1620/2000] time 1.996 (2.028) data 0.000 (0.001) loss 1.2361 (1.1785) lr 1.3681e-03 eta 1 day, 4:22:41
epoch [25/50] batch [1640/2000] time 1.972 (2.028) data 0.000 (0.001) loss 0.0374 (1.1765) lr 1.3681e-03 eta 1 day, 4:22:03
epoch [25/50] batch [1660/2000] time 2.024 (2.028) data 0.000 (0.001) loss 1.7534 (1.1777) lr 1.3681e-03 eta 1 day, 4:21:20
epoch [25/50] batch [1680/2000] time 2.046 (2.028) data 0.001 (0.001) loss 1.4617 (1.1824) lr 1.3681e-03 eta 1 day, 4:20:43
epoch [25/50] batch [1700/2000] time 2.046 (2.028) data 0.000 (0.001) loss 1.4367 (1.1874) lr 1.3681e-03 eta 1 day, 4:20:08
epoch [25/50] batch [1720/2000] time 2.024 (2.028) data 0.000 (0.001) loss 2.9424 (1.1876) lr 1.3681e-03 eta 1 day, 4:19:28
epoch [25/50] batch [1740/2000] time 2.049 (2.028) data 0.000 (0.001) loss 3.0971 (1.1906) lr 1.3681e-03 eta 1 day, 4:18:44
epoch [25/50] batch [1760/2000] time 2.028 (2.028) data 0.000 (0.001) loss 0.4853 (1.1944) lr 1.3681e-03 eta 1 day, 4:18:01
epoch [25/50] batch [1780/2000] time 2.003 (2.028) data 0.000 (0.000) loss 0.5683 (1.1922) lr 1.3681e-03 eta 1 day, 4:17:17
epoch [25/50] batch [1800/2000] time 2.051 (2.028) data 0.000 (0.000) loss 0.1856 (1.1896) lr 1.3681e-03 eta 1 day, 4:16:40
epoch [25/50] batch [1820/2000] time 2.050 (2.028) data 0.000 (0.000) loss 1.9011 (1.1878) lr 1.3681e-03 eta 1 day, 4:16:02
epoch [25/50] batch [1840/2000] time 2.048 (2.028) data 0.000 (0.000) loss 0.3924 (1.1903) lr 1.3681e-03 eta 1 day, 4:15:24
epoch [25/50] batch [1860/2000] time 2.050 (2.028) data 0.000 (0.000) loss 0.9732 (1.1882) lr 1.3681e-03 eta 1 day, 4:14:43
epoch [25/50] batch [1880/2000] time 1.999 (2.028) data 0.000 (0.000) loss 0.2274 (1.1862) lr 1.3681e-03 eta 1 day, 4:13:55
epoch [25/50] batch [1900/2000] time 2.049 (2.028) data 0.000 (0.000) loss 0.7726 (1.1846) lr 1.3681e-03 eta 1 day, 4:13:13
epoch [25/50] batch [1920/2000] time 2.023 (2.028) data 0.000 (0.000) loss 1.9243 (1.1840) lr 1.3681e-03 eta 1 day, 4:12:29
epoch [25/50] batch [1940/2000] time 1.993 (2.028) data 0.000 (0.000) loss 0.6667 (1.1833) lr 1.3681e-03 eta 1 day, 4:11:46
epoch [25/50] batch [1960/2000] time 2.024 (2.028) data 0.000 (0.000) loss 2.5775 (1.1847) lr 1.3681e-03 eta 1 day, 4:11:05
epoch [25/50] batch [1980/2000] time 2.024 (2.028) data 0.000 (0.000) loss 1.3481 (1.1887) lr 1.3681e-03 eta 1 day, 4:10:22
epoch [25/50] batch [2000/2000] time 1.992 (2.028) data 0.000 (0.000) loss 1.2056 (1.1906) lr 1.3090e-03 eta 1 day, 4:09:40
epoch [26/50] batch [20/2000] time 2.026 (2.049) data 0.000 (0.028) loss 1.2106 (1.1203) lr 1.3090e-03 eta 1 day, 4:26:59
epoch [26/50] batch [40/2000] time 2.026 (2.036) data 0.000 (0.014) loss 1.1984 (0.9858) lr 1.3090e-03 eta 1 day, 4:15:30
epoch [26/50] batch [60/2000] time 2.047 (2.033) data 0.001 (0.009) loss 2.2165 (1.1956) lr 1.3090e-03 eta 1 day, 4:12:01
epoch [26/50] batch [80/2000] time 2.049 (2.031) data 0.000 (0.007) loss 2.0140 (1.2902) lr 1.3090e-03 eta 1 day, 4:09:36
epoch [26/50] batch [100/2000] time 1.995 (2.029) data 0.000 (0.006) loss 2.5079 (1.3510) lr 1.3090e-03 eta 1 day, 4:07:31
epoch [26/50] batch [120/2000] time 2.046 (2.029) data 0.000 (0.005) loss 2.0357 (1.3468) lr 1.3090e-03 eta 1 day, 4:06:56
epoch [26/50] batch [140/2000] time 2.048 (2.031) data 0.000 (0.004) loss 2.2967 (1.3271) lr 1.3090e-03 eta 1 day, 4:07:22
epoch [26/50] batch [160/2000] time 2.053 (2.030) data 0.000 (0.004) loss 1.5041 (1.3360) lr 1.3090e-03 eta 1 day, 4:06:35
epoch [26/50] batch [180/2000] time 2.004 (2.030) data 0.000 (0.003) loss 0.0476 (1.3221) lr 1.3090e-03 eta 1 day, 4:05:52
epoch [26/50] batch [200/2000] time 2.031 (2.031) data 0.000 (0.003) loss 0.5120 (1.2996) lr 1.3090e-03 eta 1 day, 4:05:32
epoch [26/50] batch [220/2000] time 2.026 (2.031) data 0.000 (0.003) loss 0.0911 (1.3071) lr 1.3090e-03 eta 1 day, 4:05:02
epoch [26/50] batch [240/2000] time 1.996 (2.030) data 0.000 (0.002) loss 0.8564 (1.2807) lr 1.3090e-03 eta 1 day, 4:03:44
epoch [26/50] batch [260/2000] time 2.049 (2.030) data 0.000 (0.002) loss 0.2399 (1.2813) lr 1.3090e-03 eta 1 day, 4:03:00
epoch [26/50] batch [280/2000] time 2.026 (2.029) data 0.000 (0.002) loss 1.6209 (1.2600) lr 1.3090e-03 eta 1 day, 4:01:39
epoch [26/50] batch [300/2000] time 2.050 (2.029) data 0.000 (0.002) loss 2.0248 (1.2525) lr 1.3090e-03 eta 1 day, 4:00:39
epoch [26/50] batch [320/2000] time 1.998 (2.029) data 0.000 (0.002) loss 0.7677 (1.2439) lr 1.3090e-03 eta 1 day, 3:59:36
epoch [26/50] batch [340/2000] time 2.050 (2.028) data 0.000 (0.002) loss 1.4766 (1.2369) lr 1.3090e-03 eta 1 day, 3:58:54
epoch [26/50] batch [360/2000] time 2.048 (2.028) data 0.000 (0.002) loss 1.5230 (1.2487) lr 1.3090e-03 eta 1 day, 3:58:06
epoch [26/50] batch [380/2000] time 2.029 (2.029) data 0.000 (0.002) loss 0.7071 (1.2365) lr 1.3090e-03 eta 1 day, 3:57:41
epoch [26/50] batch [400/2000] time 2.028 (2.028) data 0.000 (0.002) loss 1.2394 (1.2338) lr 1.3090e-03 eta 1 day, 3:56:31
epoch [26/50] batch [420/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.7648 (1.2205) lr 1.3090e-03 eta 1 day, 3:55:57
epoch [26/50] batch [440/2000] time 1.997 (2.028) data 0.000 (0.001) loss 3.3630 (1.2316) lr 1.3090e-03 eta 1 day, 3:55:09
epoch [26/50] batch [460/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.1317 (1.2134) lr 1.3090e-03 eta 1 day, 3:54:23
epoch [26/50] batch [480/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.4305 (1.2127) lr 1.3090e-03 eta 1 day, 3:53:40
epoch [26/50] batch [500/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.4596 (1.2189) lr 1.3090e-03 eta 1 day, 3:52:55
epoch [26/50] batch [520/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.9897 (1.2180) lr 1.3090e-03 eta 1 day, 3:52:09
epoch [26/50] batch [540/2000] time 2.027 (2.028) data 0.000 (0.001) loss 1.3182 (1.2221) lr 1.3090e-03 eta 1 day, 3:51:31
epoch [26/50] batch [560/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.4004 (1.2089) lr 1.3090e-03 eta 1 day, 3:50:41
epoch [26/50] batch [580/2000] time 2.050 (2.027) data 0.000 (0.001) loss 0.4384 (1.2063) lr 1.3090e-03 eta 1 day, 3:49:57
epoch [26/50] batch [600/2000] time 2.051 (2.027) data 0.001 (0.001) loss 0.4180 (1.2093) lr 1.3090e-03 eta 1 day, 3:49:16
epoch [26/50] batch [620/2000] time 2.032 (2.027) data 0.000 (0.001) loss 1.5375 (1.2249) lr 1.3090e-03 eta 1 day, 3:48:34
epoch [26/50] batch [640/2000] time 2.052 (2.027) data 0.000 (0.001) loss 1.6845 (1.2204) lr 1.3090e-03 eta 1 day, 3:47:56
epoch [26/50] batch [660/2000] time 2.048 (2.027) data 0.000 (0.001) loss 1.0431 (1.2349) lr 1.3090e-03 eta 1 day, 3:47:13
epoch [26/50] batch [680/2000] time 2.049 (2.027) data 0.000 (0.001) loss 0.3036 (1.2247) lr 1.3090e-03 eta 1 day, 3:46:14
epoch [26/50] batch [700/2000] time 2.050 (2.027) data 0.000 (0.001) loss 0.4608 (1.2164) lr 1.3090e-03 eta 1 day, 3:45:27
epoch [26/50] batch [720/2000] time 1.999 (2.027) data 0.000 (0.001) loss 2.3105 (1.2116) lr 1.3090e-03 eta 1 day, 3:44:48
epoch [26/50] batch [740/2000] time 2.051 (2.027) data 0.000 (0.001) loss 1.1218 (1.2068) lr 1.3090e-03 eta 1 day, 3:44:07
epoch [26/50] batch [760/2000] time 2.048 (2.027) data 0.000 (0.001) loss 1.5867 (1.2047) lr 1.3090e-03 eta 1 day, 3:43:33
epoch [26/50] batch [780/2000] time 2.047 (2.027) data 0.000 (0.001) loss 3.5128 (1.2088) lr 1.3090e-03 eta 1 day, 3:42:51
epoch [26/50] batch [800/2000] time 2.048 (2.027) data 0.000 (0.001) loss 0.2442 (1.2049) lr 1.3090e-03 eta 1 day, 3:42:05
epoch [26/50] batch [820/2000] time 2.049 (2.027) data 0.000 (0.001) loss 0.0906 (1.2084) lr 1.3090e-03 eta 1 day, 3:41:29
epoch [26/50] batch [840/2000] time 2.050 (2.027) data 0.000 (0.001) loss 1.1603 (1.2110) lr 1.3090e-03 eta 1 day, 3:40:47
epoch [26/50] batch [860/2000] time 2.025 (2.027) data 0.000 (0.001) loss 0.4184 (1.2115) lr 1.3090e-03 eta 1 day, 3:40:03
epoch [26/50] batch [880/2000] time 2.025 (2.027) data 0.000 (0.001) loss 1.5666 (1.2146) lr 1.3090e-03 eta 1 day, 3:39:30
epoch [26/50] batch [900/2000] time 2.049 (2.027) data 0.000 (0.001) loss 0.0375 (1.2097) lr 1.3090e-03 eta 1 day, 3:38:48
epoch [26/50] batch [920/2000] time 1.995 (2.027) data 0.000 (0.001) loss 0.4639 (1.2015) lr 1.3090e-03 eta 1 day, 3:38:13
epoch [26/50] batch [940/2000] time 2.027 (2.027) data 0.000 (0.001) loss 0.8877 (1.2027) lr 1.3090e-03 eta 1 day, 3:37:26
epoch [26/50] batch [960/2000] time 2.051 (2.027) data 0.000 (0.001) loss 0.6391 (1.2068) lr 1.3090e-03 eta 1 day, 3:36:43
epoch [26/50] batch [980/2000] time 2.027 (2.027) data 0.000 (0.001) loss 1.6983 (1.2089) lr 1.3090e-03 eta 1 day, 3:35:59
epoch [26/50] batch [1000/2000] time 2.050 (2.027) data 0.000 (0.001) loss 0.6226 (1.2038) lr 1.3090e-03 eta 1 day, 3:35:20
epoch [26/50] batch [1020/2000] time 2.026 (2.027) data 0.000 (0.001) loss 2.0646 (1.2004) lr 1.3090e-03 eta 1 day, 3:34:34
epoch [26/50] batch [1040/2000] time 2.049 (2.027) data 0.000 (0.001) loss 0.6707 (1.1972) lr 1.3090e-03 eta 1 day, 3:33:53
epoch [26/50] batch [1060/2000] time 2.051 (2.027) data 0.000 (0.001) loss 1.2194 (1.1919) lr 1.3090e-03 eta 1 day, 3:33:07
epoch [26/50] batch [1080/2000] time 2.027 (2.027) data 0.000 (0.001) loss 0.9011 (1.1831) lr 1.3090e-03 eta 1 day, 3:32:31
epoch [26/50] batch [1100/2000] time 2.050 (2.027) data 0.000 (0.001) loss 0.8172 (1.1813) lr 1.3090e-03 eta 1 day, 3:31:56
epoch [26/50] batch [1120/2000] time 2.049 (2.027) data 0.000 (0.001) loss 0.4076 (1.1792) lr 1.3090e-03 eta 1 day, 3:31:15
epoch [26/50] batch [1140/2000] time 2.049 (2.027) data 0.001 (0.001) loss 1.1454 (1.1735) lr 1.3090e-03 eta 1 day, 3:30:38
epoch [26/50] batch [1160/2000] time 2.027 (2.027) data 0.000 (0.001) loss 1.2750 (1.1754) lr 1.3090e-03 eta 1 day, 3:29:58
epoch [26/50] batch [1180/2000] time 1.997 (2.027) data 0.000 (0.001) loss 0.4162 (1.1689) lr 1.3090e-03 eta 1 day, 3:29:17
epoch [26/50] batch [1200/2000] time 2.050 (2.027) data 0.000 (0.001) loss 0.1400 (1.1682) lr 1.3090e-03 eta 1 day, 3:28:40
epoch [26/50] batch [1220/2000] time 2.024 (2.027) data 0.000 (0.001) loss 0.7347 (1.1684) lr 1.3090e-03 eta 1 day, 3:28:00
epoch [26/50] batch [1240/2000] time 2.051 (2.027) data 0.000 (0.001) loss 0.7098 (1.1735) lr 1.3090e-03 eta 1 day, 3:27:24
epoch [26/50] batch [1260/2000] time 1.999 (2.027) data 0.000 (0.001) loss 2.6170 (1.1720) lr 1.3090e-03 eta 1 day, 3:26:42
epoch [26/50] batch [1280/2000] time 2.051 (2.027) data 0.000 (0.001) loss 0.7315 (1.1768) lr 1.3090e-03 eta 1 day, 3:25:58
epoch [26/50] batch [1300/2000] time 1.996 (2.027) data 0.000 (0.001) loss 0.9951 (1.1693) lr 1.3090e-03 eta 1 day, 3:25:15
epoch [26/50] batch [1320/2000] time 2.049 (2.027) data 0.000 (0.001) loss 0.1349 (1.1684) lr 1.3090e-03 eta 1 day, 3:24:34
epoch [26/50] batch [1340/2000] time 2.051 (2.027) data 0.000 (0.001) loss 0.1092 (1.1681) lr 1.3090e-03 eta 1 day, 3:23:55
epoch [26/50] batch [1360/2000] time 2.056 (2.027) data 0.000 (0.001) loss 2.6327 (1.1656) lr 1.3090e-03 eta 1 day, 3:23:17
epoch [26/50] batch [1380/2000] time 2.054 (2.027) data 0.000 (0.001) loss 0.0950 (1.1636) lr 1.3090e-03 eta 1 day, 3:22:35
epoch [26/50] batch [1400/2000] time 2.030 (2.027) data 0.000 (0.001) loss 2.3615 (1.1642) lr 1.3090e-03 eta 1 day, 3:21:56
epoch [26/50] batch [1420/2000] time 2.048 (2.027) data 0.000 (0.001) loss 0.5702 (1.1667) lr 1.3090e-03 eta 1 day, 3:21:15
epoch [26/50] batch [1440/2000] time 1.995 (2.027) data 0.000 (0.001) loss 1.3915 (1.1658) lr 1.3090e-03 eta 1 day, 3:20:34
epoch [26/50] batch [1460/2000] time 1.996 (2.027) data 0.000 (0.001) loss 0.5596 (1.1658) lr 1.3090e-03 eta 1 day, 3:19:50
epoch [26/50] batch [1480/2000] time 2.025 (2.027) data 0.000 (0.001) loss 0.9578 (1.1682) lr 1.3090e-03 eta 1 day, 3:19:07
epoch [26/50] batch [1500/2000] time 2.028 (2.027) data 0.000 (0.001) loss 0.0604 (1.1694) lr 1.3090e-03 eta 1 day, 3:18:29
epoch [26/50] batch [1520/2000] time 2.026 (2.027) data 0.000 (0.001) loss 1.1163 (1.1706) lr 1.3090e-03 eta 1 day, 3:17:45
epoch [26/50] batch [1540/2000] time 1.998 (2.027) data 0.000 (0.001) loss 0.0203 (1.1727) lr 1.3090e-03 eta 1 day, 3:17:03
epoch [26/50] batch [1560/2000] time 1.974 (2.027) data 0.000 (0.001) loss 1.9871 (1.1683) lr 1.3090e-03 eta 1 day, 3:16:21
epoch [26/50] batch [1580/2000] time 2.052 (2.027) data 0.000 (0.001) loss 0.0609 (1.1702) lr 1.3090e-03 eta 1 day, 3:15:39
epoch [26/50] batch [1600/2000] time 2.050 (2.027) data 0.000 (0.001) loss 1.3883 (1.1712) lr 1.3090e-03 eta 1 day, 3:14:58
epoch [26/50] batch [1620/2000] time 1.998 (2.027) data 0.000 (0.001) loss 0.6521 (1.1686) lr 1.3090e-03 eta 1 day, 3:14:16
epoch [26/50] batch [1640/2000] time 1.998 (2.027) data 0.000 (0.001) loss 0.7323 (1.1670) lr 1.3090e-03 eta 1 day, 3:13:35
epoch [26/50] batch [1660/2000] time 2.049 (2.027) data 0.000 (0.001) loss 1.5226 (1.1661) lr 1.3090e-03 eta 1 day, 3:12:56
epoch [26/50] batch [1680/2000] time 2.052 (2.027) data 0.001 (0.001) loss 3.3866 (1.1689) lr 1.3090e-03 eta 1 day, 3:12:17
epoch [26/50] batch [1700/2000] time 2.057 (2.027) data 0.000 (0.001) loss 0.8570 (1.1693) lr 1.3090e-03 eta 1 day, 3:11:39
epoch [26/50] batch [1720/2000] time 2.035 (2.027) data 0.000 (0.001) loss 0.1436 (1.1685) lr 1.3090e-03 eta 1 day, 3:11:01
epoch [26/50] batch [1740/2000] time 2.031 (2.027) data 0.000 (0.001) loss 0.6325 (1.1650) lr 1.3090e-03 eta 1 day, 3:10:21
epoch [26/50] batch [1760/2000] time 2.029 (2.027) data 0.000 (0.001) loss 1.2188 (1.1689) lr 1.3090e-03 eta 1 day, 3:09:39
epoch [26/50] batch [1780/2000] time 2.053 (2.027) data 0.000 (0.001) loss 2.5448 (1.1676) lr 1.3090e-03 eta 1 day, 3:09:04
epoch [26/50] batch [1800/2000] time 2.049 (2.027) data 0.000 (0.000) loss 0.8038 (1.1677) lr 1.3090e-03 eta 1 day, 3:08:22
epoch [26/50] batch [1820/2000] time 2.054 (2.027) data 0.000 (0.000) loss 1.5628 (1.1669) lr 1.3090e-03 eta 1 day, 3:07:40
epoch [26/50] batch [1840/2000] time 2.049 (2.027) data 0.000 (0.000) loss 0.3300 (1.1697) lr 1.3090e-03 eta 1 day, 3:07:07
epoch [26/50] batch [1860/2000] time 2.028 (2.027) data 0.000 (0.000) loss 1.4729 (1.1700) lr 1.3090e-03 eta 1 day, 3:06:25
epoch [26/50] batch [1880/2000] time 2.026 (2.027) data 0.000 (0.000) loss 2.1190 (1.1709) lr 1.3090e-03 eta 1 day, 3:05:47
epoch [26/50] batch [1900/2000] time 2.025 (2.027) data 0.000 (0.000) loss 0.9183 (1.1678) lr 1.3090e-03 eta 1 day, 3:05:04
epoch [26/50] batch [1920/2000] time 2.026 (2.027) data 0.000 (0.000) loss 4.8488 (1.1709) lr 1.3090e-03 eta 1 day, 3:04:23
epoch [26/50] batch [1940/2000] time 1.996 (2.027) data 0.000 (0.000) loss 0.9761 (1.1692) lr 1.3090e-03 eta 1 day, 3:03:42
epoch [26/50] batch [1960/2000] time 1.996 (2.027) data 0.000 (0.000) loss 1.4597 (1.1712) lr 1.3090e-03 eta 1 day, 3:03:01
epoch [26/50] batch [1980/2000] time 2.050 (2.027) data 0.000 (0.000) loss 0.5060 (1.1742) lr 1.3090e-03 eta 1 day, 3:02:18
epoch [26/50] batch [2000/2000] time 2.046 (2.027) data 0.000 (0.000) loss 0.1590 (1.1749) lr 1.2487e-03 eta 1 day, 3:01:34
epoch [27/50] batch [20/2000] time 2.048 (2.058) data 0.000 (0.028) loss 1.3988 (1.3464) lr 1.2487e-03 eta 1 day, 3:25:26
epoch [27/50] batch [40/2000] time 2.054 (2.046) data 0.000 (0.014) loss 2.6168 (1.2647) lr 1.2487e-03 eta 1 day, 3:15:05
epoch [27/50] batch [60/2000] time 2.028 (2.040) data 0.001 (0.009) loss 0.9760 (1.2422) lr 1.2487e-03 eta 1 day, 3:10:20
epoch [27/50] batch [80/2000] time 2.050 (2.039) data 0.000 (0.007) loss 1.0938 (1.2141) lr 1.2487e-03 eta 1 day, 3:08:21
epoch [27/50] batch [100/2000] time 1.997 (2.036) data 0.000 (0.006) loss 1.6346 (1.1802) lr 1.2487e-03 eta 1 day, 3:05:47
epoch [27/50] batch [120/2000] time 2.051 (2.036) data 0.000 (0.005) loss 1.3017 (1.1627) lr 1.2487e-03 eta 1 day, 3:04:30
epoch [27/50] batch [140/2000] time 1.997 (2.035) data 0.000 (0.004) loss 0.4498 (1.1709) lr 1.2487e-03 eta 1 day, 3:03:28
epoch [27/50] batch [160/2000] time 2.025 (2.035) data 0.000 (0.004) loss 1.3989 (1.1547) lr 1.2487e-03 eta 1 day, 3:02:15
epoch [27/50] batch [180/2000] time 2.048 (2.033) data 0.000 (0.003) loss 0.1843 (1.1607) lr 1.2487e-03 eta 1 day, 3:00:36
epoch [27/50] batch [200/2000] time 2.029 (2.033) data 0.000 (0.003) loss 0.5589 (1.1940) lr 1.2487e-03 eta 1 day, 2:59:37
epoch [27/50] batch [220/2000] time 2.001 (2.032) data 0.000 (0.003) loss 0.6141 (1.2027) lr 1.2487e-03 eta 1 day, 2:58:28
epoch [27/50] batch [240/2000] time 1.978 (2.032) data 0.000 (0.002) loss 2.5732 (1.2231) lr 1.2487e-03 eta 1 day, 2:57:41
epoch [27/50] batch [260/2000] time 2.034 (2.032) data 0.000 (0.002) loss 1.5838 (1.2303) lr 1.2487e-03 eta 1 day, 2:57:08
epoch [27/50] batch [280/2000] time 2.029 (2.033) data 0.000 (0.002) loss 0.5019 (1.2293) lr 1.2487e-03 eta 1 day, 2:56:50
epoch [27/50] batch [300/2000] time 2.052 (2.033) data 0.000 (0.002) loss 0.0872 (1.2094) lr 1.2487e-03 eta 1 day, 2:56:20
epoch [27/50] batch [320/2000] time 2.050 (2.034) data 0.000 (0.002) loss 0.8883 (1.2172) lr 1.2487e-03 eta 1 day, 2:56:00
epoch [27/50] batch [340/2000] time 2.028 (2.033) data 0.000 (0.002) loss 4.2904 (1.2417) lr 1.2487e-03 eta 1 day, 2:55:08
epoch [27/50] batch [360/2000] time 2.047 (2.033) data 0.000 (0.002) loss 0.3848 (1.2288) lr 1.2487e-03 eta 1 day, 2:54:31
epoch [27/50] batch [380/2000] time 2.047 (2.033) data 0.000 (0.002) loss 1.3321 (1.2360) lr 1.2487e-03 eta 1 day, 2:53:32
epoch [27/50] batch [400/2000] time 2.030 (2.033) data 0.000 (0.002) loss 0.6987 (1.2388) lr 1.2487e-03 eta 1 day, 2:52:28
epoch [27/50] batch [420/2000] time 2.002 (2.033) data 0.000 (0.002) loss 0.2905 (1.2500) lr 1.2487e-03 eta 1 day, 2:51:47
epoch [27/50] batch [440/2000] time 1.994 (2.032) data 0.000 (0.001) loss 1.5572 (1.2418) lr 1.2487e-03 eta 1 day, 2:50:54
epoch [27/50] batch [460/2000] time 2.025 (2.032) data 0.000 (0.001) loss 1.5154 (1.2503) lr 1.2487e-03 eta 1 day, 2:50:00
epoch [27/50] batch [480/2000] time 2.028 (2.032) data 0.000 (0.001) loss 0.1601 (1.2522) lr 1.2487e-03 eta 1 day, 2:49:00
epoch [27/50] batch [500/2000] time 2.027 (2.032) data 0.000 (0.001) loss 0.6916 (1.2483) lr 1.2487e-03 eta 1 day, 2:48:20
epoch [27/50] batch [520/2000] time 2.056 (2.031) data 0.000 (0.001) loss 1.3248 (1.2467) lr 1.2487e-03 eta 1 day, 2:47:27
epoch [27/50] batch [540/2000] time 2.027 (2.031) data 0.000 (0.001) loss 1.0590 (1.2412) lr 1.2487e-03 eta 1 day, 2:46:46
epoch [27/50] batch [560/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.4703 (1.2447) lr 1.2487e-03 eta 1 day, 2:46:09
epoch [27/50] batch [580/2000] time 2.048 (2.031) data 0.000 (0.001) loss 1.7457 (1.2403) lr 1.2487e-03 eta 1 day, 2:45:19
epoch [27/50] batch [600/2000] time 2.050 (2.031) data 0.001 (0.001) loss 0.8436 (1.2445) lr 1.2487e-03 eta 1 day, 2:44:34
epoch [27/50] batch [620/2000] time 2.048 (2.031) data 0.000 (0.001) loss 1.4744 (1.2499) lr 1.2487e-03 eta 1 day, 2:43:46
epoch [27/50] batch [640/2000] time 1.971 (2.031) data 0.000 (0.001) loss 1.9194 (1.2496) lr 1.2487e-03 eta 1 day, 2:42:57
epoch [27/50] batch [660/2000] time 1.999 (2.031) data 0.000 (0.001) loss 1.9062 (1.2552) lr 1.2487e-03 eta 1 day, 2:42:10
epoch [27/50] batch [680/2000] time 1.977 (2.030) data 0.000 (0.001) loss 1.9676 (1.2710) lr 1.2487e-03 eta 1 day, 2:41:17
epoch [27/50] batch [700/2000] time 2.050 (2.030) data 0.000 (0.001) loss 2.0337 (1.2686) lr 1.2487e-03 eta 1 day, 2:40:37
epoch [27/50] batch [720/2000] time 2.049 (2.030) data 0.000 (0.001) loss 3.0635 (1.2642) lr 1.2487e-03 eta 1 day, 2:39:56
epoch [27/50] batch [740/2000] time 2.052 (2.030) data 0.000 (0.001) loss 2.0220 (1.2628) lr 1.2487e-03 eta 1 day, 2:39:10
epoch [27/50] batch [760/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.6177 (1.2653) lr 1.2487e-03 eta 1 day, 2:38:30
epoch [27/50] batch [780/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.1392 (1.2649) lr 1.2487e-03 eta 1 day, 2:37:54
epoch [27/50] batch [800/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.3184 (1.2631) lr 1.2487e-03 eta 1 day, 2:37:10
epoch [27/50] batch [820/2000] time 1.995 (2.030) data 0.000 (0.001) loss 0.5526 (1.2674) lr 1.2487e-03 eta 1 day, 2:36:29
epoch [27/50] batch [840/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.5588 (1.2704) lr 1.2487e-03 eta 1 day, 2:35:41
epoch [27/50] batch [860/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.4612 (1.2630) lr 1.2487e-03 eta 1 day, 2:35:01
epoch [27/50] batch [880/2000] time 2.029 (2.030) data 0.000 (0.001) loss 0.6748 (1.2575) lr 1.2487e-03 eta 1 day, 2:34:18
epoch [27/50] batch [900/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.0203 (1.2573) lr 1.2487e-03 eta 1 day, 2:33:36
epoch [27/50] batch [920/2000] time 2.048 (2.030) data 0.000 (0.001) loss 0.3834 (1.2603) lr 1.2487e-03 eta 1 day, 2:33:01
epoch [27/50] batch [940/2000] time 2.045 (2.030) data 0.000 (0.001) loss 0.7007 (1.2550) lr 1.2487e-03 eta 1 day, 2:32:24
epoch [27/50] batch [960/2000] time 2.046 (2.030) data 0.000 (0.001) loss 2.2242 (1.2576) lr 1.2487e-03 eta 1 day, 2:31:36
epoch [27/50] batch [980/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.7771 (1.2637) lr 1.2487e-03 eta 1 day, 2:30:55
epoch [27/50] batch [1000/2000] time 2.028 (2.030) data 0.000 (0.001) loss 2.4243 (1.2627) lr 1.2487e-03 eta 1 day, 2:30:11
epoch [27/50] batch [1020/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.8164 (1.2613) lr 1.2487e-03 eta 1 day, 2:29:28
epoch [27/50] batch [1040/2000] time 2.048 (2.030) data 0.000 (0.001) loss 1.4175 (1.2517) lr 1.2487e-03 eta 1 day, 2:28:52
epoch [27/50] batch [1060/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.1652 (1.2512) lr 1.2487e-03 eta 1 day, 2:28:09
epoch [27/50] batch [1080/2000] time 2.030 (2.030) data 0.000 (0.001) loss 1.8100 (1.2501) lr 1.2487e-03 eta 1 day, 2:27:32
epoch [27/50] batch [1100/2000] time 2.048 (2.030) data 0.000 (0.001) loss 1.9109 (1.2566) lr 1.2487e-03 eta 1 day, 2:26:44
epoch [27/50] batch [1120/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.1753 (1.2542) lr 1.2487e-03 eta 1 day, 2:26:01
epoch [27/50] batch [1140/2000] time 1.995 (2.030) data 0.001 (0.001) loss 1.1907 (1.2501) lr 1.2487e-03 eta 1 day, 2:25:18
epoch [27/50] batch [1160/2000] time 2.025 (2.030) data 0.000 (0.001) loss 1.6922 (1.2491) lr 1.2487e-03 eta 1 day, 2:24:32
epoch [27/50] batch [1180/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.4302 (1.2525) lr 1.2487e-03 eta 1 day, 2:23:47
epoch [27/50] batch [1200/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.0630 (1.2482) lr 1.2487e-03 eta 1 day, 2:23:07
epoch [27/50] batch [1220/2000] time 1.997 (2.030) data 0.000 (0.001) loss 2.3348 (1.2523) lr 1.2487e-03 eta 1 day, 2:22:29
epoch [27/50] batch [1240/2000] time 2.051 (2.030) data 0.000 (0.001) loss 2.0711 (1.2510) lr 1.2487e-03 eta 1 day, 2:21:53
epoch [27/50] batch [1260/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.2516 (1.2468) lr 1.2487e-03 eta 1 day, 2:21:09
epoch [27/50] batch [1280/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.5119 (1.2448) lr 1.2487e-03 eta 1 day, 2:20:27
epoch [27/50] batch [1300/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.5635 (1.2447) lr 1.2487e-03 eta 1 day, 2:19:51
epoch [27/50] batch [1320/2000] time 1.974 (2.030) data 0.000 (0.001) loss 1.2456 (1.2421) lr 1.2487e-03 eta 1 day, 2:19:08
epoch [27/50] batch [1340/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.8641 (1.2376) lr 1.2487e-03 eta 1 day, 2:18:27
epoch [27/50] batch [1360/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.3389 (1.2361) lr 1.2487e-03 eta 1 day, 2:17:41
epoch [27/50] batch [1380/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.5038 (1.2338) lr 1.2487e-03 eta 1 day, 2:16:56
epoch [27/50] batch [1400/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.5869 (1.2307) lr 1.2487e-03 eta 1 day, 2:16:11
epoch [27/50] batch [1420/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.1988 (1.2277) lr 1.2487e-03 eta 1 day, 2:15:30
epoch [27/50] batch [1440/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.3167 (1.2284) lr 1.2487e-03 eta 1 day, 2:14:50
epoch [27/50] batch [1460/2000] time 2.046 (2.029) data 0.000 (0.001) loss 0.6993 (1.2237) lr 1.2487e-03 eta 1 day, 2:14:10
epoch [27/50] batch [1480/2000] time 1.975 (2.029) data 0.000 (0.001) loss 0.1453 (1.2205) lr 1.2487e-03 eta 1 day, 2:13:28
epoch [27/50] batch [1500/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.1127 (1.2199) lr 1.2487e-03 eta 1 day, 2:12:45
epoch [27/50] batch [1520/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.4241 (1.2134) lr 1.2487e-03 eta 1 day, 2:12:08
epoch [27/50] batch [1540/2000] time 2.058 (2.029) data 0.000 (0.001) loss 0.9766 (1.2114) lr 1.2487e-03 eta 1 day, 2:11:30
epoch [27/50] batch [1560/2000] time 2.040 (2.030) data 0.000 (0.001) loss 2.5274 (1.2095) lr 1.2487e-03 eta 1 day, 2:10:51
epoch [27/50] batch [1580/2000] time 2.059 (2.030) data 0.000 (0.001) loss 0.1398 (1.2084) lr 1.2487e-03 eta 1 day, 2:10:10
epoch [27/50] batch [1600/2000] time 2.056 (2.030) data 0.000 (0.001) loss 3.1348 (1.2091) lr 1.2487e-03 eta 1 day, 2:09:29
epoch [27/50] batch [1620/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.5549 (1.2082) lr 1.2487e-03 eta 1 day, 2:08:53
epoch [27/50] batch [1640/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.9608 (1.2093) lr 1.2487e-03 eta 1 day, 2:08:15
epoch [27/50] batch [1660/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.9641 (1.2088) lr 1.2487e-03 eta 1 day, 2:07:35
epoch [27/50] batch [1680/2000] time 2.057 (2.030) data 0.001 (0.001) loss 1.1999 (1.2102) lr 1.2487e-03 eta 1 day, 2:06:59
epoch [27/50] batch [1700/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.6375 (1.2094) lr 1.2487e-03 eta 1 day, 2:06:20
epoch [27/50] batch [1720/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.7720 (1.2070) lr 1.2487e-03 eta 1 day, 2:05:35
epoch [27/50] batch [1740/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.6843 (1.2041) lr 1.2487e-03 eta 1 day, 2:04:52
epoch [27/50] batch [1760/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.6388 (1.2093) lr 1.2487e-03 eta 1 day, 2:04:06
epoch [27/50] batch [1780/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.2554 (1.2080) lr 1.2487e-03 eta 1 day, 2:03:25
epoch [27/50] batch [1800/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.5971 (1.2078) lr 1.2487e-03 eta 1 day, 2:02:40
epoch [27/50] batch [1820/2000] time 2.048 (2.029) data 0.000 (0.000) loss 1.2093 (1.2044) lr 1.2487e-03 eta 1 day, 2:01:57
epoch [27/50] batch [1840/2000] time 1.997 (2.029) data 0.000 (0.000) loss 0.4276 (1.1999) lr 1.2487e-03 eta 1 day, 2:01:15
epoch [27/50] batch [1860/2000] time 1.996 (2.029) data 0.000 (0.000) loss 1.0459 (1.2005) lr 1.2487e-03 eta 1 day, 2:00:35
epoch [27/50] batch [1880/2000] time 2.050 (2.029) data 0.000 (0.000) loss 0.3173 (1.2007) lr 1.2487e-03 eta 1 day, 1:59:51
epoch [27/50] batch [1900/2000] time 2.051 (2.029) data 0.000 (0.000) loss 1.6816 (1.1993) lr 1.2487e-03 eta 1 day, 1:59:07
epoch [27/50] batch [1920/2000] time 2.049 (2.029) data 0.000 (0.000) loss 1.5688 (1.1991) lr 1.2487e-03 eta 1 day, 1:58:22
epoch [27/50] batch [1940/2000] time 2.028 (2.029) data 0.000 (0.000) loss 0.0798 (1.1963) lr 1.2487e-03 eta 1 day, 1:57:45
epoch [27/50] batch [1960/2000] time 2.050 (2.029) data 0.000 (0.000) loss 1.1380 (1.1970) lr 1.2487e-03 eta 1 day, 1:57:04
epoch [27/50] batch [1980/2000] time 2.026 (2.029) data 0.000 (0.000) loss 0.8329 (1.1993) lr 1.2487e-03 eta 1 day, 1:56:21
epoch [27/50] batch [2000/2000] time 2.047 (2.029) data 0.000 (0.000) loss 0.9419 (1.2004) lr 1.1874e-03 eta 1 day, 1:55:37
epoch [28/50] batch [20/2000] time 2.054 (2.057) data 0.000 (0.028) loss 1.1714 (1.2737) lr 1.1874e-03 eta 1 day, 2:15:59
epoch [28/50] batch [40/2000] time 1.999 (2.044) data 0.000 (0.014) loss 4.4234 (1.3572) lr 1.1874e-03 eta 1 day, 2:05:59
epoch [28/50] batch [60/2000] time 2.001 (2.042) data 0.001 (0.009) loss 0.4703 (1.3742) lr 1.1874e-03 eta 1 day, 2:03:08
epoch [28/50] batch [80/2000] time 2.049 (2.038) data 0.000 (0.007) loss 0.4058 (1.3315) lr 1.1874e-03 eta 1 day, 1:59:51
epoch [28/50] batch [100/2000] time 2.051 (2.036) data 0.000 (0.006) loss 1.1657 (1.2915) lr 1.1874e-03 eta 1 day, 1:57:21
epoch [28/50] batch [120/2000] time 2.051 (2.035) data 0.000 (0.005) loss 0.6288 (1.2643) lr 1.1874e-03 eta 1 day, 1:55:54
epoch [28/50] batch [140/2000] time 1.973 (2.034) data 0.000 (0.004) loss 1.7958 (1.2200) lr 1.1874e-03 eta 1 day, 1:54:58
epoch [28/50] batch [160/2000] time 2.050 (2.033) data 0.000 (0.004) loss 1.4921 (1.2266) lr 1.1874e-03 eta 1 day, 1:53:07
epoch [28/50] batch [180/2000] time 1.999 (2.031) data 0.000 (0.003) loss 3.7005 (1.1966) lr 1.1874e-03 eta 1 day, 1:51:21
epoch [28/50] batch [200/2000] time 1.972 (2.030) data 0.000 (0.003) loss 0.1297 (1.1803) lr 1.1874e-03 eta 1 day, 1:49:56
epoch [28/50] batch [220/2000] time 2.026 (2.030) data 0.000 (0.003) loss 1.7751 (1.1747) lr 1.1874e-03 eta 1 day, 1:48:44
epoch [28/50] batch [240/2000] time 2.030 (2.030) data 0.000 (0.002) loss 0.5669 (1.1634) lr 1.1874e-03 eta 1 day, 1:47:57
epoch [28/50] batch [260/2000] time 2.048 (2.030) data 0.000 (0.002) loss 3.0120 (1.1716) lr 1.1874e-03 eta 1 day, 1:47:22
epoch [28/50] batch [280/2000] time 2.027 (2.030) data 0.000 (0.002) loss 0.7921 (1.1762) lr 1.1874e-03 eta 1 day, 1:46:41
epoch [28/50] batch [300/2000] time 1.994 (2.030) data 0.000 (0.002) loss 1.7464 (1.1656) lr 1.1874e-03 eta 1 day, 1:45:58
epoch [28/50] batch [320/2000] time 2.048 (2.029) data 0.000 (0.002) loss 0.8763 (1.1587) lr 1.1874e-03 eta 1 day, 1:44:48
epoch [28/50] batch [340/2000] time 1.973 (2.029) data 0.000 (0.002) loss 1.3721 (1.1519) lr 1.1874e-03 eta 1 day, 1:43:59
epoch [28/50] batch [360/2000] time 1.976 (2.029) data 0.000 (0.002) loss 1.0660 (1.1487) lr 1.1874e-03 eta 1 day, 1:43:05
epoch [28/50] batch [380/2000] time 2.049 (2.028) data 0.000 (0.002) loss 2.0655 (1.1342) lr 1.1874e-03 eta 1 day, 1:42:19
epoch [28/50] batch [400/2000] time 1.973 (2.028) data 0.000 (0.002) loss 1.2389 (1.1380) lr 1.1874e-03 eta 1 day, 1:41:30
epoch [28/50] batch [420/2000] time 2.025 (2.028) data 0.000 (0.001) loss 1.5959 (1.1465) lr 1.1874e-03 eta 1 day, 1:40:53
epoch [28/50] batch [440/2000] time 2.024 (2.028) data 0.000 (0.001) loss 0.3355 (1.1486) lr 1.1874e-03 eta 1 day, 1:40:12
epoch [28/50] batch [460/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.3517 (1.1439) lr 1.1874e-03 eta 1 day, 1:39:37
epoch [28/50] batch [480/2000] time 2.048 (2.028) data 0.000 (0.001) loss 2.0047 (1.1352) lr 1.1874e-03 eta 1 day, 1:38:48
epoch [28/50] batch [500/2000] time 1.995 (2.028) data 0.000 (0.001) loss 0.5050 (1.1363) lr 1.1874e-03 eta 1 day, 1:38:04
epoch [28/50] batch [520/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.6588 (1.1248) lr 1.1874e-03 eta 1 day, 1:37:25
epoch [28/50] batch [540/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.1873 (1.1222) lr 1.1874e-03 eta 1 day, 1:36:38
epoch [28/50] batch [560/2000] time 1.995 (2.028) data 0.000 (0.001) loss 0.1609 (1.1157) lr 1.1874e-03 eta 1 day, 1:35:56
epoch [28/50] batch [580/2000] time 2.047 (2.028) data 0.000 (0.001) loss 2.3333 (1.1104) lr 1.1874e-03 eta 1 day, 1:35:10
epoch [28/50] batch [600/2000] time 2.000 (2.028) data 0.001 (0.001) loss 2.3912 (1.1069) lr 1.1874e-03 eta 1 day, 1:34:26
epoch [28/50] batch [620/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.4042 (1.1094) lr 1.1874e-03 eta 1 day, 1:33:42
epoch [28/50] batch [640/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.2533 (1.1250) lr 1.1874e-03 eta 1 day, 1:32:56
epoch [28/50] batch [660/2000] time 2.028 (2.028) data 0.000 (0.001) loss 0.3257 (1.1318) lr 1.1874e-03 eta 1 day, 1:32:17
epoch [28/50] batch [680/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.0374 (1.1280) lr 1.1874e-03 eta 1 day, 1:31:41
epoch [28/50] batch [700/2000] time 2.026 (2.028) data 0.000 (0.001) loss 2.4738 (1.1318) lr 1.1874e-03 eta 1 day, 1:30:52
epoch [28/50] batch [720/2000] time 1.975 (2.028) data 0.000 (0.001) loss 2.0072 (1.1301) lr 1.1874e-03 eta 1 day, 1:30:07
epoch [28/50] batch [740/2000] time 2.047 (2.028) data 0.000 (0.001) loss 2.8060 (1.1221) lr 1.1874e-03 eta 1 day, 1:29:25
epoch [28/50] batch [760/2000] time 1.994 (2.027) data 0.000 (0.001) loss 0.5130 (1.1242) lr 1.1874e-03 eta 1 day, 1:28:36
epoch [28/50] batch [780/2000] time 1.995 (2.027) data 0.000 (0.001) loss 0.2151 (1.1286) lr 1.1874e-03 eta 1 day, 1:27:52
epoch [28/50] batch [800/2000] time 2.050 (2.027) data 0.000 (0.001) loss 1.6757 (1.1315) lr 1.1874e-03 eta 1 day, 1:27:09
epoch [28/50] batch [820/2000] time 2.025 (2.027) data 0.000 (0.001) loss 1.2672 (1.1335) lr 1.1874e-03 eta 1 day, 1:26:27
epoch [28/50] batch [840/2000] time 2.026 (2.027) data 0.000 (0.001) loss 1.0193 (1.1384) lr 1.1874e-03 eta 1 day, 1:25:42
epoch [28/50] batch [860/2000] time 1.970 (2.027) data 0.000 (0.001) loss 0.9687 (1.1323) lr 1.1874e-03 eta 1 day, 1:24:56
epoch [28/50] batch [880/2000] time 2.024 (2.027) data 0.000 (0.001) loss 0.1759 (1.1267) lr 1.1874e-03 eta 1 day, 1:24:18
epoch [28/50] batch [900/2000] time 2.050 (2.027) data 0.000 (0.001) loss 0.7211 (1.1283) lr 1.1874e-03 eta 1 day, 1:23:35
epoch [28/50] batch [920/2000] time 2.049 (2.027) data 0.000 (0.001) loss 0.6360 (1.1288) lr 1.1874e-03 eta 1 day, 1:22:52
epoch [28/50] batch [940/2000] time 2.000 (2.027) data 0.000 (0.001) loss 1.5413 (1.1215) lr 1.1874e-03 eta 1 day, 1:22:08
epoch [28/50] batch [960/2000] time 2.025 (2.027) data 0.000 (0.001) loss 0.1971 (1.1193) lr 1.1874e-03 eta 1 day, 1:21:30
epoch [28/50] batch [980/2000] time 2.000 (2.027) data 0.000 (0.001) loss 0.0301 (1.1139) lr 1.1874e-03 eta 1 day, 1:20:48
epoch [28/50] batch [1000/2000] time 2.001 (2.027) data 0.000 (0.001) loss 0.3545 (1.1175) lr 1.1874e-03 eta 1 day, 1:20:20
epoch [28/50] batch [1020/2000] time 2.027 (2.027) data 0.000 (0.001) loss 1.4511 (1.1195) lr 1.1874e-03 eta 1 day, 1:19:48
epoch [28/50] batch [1040/2000] time 1.995 (2.027) data 0.000 (0.001) loss 0.7128 (1.1204) lr 1.1874e-03 eta 1 day, 1:19:10
epoch [28/50] batch [1060/2000] time 2.050 (2.027) data 0.000 (0.001) loss 1.4869 (1.1199) lr 1.1874e-03 eta 1 day, 1:18:31
epoch [28/50] batch [1080/2000] time 1.997 (2.027) data 0.000 (0.001) loss 1.6948 (1.1242) lr 1.1874e-03 eta 1 day, 1:17:50
epoch [28/50] batch [1100/2000] time 2.053 (2.027) data 0.000 (0.001) loss 0.3997 (1.1237) lr 1.1874e-03 eta 1 day, 1:17:11
epoch [28/50] batch [1120/2000] time 2.055 (2.027) data 0.000 (0.001) loss 0.6166 (1.1267) lr 1.1874e-03 eta 1 day, 1:16:33
epoch [28/50] batch [1140/2000] time 2.051 (2.027) data 0.001 (0.001) loss 0.7666 (1.1259) lr 1.1874e-03 eta 1 day, 1:15:50
epoch [28/50] batch [1160/2000] time 1.973 (2.027) data 0.000 (0.001) loss 3.8223 (1.1315) lr 1.1874e-03 eta 1 day, 1:15:06
epoch [28/50] batch [1180/2000] time 1.970 (2.027) data 0.000 (0.001) loss 1.5268 (1.1342) lr 1.1874e-03 eta 1 day, 1:14:29
epoch [28/50] batch [1200/2000] time 2.048 (2.027) data 0.000 (0.001) loss 2.2195 (1.1430) lr 1.1874e-03 eta 1 day, 1:13:48
epoch [28/50] batch [1220/2000] time 2.026 (2.027) data 0.000 (0.001) loss 1.5330 (1.1400) lr 1.1874e-03 eta 1 day, 1:12:57
epoch [28/50] batch [1240/2000] time 2.048 (2.027) data 0.000 (0.001) loss 1.3456 (1.1398) lr 1.1874e-03 eta 1 day, 1:12:21
epoch [28/50] batch [1260/2000] time 2.023 (2.027) data 0.000 (0.001) loss 3.1944 (1.1445) lr 1.1874e-03 eta 1 day, 1:11:36
epoch [28/50] batch [1280/2000] time 2.047 (2.027) data 0.000 (0.001) loss 1.5929 (1.1461) lr 1.1874e-03 eta 1 day, 1:10:55
epoch [28/50] batch [1300/2000] time 2.054 (2.027) data 0.000 (0.001) loss 1.1137 (1.1489) lr 1.1874e-03 eta 1 day, 1:10:18
epoch [28/50] batch [1320/2000] time 2.054 (2.027) data 0.000 (0.001) loss 0.5653 (1.1543) lr 1.1874e-03 eta 1 day, 1:09:42
epoch [28/50] batch [1340/2000] time 1.976 (2.027) data 0.000 (0.001) loss 1.2126 (1.1554) lr 1.1874e-03 eta 1 day, 1:09:03
epoch [28/50] batch [1360/2000] time 1.998 (2.027) data 0.000 (0.001) loss 1.1530 (1.1566) lr 1.1874e-03 eta 1 day, 1:08:22
epoch [28/50] batch [1380/2000] time 1.996 (2.027) data 0.000 (0.001) loss 0.2925 (1.1537) lr 1.1874e-03 eta 1 day, 1:07:38
epoch [28/50] batch [1400/2000] time 2.028 (2.027) data 0.000 (0.001) loss 0.5393 (1.1538) lr 1.1874e-03 eta 1 day, 1:07:00
epoch [28/50] batch [1420/2000] time 2.051 (2.027) data 0.000 (0.001) loss 0.3665 (1.1508) lr 1.1874e-03 eta 1 day, 1:06:19
epoch [28/50] batch [1440/2000] time 1.997 (2.027) data 0.000 (0.001) loss 1.2617 (1.1559) lr 1.1874e-03 eta 1 day, 1:05:40
epoch [28/50] batch [1460/2000] time 1.975 (2.027) data 0.000 (0.001) loss 2.3514 (1.1584) lr 1.1874e-03 eta 1 day, 1:05:01
epoch [28/50] batch [1480/2000] time 2.051 (2.027) data 0.000 (0.001) loss 3.3366 (1.1603) lr 1.1874e-03 eta 1 day, 1:04:23
epoch [28/50] batch [1500/2000] time 2.048 (2.027) data 0.000 (0.001) loss 1.1713 (1.1586) lr 1.1874e-03 eta 1 day, 1:03:39
epoch [28/50] batch [1520/2000] time 2.032 (2.027) data 0.000 (0.001) loss 0.7880 (1.1603) lr 1.1874e-03 eta 1 day, 1:02:56
epoch [28/50] batch [1540/2000] time 2.051 (2.027) data 0.000 (0.001) loss 0.7251 (1.1571) lr 1.1874e-03 eta 1 day, 1:02:17
epoch [28/50] batch [1560/2000] time 2.026 (2.027) data 0.000 (0.001) loss 0.3918 (1.1576) lr 1.1874e-03 eta 1 day, 1:01:33
epoch [28/50] batch [1580/2000] time 1.994 (2.027) data 0.000 (0.001) loss 1.9975 (1.1612) lr 1.1874e-03 eta 1 day, 1:00:55
epoch [28/50] batch [1600/2000] time 1.994 (2.027) data 0.000 (0.001) loss 4.6436 (1.1620) lr 1.1874e-03 eta 1 day, 1:00:16
epoch [28/50] batch [1620/2000] time 2.048 (2.027) data 0.000 (0.001) loss 2.5667 (1.1610) lr 1.1874e-03 eta 1 day, 0:59:34
epoch [28/50] batch [1640/2000] time 2.026 (2.027) data 0.000 (0.001) loss 0.4418 (1.1571) lr 1.1874e-03 eta 1 day, 0:58:48
epoch [28/50] batch [1660/2000] time 2.049 (2.027) data 0.000 (0.001) loss 0.6830 (1.1594) lr 1.1874e-03 eta 1 day, 0:58:05
epoch [28/50] batch [1680/2000] time 2.051 (2.027) data 0.001 (0.001) loss 0.6639 (1.1578) lr 1.1874e-03 eta 1 day, 0:57:23
epoch [28/50] batch [1700/2000] time 2.028 (2.027) data 0.000 (0.001) loss 0.0188 (1.1542) lr 1.1874e-03 eta 1 day, 0:56:41
epoch [28/50] batch [1720/2000] time 2.048 (2.027) data 0.000 (0.001) loss 0.2156 (1.1562) lr 1.1874e-03 eta 1 day, 0:55:57
epoch [28/50] batch [1740/2000] time 1.994 (2.027) data 0.000 (0.001) loss 1.5276 (1.1606) lr 1.1874e-03 eta 1 day, 0:55:17
epoch [28/50] batch [1760/2000] time 2.025 (2.027) data 0.000 (0.001) loss 0.9373 (1.1646) lr 1.1874e-03 eta 1 day, 0:54:35
epoch [28/50] batch [1780/2000] time 1.996 (2.027) data 0.000 (0.000) loss 2.1432 (1.1643) lr 1.1874e-03 eta 1 day, 0:53:56
epoch [28/50] batch [1800/2000] time 1.970 (2.027) data 0.000 (0.000) loss 0.2169 (1.1616) lr 1.1874e-03 eta 1 day, 0:53:11
epoch [28/50] batch [1820/2000] time 2.047 (2.027) data 0.000 (0.000) loss 2.5200 (1.1628) lr 1.1874e-03 eta 1 day, 0:52:30
epoch [28/50] batch [1840/2000] time 2.050 (2.027) data 0.000 (0.000) loss 0.9675 (1.1641) lr 1.1874e-03 eta 1 day, 0:51:52
epoch [28/50] batch [1860/2000] time 2.049 (2.027) data 0.000 (0.000) loss 0.8181 (1.1666) lr 1.1874e-03 eta 1 day, 0:51:15
epoch [28/50] batch [1880/2000] time 2.027 (2.027) data 0.000 (0.000) loss 1.5359 (1.1646) lr 1.1874e-03 eta 1 day, 0:50:31
epoch [28/50] batch [1900/2000] time 2.025 (2.027) data 0.000 (0.000) loss 1.6793 (1.1668) lr 1.1874e-03 eta 1 day, 0:49:51
epoch [28/50] batch [1920/2000] time 2.050 (2.027) data 0.000 (0.000) loss 1.5937 (1.1681) lr 1.1874e-03 eta 1 day, 0:49:13
epoch [28/50] batch [1940/2000] time 2.047 (2.027) data 0.000 (0.000) loss 1.7391 (1.1687) lr 1.1874e-03 eta 1 day, 0:48:35
epoch [28/50] batch [1960/2000] time 2.049 (2.027) data 0.000 (0.000) loss 0.2341 (1.1668) lr 1.1874e-03 eta 1 day, 0:47:55
epoch [28/50] batch [1980/2000] time 1.996 (2.027) data 0.000 (0.000) loss 2.1719 (1.1688) lr 1.1874e-03 eta 1 day, 0:47:12
epoch [28/50] batch [2000/2000] time 2.049 (2.027) data 0.000 (0.000) loss 1.1709 (1.1725) lr 1.1253e-03 eta 1 day, 0:46:32
epoch [29/50] batch [20/2000] time 1.974 (2.051) data 0.000 (0.028) loss 2.1555 (1.1550) lr 1.1253e-03 eta 1 day, 1:03:01
epoch [29/50] batch [40/2000] time 1.975 (2.044) data 0.000 (0.014) loss 3.4595 (1.2926) lr 1.1253e-03 eta 1 day, 0:57:30
epoch [29/50] batch [60/2000] time 2.049 (2.040) data 0.001 (0.010) loss 0.9670 (1.4299) lr 1.1253e-03 eta 1 day, 0:53:41
epoch [29/50] batch [80/2000] time 2.048 (2.038) data 0.000 (0.007) loss 0.3531 (1.3412) lr 1.1253e-03 eta 1 day, 0:51:41
epoch [29/50] batch [100/2000] time 1.972 (2.034) data 0.000 (0.006) loss 0.6844 (1.2811) lr 1.1253e-03 eta 1 day, 0:48:30
epoch [29/50] batch [120/2000] time 2.048 (2.034) data 0.000 (0.005) loss 3.0781 (1.2677) lr 1.1253e-03 eta 1 day, 0:47:29
epoch [29/50] batch [140/2000] time 1.996 (2.033) data 0.000 (0.004) loss 2.0453 (1.2591) lr 1.1253e-03 eta 1 day, 0:46:06
epoch [29/50] batch [160/2000] time 2.048 (2.032) data 0.000 (0.004) loss 2.2949 (1.2924) lr 1.1253e-03 eta 1 day, 0:44:30
epoch [29/50] batch [180/2000] time 2.025 (2.032) data 0.000 (0.003) loss 2.0522 (1.2686) lr 1.1253e-03 eta 1 day, 0:43:45
epoch [29/50] batch [200/2000] time 1.971 (2.031) data 0.000 (0.003) loss 0.2644 (1.2529) lr 1.1253e-03 eta 1 day, 0:42:20
epoch [29/50] batch [220/2000] time 2.046 (2.030) data 0.000 (0.003) loss 0.9395 (1.2477) lr 1.1253e-03 eta 1 day, 0:40:58
epoch [29/50] batch [240/2000] time 1.993 (2.030) data 0.000 (0.003) loss 2.2883 (1.2337) lr 1.1253e-03 eta 1 day, 0:40:13
epoch [29/50] batch [260/2000] time 2.046 (2.030) data 0.000 (0.002) loss 0.5646 (1.2167) lr 1.1253e-03 eta 1 day, 0:39:45
epoch [29/50] batch [280/2000] time 1.996 (2.029) data 0.000 (0.002) loss 2.1205 (1.2141) lr 1.1253e-03 eta 1 day, 0:38:33
epoch [29/50] batch [300/2000] time 2.051 (2.029) data 0.000 (0.002) loss 0.8291 (1.2021) lr 1.1253e-03 eta 1 day, 0:37:56
epoch [29/50] batch [320/2000] time 2.049 (2.029) data 0.000 (0.002) loss 0.7815 (1.1998) lr 1.1253e-03 eta 1 day, 0:36:58
epoch [29/50] batch [340/2000] time 2.049 (2.029) data 0.000 (0.002) loss 0.0648 (1.2003) lr 1.1253e-03 eta 1 day, 0:36:31
epoch [29/50] batch [360/2000] time 2.026 (2.029) data 0.000 (0.002) loss 1.7923 (1.2155) lr 1.1253e-03 eta 1 day, 0:35:30
epoch [29/50] batch [380/2000] time 2.047 (2.029) data 0.000 (0.002) loss 0.6746 (1.2030) lr 1.1253e-03 eta 1 day, 0:34:46
epoch [29/50] batch [400/2000] time 2.024 (2.028) data 0.000 (0.002) loss 0.0468 (1.1984) lr 1.1253e-03 eta 1 day, 0:33:43
epoch [29/50] batch [420/2000] time 1.996 (2.028) data 0.000 (0.002) loss 0.2184 (1.1909) lr 1.1253e-03 eta 1 day, 0:32:57
epoch [29/50] batch [440/2000] time 2.053 (2.028) data 0.000 (0.001) loss 2.3192 (1.1854) lr 1.1253e-03 eta 1 day, 0:32:13
epoch [29/50] batch [460/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.5694 (1.1859) lr 1.1253e-03 eta 1 day, 0:31:34
epoch [29/50] batch [480/2000] time 2.025 (2.028) data 0.000 (0.001) loss 1.5737 (1.1937) lr 1.1253e-03 eta 1 day, 0:30:53
epoch [29/50] batch [500/2000] time 2.025 (2.028) data 0.000 (0.001) loss 0.7836 (1.1824) lr 1.1253e-03 eta 1 day, 0:30:08
epoch [29/50] batch [520/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.5522 (1.1811) lr 1.1253e-03 eta 1 day, 0:29:28
epoch [29/50] batch [540/2000] time 1.997 (2.028) data 0.000 (0.001) loss 0.6854 (1.1881) lr 1.1253e-03 eta 1 day, 0:28:48
epoch [29/50] batch [560/2000] time 2.056 (2.028) data 0.000 (0.001) loss 0.6749 (1.1943) lr 1.1253e-03 eta 1 day, 0:28:08
epoch [29/50] batch [580/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.7794 (1.1981) lr 1.1253e-03 eta 1 day, 0:27:41
epoch [29/50] batch [600/2000] time 2.024 (2.028) data 0.001 (0.001) loss 1.7589 (1.1868) lr 1.1253e-03 eta 1 day, 0:26:50
epoch [29/50] batch [620/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.5969 (1.1892) lr 1.1253e-03 eta 1 day, 0:26:14
epoch [29/50] batch [640/2000] time 1.997 (2.028) data 0.000 (0.001) loss 0.4422 (1.1816) lr 1.1253e-03 eta 1 day, 0:25:39
epoch [29/50] batch [660/2000] time 1.975 (2.028) data 0.000 (0.001) loss 2.5404 (1.1793) lr 1.1253e-03 eta 1 day, 0:24:59
epoch [29/50] batch [680/2000] time 2.047 (2.028) data 0.000 (0.001) loss 1.0880 (1.1844) lr 1.1253e-03 eta 1 day, 0:24:16
epoch [29/50] batch [700/2000] time 1.994 (2.028) data 0.000 (0.001) loss 0.0729 (1.1835) lr 1.1253e-03 eta 1 day, 0:23:35
epoch [29/50] batch [720/2000] time 2.049 (2.028) data 0.000 (0.001) loss 2.3943 (1.1843) lr 1.1253e-03 eta 1 day, 0:22:53
epoch [29/50] batch [740/2000] time 2.047 (2.028) data 0.000 (0.001) loss 3.2634 (1.1895) lr 1.1253e-03 eta 1 day, 0:22:09
epoch [29/50] batch [760/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.2684 (1.1834) lr 1.1253e-03 eta 1 day, 0:21:25
epoch [29/50] batch [780/2000] time 1.994 (2.028) data 0.000 (0.001) loss 0.6438 (1.1764) lr 1.1253e-03 eta 1 day, 0:20:40
epoch [29/50] batch [800/2000] time 1.993 (2.028) data 0.000 (0.001) loss 0.7024 (1.1744) lr 1.1253e-03 eta 1 day, 0:20:05
epoch [29/50] batch [820/2000] time 1.971 (2.028) data 0.000 (0.001) loss 0.0474 (1.1684) lr 1.1253e-03 eta 1 day, 0:19:19
epoch [29/50] batch [840/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.2485 (1.1685) lr 1.1253e-03 eta 1 day, 0:18:41
epoch [29/50] batch [860/2000] time 2.052 (2.028) data 0.000 (0.001) loss 1.1972 (1.1671) lr 1.1253e-03 eta 1 day, 0:18:00
epoch [29/50] batch [880/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.4368 (1.1594) lr 1.1253e-03 eta 1 day, 0:17:22
epoch [29/50] batch [900/2000] time 2.025 (2.028) data 0.000 (0.001) loss 0.6141 (1.1688) lr 1.1253e-03 eta 1 day, 0:16:48
epoch [29/50] batch [920/2000] time 1.997 (2.028) data 0.000 (0.001) loss 0.3751 (1.1675) lr 1.1253e-03 eta 1 day, 0:16:01
epoch [29/50] batch [940/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.7061 (1.1679) lr 1.1253e-03 eta 1 day, 0:15:23
epoch [29/50] batch [960/2000] time 2.054 (2.028) data 0.000 (0.001) loss 2.2922 (1.1657) lr 1.1253e-03 eta 1 day, 0:14:43
epoch [29/50] batch [980/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.6976 (1.1735) lr 1.1253e-03 eta 1 day, 0:14:09
epoch [29/50] batch [1000/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.0257 (1.1724) lr 1.1253e-03 eta 1 day, 0:13:35
epoch [29/50] batch [1020/2000] time 2.050 (2.028) data 0.000 (0.001) loss 3.4163 (1.1732) lr 1.1253e-03 eta 1 day, 0:12:54
epoch [29/50] batch [1040/2000] time 1.999 (2.028) data 0.000 (0.001) loss 2.2032 (1.1724) lr 1.1253e-03 eta 1 day, 0:12:16
epoch [29/50] batch [1060/2000] time 1.995 (2.028) data 0.000 (0.001) loss 3.4538 (1.1712) lr 1.1253e-03 eta 1 day, 0:11:37
epoch [29/50] batch [1080/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.7095 (1.1658) lr 1.1253e-03 eta 1 day, 0:10:59
epoch [29/50] batch [1100/2000] time 2.053 (2.028) data 0.000 (0.001) loss 0.0783 (1.1716) lr 1.1253e-03 eta 1 day, 0:10:19
epoch [29/50] batch [1120/2000] time 2.027 (2.029) data 0.000 (0.001) loss 2.0710 (1.1795) lr 1.1253e-03 eta 1 day, 0:09:42
epoch [29/50] batch [1140/2000] time 2.001 (2.029) data 0.001 (0.001) loss 1.9861 (1.1823) lr 1.1253e-03 eta 1 day, 0:09:03
epoch [29/50] batch [1160/2000] time 1.971 (2.029) data 0.000 (0.001) loss 0.7111 (1.1808) lr 1.1253e-03 eta 1 day, 0:08:29
epoch [29/50] batch [1180/2000] time 2.023 (2.029) data 0.000 (0.001) loss 0.8398 (1.1768) lr 1.1253e-03 eta 1 day, 0:07:47
epoch [29/50] batch [1200/2000] time 2.024 (2.029) data 0.000 (0.001) loss 2.3430 (1.1814) lr 1.1253e-03 eta 1 day, 0:07:04
epoch [29/50] batch [1220/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.8321 (1.1872) lr 1.1253e-03 eta 1 day, 0:06:22
epoch [29/50] batch [1240/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.3698 (1.1826) lr 1.1253e-03 eta 1 day, 0:05:36
epoch [29/50] batch [1260/2000] time 1.997 (2.028) data 0.000 (0.001) loss 0.4429 (1.1833) lr 1.1253e-03 eta 1 day, 0:04:53
epoch [29/50] batch [1280/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.5653 (1.1854) lr 1.1253e-03 eta 1 day, 0:04:13
epoch [29/50] batch [1300/2000] time 1.997 (2.028) data 0.000 (0.001) loss 0.4028 (1.1860) lr 1.1253e-03 eta 1 day, 0:03:34
epoch [29/50] batch [1320/2000] time 1.997 (2.028) data 0.000 (0.001) loss 0.3023 (1.1858) lr 1.1253e-03 eta 1 day, 0:02:51
epoch [29/50] batch [1340/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.0762 (1.1860) lr 1.1253e-03 eta 1 day, 0:02:09
epoch [29/50] batch [1360/2000] time 2.056 (2.028) data 0.000 (0.001) loss 0.3178 (1.1870) lr 1.1253e-03 eta 1 day, 0:01:25
epoch [29/50] batch [1380/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.4198 (1.1817) lr 1.1253e-03 eta 1 day, 0:00:46
epoch [29/50] batch [1400/2000] time 2.030 (2.028) data 0.000 (0.001) loss 0.1169 (1.1820) lr 1.1253e-03 eta 1 day, 0:00:09
epoch [29/50] batch [1420/2000] time 1.995 (2.028) data 0.000 (0.001) loss 1.3887 (1.1792) lr 1.1253e-03 eta 23:59:29
epoch [29/50] batch [1440/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.0758 (1.1771) lr 1.1253e-03 eta 23:58:48
epoch [29/50] batch [1460/2000] time 1.999 (2.028) data 0.000 (0.001) loss 1.6626 (1.1796) lr 1.1253e-03 eta 23:58:09
epoch [29/50] batch [1480/2000] time 1.973 (2.028) data 0.000 (0.001) loss 2.6542 (1.1826) lr 1.1253e-03 eta 23:57:25
epoch [29/50] batch [1500/2000] time 2.034 (2.028) data 0.000 (0.001) loss 1.6991 (1.1810) lr 1.1253e-03 eta 23:56:47
epoch [29/50] batch [1520/2000] time 2.000 (2.028) data 0.000 (0.001) loss 0.4358 (1.1852) lr 1.1253e-03 eta 23:56:09
epoch [29/50] batch [1540/2000] time 1.996 (2.028) data 0.000 (0.001) loss 2.0956 (1.1882) lr 1.1253e-03 eta 23:55:28
epoch [29/50] batch [1560/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.0227 (1.1862) lr 1.1253e-03 eta 23:54:44
epoch [29/50] batch [1580/2000] time 2.047 (2.028) data 0.000 (0.001) loss 0.6124 (1.1817) lr 1.1253e-03 eta 23:54:02
epoch [29/50] batch [1600/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.6534 (1.1785) lr 1.1253e-03 eta 23:53:20
epoch [29/50] batch [1620/2000] time 1.974 (2.028) data 0.000 (0.001) loss 0.5664 (1.1743) lr 1.1253e-03 eta 23:52:40
epoch [29/50] batch [1640/2000] time 2.046 (2.028) data 0.000 (0.001) loss 1.5877 (1.1777) lr 1.1253e-03 eta 23:52:02
epoch [29/50] batch [1660/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.2738 (1.1758) lr 1.1253e-03 eta 23:51:17
epoch [29/50] batch [1680/2000] time 2.046 (2.028) data 0.001 (0.001) loss 1.5805 (1.1755) lr 1.1253e-03 eta 23:50:34
epoch [29/50] batch [1700/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.6250 (1.1727) lr 1.1253e-03 eta 23:49:57
epoch [29/50] batch [1720/2000] time 2.052 (2.028) data 0.000 (0.001) loss 2.2463 (1.1709) lr 1.1253e-03 eta 23:49:14
epoch [29/50] batch [1740/2000] time 2.025 (2.028) data 0.000 (0.001) loss 0.5361 (1.1761) lr 1.1253e-03 eta 23:48:29
epoch [29/50] batch [1760/2000] time 1.996 (2.028) data 0.000 (0.001) loss 1.3075 (1.1790) lr 1.1253e-03 eta 23:47:49
epoch [29/50] batch [1780/2000] time 2.053 (2.028) data 0.000 (0.001) loss 0.8973 (1.1802) lr 1.1253e-03 eta 23:47:11
epoch [29/50] batch [1800/2000] time 2.027 (2.028) data 0.000 (0.001) loss 1.3512 (1.1773) lr 1.1253e-03 eta 23:46:31
epoch [29/50] batch [1820/2000] time 2.046 (2.028) data 0.000 (0.000) loss 3.1099 (1.1786) lr 1.1253e-03 eta 23:45:52
epoch [29/50] batch [1840/2000] time 2.046 (2.028) data 0.000 (0.000) loss 0.4622 (1.1746) lr 1.1253e-03 eta 23:45:10
epoch [29/50] batch [1860/2000] time 1.993 (2.028) data 0.000 (0.000) loss 1.2010 (1.1758) lr 1.1253e-03 eta 23:44:28
epoch [29/50] batch [1880/2000] time 2.048 (2.028) data 0.000 (0.000) loss 2.6620 (1.1807) lr 1.1253e-03 eta 23:43:50
epoch [29/50] batch [1900/2000] time 2.051 (2.028) data 0.000 (0.000) loss 1.0913 (1.1810) lr 1.1253e-03 eta 23:43:08
epoch [29/50] batch [1920/2000] time 2.028 (2.028) data 0.000 (0.000) loss 0.5418 (1.1794) lr 1.1253e-03 eta 23:42:29
epoch [29/50] batch [1940/2000] time 2.051 (2.028) data 0.000 (0.000) loss 0.0811 (1.1778) lr 1.1253e-03 eta 23:41:47
epoch [29/50] batch [1960/2000] time 2.048 (2.028) data 0.000 (0.000) loss 1.3520 (1.1782) lr 1.1253e-03 eta 23:41:08
epoch [29/50] batch [1980/2000] time 2.050 (2.028) data 0.000 (0.000) loss 1.1172 (1.1787) lr 1.1253e-03 eta 23:40:27
epoch [29/50] batch [2000/2000] time 1.993 (2.028) data 0.000 (0.000) loss 4.5581 (1.1827) lr 1.0628e-03 eta 23:39:48
epoch [30/50] batch [20/2000] time 2.025 (2.059) data 0.000 (0.028) loss 0.8744 (1.3362) lr 1.0628e-03 eta 1 day, 0:00:52
epoch [30/50] batch [40/2000] time 1.995 (2.042) data 0.000 (0.014) loss 2.6495 (1.5633) lr 1.0628e-03 eta 23:47:46
epoch [30/50] batch [60/2000] time 2.047 (2.036) data 0.001 (0.009) loss 0.6031 (1.3421) lr 1.0628e-03 eta 23:43:13
epoch [30/50] batch [80/2000] time 2.026 (2.032) data 0.000 (0.007) loss 1.7579 (1.2395) lr 1.0628e-03 eta 23:40:00
epoch [30/50] batch [100/2000] time 2.052 (2.032) data 0.000 (0.006) loss 0.2294 (1.1529) lr 1.0628e-03 eta 23:39:01
epoch [30/50] batch [120/2000] time 2.047 (2.032) data 0.000 (0.005) loss 2.4449 (1.1358) lr 1.0628e-03 eta 23:38:01
epoch [30/50] batch [140/2000] time 2.046 (2.031) data 0.000 (0.004) loss 1.6912 (1.1148) lr 1.0628e-03 eta 23:36:40
epoch [30/50] batch [160/2000] time 1.997 (2.030) data 0.000 (0.004) loss 0.7187 (1.0856) lr 1.0628e-03 eta 23:35:50
epoch [30/50] batch [180/2000] time 1.997 (2.029) data 0.000 (0.003) loss 0.8455 (1.0942) lr 1.0628e-03 eta 23:34:23
epoch [30/50] batch [200/2000] time 1.997 (2.030) data 0.000 (0.003) loss 1.4016 (1.0981) lr 1.0628e-03 eta 23:34:19
epoch [30/50] batch [220/2000] time 2.054 (2.030) data 0.000 (0.003) loss 0.1293 (1.1153) lr 1.0628e-03 eta 23:33:38
epoch [30/50] batch [240/2000] time 2.029 (2.030) data 0.000 (0.003) loss 0.6280 (1.1433) lr 1.0628e-03 eta 23:33:03
epoch [30/50] batch [260/2000] time 1.973 (2.030) data 0.000 (0.002) loss 0.4791 (1.1482) lr 1.0628e-03 eta 23:32:04
epoch [30/50] batch [280/2000] time 2.051 (2.030) data 0.000 (0.002) loss 1.8077 (1.1421) lr 1.0628e-03 eta 23:31:26
epoch [30/50] batch [300/2000] time 2.048 (2.029) data 0.000 (0.002) loss 0.7112 (1.1355) lr 1.0628e-03 eta 23:30:27
epoch [30/50] batch [320/2000] time 1.996 (2.030) data 0.000 (0.002) loss 0.2651 (1.1394) lr 1.0628e-03 eta 23:29:49
epoch [30/50] batch [340/2000] time 2.031 (2.030) data 0.000 (0.002) loss 0.8024 (1.1305) lr 1.0628e-03 eta 23:29:25
epoch [30/50] batch [360/2000] time 2.056 (2.030) data 0.000 (0.002) loss 1.2827 (1.1423) lr 1.0628e-03 eta 23:28:42
epoch [30/50] batch [380/2000] time 1.996 (2.030) data 0.000 (0.002) loss 1.5360 (1.1394) lr 1.0628e-03 eta 23:27:53
epoch [30/50] batch [400/2000] time 2.001 (2.030) data 0.000 (0.002) loss 2.4364 (1.1645) lr 1.0628e-03 eta 23:27:09
epoch [30/50] batch [420/2000] time 2.000 (2.030) data 0.000 (0.002) loss 1.0981 (1.1549) lr 1.0628e-03 eta 23:26:30
epoch [30/50] batch [440/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.1653 (1.1305) lr 1.0628e-03 eta 23:25:57
epoch [30/50] batch [460/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.5047 (1.1344) lr 1.0628e-03 eta 23:25:08
epoch [30/50] batch [480/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.7887 (1.1550) lr 1.0628e-03 eta 23:24:24
epoch [30/50] batch [500/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.0832 (1.1668) lr 1.0628e-03 eta 23:23:44
epoch [30/50] batch [520/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.3678 (1.1626) lr 1.0628e-03 eta 23:23:05
epoch [30/50] batch [540/2000] time 2.077 (2.030) data 0.000 (0.001) loss 1.8830 (1.1662) lr 1.0628e-03 eta 23:22:25
epoch [30/50] batch [560/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.0611 (1.1747) lr 1.0628e-03 eta 23:21:43
epoch [30/50] batch [580/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.6290 (1.1759) lr 1.0628e-03 eta 23:21:04
epoch [30/50] batch [600/2000] time 1.999 (2.029) data 0.001 (0.001) loss 1.8138 (1.1689) lr 1.0628e-03 eta 23:20:05
epoch [30/50] batch [620/2000] time 2.030 (2.029) data 0.000 (0.001) loss 0.6582 (1.1614) lr 1.0628e-03 eta 23:19:24
epoch [30/50] batch [640/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.2173 (1.1664) lr 1.0628e-03 eta 23:18:39
epoch [30/50] batch [660/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.4823 (1.1611) lr 1.0628e-03 eta 23:17:50
epoch [30/50] batch [680/2000] time 2.050 (2.029) data 0.000 (0.001) loss 3.3885 (1.1656) lr 1.0628e-03 eta 23:17:06
epoch [30/50] batch [700/2000] time 2.048 (2.029) data 0.000 (0.001) loss 3.1526 (1.1589) lr 1.0628e-03 eta 23:16:34
epoch [30/50] batch [720/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.1110 (1.1571) lr 1.0628e-03 eta 23:15:59
epoch [30/50] batch [740/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.3729 (1.1586) lr 1.0628e-03 eta 23:15:16
epoch [30/50] batch [760/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.0450 (1.1646) lr 1.0628e-03 eta 23:14:26
epoch [30/50] batch [780/2000] time 2.046 (2.029) data 0.000 (0.001) loss 0.1526 (1.1523) lr 1.0628e-03 eta 23:13:40
epoch [30/50] batch [800/2000] time 1.992 (2.029) data 0.000 (0.001) loss 0.6069 (1.1526) lr 1.0628e-03 eta 23:12:56
epoch [30/50] batch [820/2000] time 1.994 (2.028) data 0.000 (0.001) loss 2.4643 (1.1641) lr 1.0628e-03 eta 23:12:07
epoch [30/50] batch [840/2000] time 2.024 (2.028) data 0.000 (0.001) loss 1.4198 (1.1613) lr 1.0628e-03 eta 23:11:18
epoch [30/50] batch [860/2000] time 2.057 (2.028) data 0.000 (0.001) loss 0.4792 (1.1592) lr 1.0628e-03 eta 23:10:42
epoch [30/50] batch [880/2000] time 2.006 (2.028) data 0.000 (0.001) loss 0.8010 (1.1547) lr 1.0628e-03 eta 23:10:07
epoch [30/50] batch [900/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.6709 (1.1500) lr 1.0628e-03 eta 23:09:31
epoch [30/50] batch [920/2000] time 2.058 (2.029) data 0.000 (0.001) loss 0.7677 (1.1431) lr 1.0628e-03 eta 23:08:52
epoch [30/50] batch [940/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.7294 (1.1453) lr 1.0628e-03 eta 23:08:17
epoch [30/50] batch [960/2000] time 2.024 (2.029) data 0.000 (0.001) loss 0.0407 (1.1426) lr 1.0628e-03 eta 23:07:33
epoch [30/50] batch [980/2000] time 2.046 (2.029) data 0.000 (0.001) loss 1.7951 (1.1407) lr 1.0628e-03 eta 23:06:49
epoch [30/50] batch [1000/2000] time 2.045 (2.029) data 0.000 (0.001) loss 0.9749 (1.1363) lr 1.0628e-03 eta 23:06:09
epoch [30/50] batch [1020/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.3147 (1.1375) lr 1.0628e-03 eta 23:05:25
epoch [30/50] batch [1040/2000] time 2.026 (2.028) data 0.000 (0.001) loss 1.0834 (1.1351) lr 1.0628e-03 eta 23:04:43
epoch [30/50] batch [1060/2000] time 2.023 (2.028) data 0.000 (0.001) loss 0.1772 (1.1393) lr 1.0628e-03 eta 23:03:55
epoch [30/50] batch [1080/2000] time 1.995 (2.028) data 0.000 (0.001) loss 1.2604 (1.1490) lr 1.0628e-03 eta 23:03:10
epoch [30/50] batch [1100/2000] time 2.052 (2.028) data 0.000 (0.001) loss 2.2061 (1.1510) lr 1.0628e-03 eta 23:02:29
epoch [30/50] batch [1120/2000] time 2.051 (2.028) data 0.000 (0.001) loss 2.3266 (1.1513) lr 1.0628e-03 eta 23:01:48
epoch [30/50] batch [1140/2000] time 2.049 (2.028) data 0.001 (0.001) loss 1.7059 (1.1545) lr 1.0628e-03 eta 23:01:06
epoch [30/50] batch [1160/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.7745 (1.1557) lr 1.0628e-03 eta 23:00:26
epoch [30/50] batch [1180/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.6980 (1.1515) lr 1.0628e-03 eta 22:59:44
epoch [30/50] batch [1200/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.7056 (1.1561) lr 1.0628e-03 eta 22:59:03
epoch [30/50] batch [1220/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.5530 (1.1551) lr 1.0628e-03 eta 22:58:18
epoch [30/50] batch [1240/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.5927 (1.1515) lr 1.0628e-03 eta 22:57:36
epoch [30/50] batch [1260/2000] time 1.998 (2.028) data 0.000 (0.001) loss 2.2684 (1.1540) lr 1.0628e-03 eta 22:56:53
epoch [30/50] batch [1280/2000] time 2.055 (2.028) data 0.000 (0.001) loss 1.1645 (1.1549) lr 1.0628e-03 eta 22:56:16
epoch [30/50] batch [1300/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.3807 (1.1527) lr 1.0628e-03 eta 22:55:39
epoch [30/50] batch [1320/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.4510 (1.1514) lr 1.0628e-03 eta 22:55:02
epoch [30/50] batch [1340/2000] time 2.033 (2.028) data 0.000 (0.001) loss 0.9726 (1.1505) lr 1.0628e-03 eta 22:54:20
epoch [30/50] batch [1360/2000] time 2.055 (2.028) data 0.000 (0.001) loss 0.3997 (1.1445) lr 1.0628e-03 eta 22:53:43
epoch [30/50] batch [1380/2000] time 1.974 (2.028) data 0.000 (0.001) loss 1.1551 (1.1449) lr 1.0628e-03 eta 22:53:02
epoch [30/50] batch [1400/2000] time 2.058 (2.028) data 0.000 (0.001) loss 0.4309 (1.1427) lr 1.0628e-03 eta 22:52:24
epoch [30/50] batch [1420/2000] time 2.034 (2.028) data 0.000 (0.001) loss 1.6811 (1.1407) lr 1.0628e-03 eta 22:51:49
epoch [30/50] batch [1440/2000] time 2.055 (2.028) data 0.000 (0.001) loss 1.5198 (1.1417) lr 1.0628e-03 eta 22:51:09
epoch [30/50] batch [1460/2000] time 2.058 (2.029) data 0.000 (0.001) loss 0.6129 (1.1490) lr 1.0628e-03 eta 22:50:35
epoch [30/50] batch [1480/2000] time 2.053 (2.029) data 0.000 (0.001) loss 2.4910 (1.1499) lr 1.0628e-03 eta 22:49:56
epoch [30/50] batch [1500/2000] time 2.026 (2.029) data 0.000 (0.001) loss 2.1197 (1.1499) lr 1.0628e-03 eta 22:49:20
epoch [30/50] batch [1520/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.5549 (1.1527) lr 1.0628e-03 eta 22:48:41
epoch [30/50] batch [1540/2000] time 2.056 (2.029) data 0.000 (0.001) loss 1.3916 (1.1540) lr 1.0628e-03 eta 22:48:01
epoch [30/50] batch [1560/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.8992 (1.1563) lr 1.0628e-03 eta 22:47:23
epoch [30/50] batch [1580/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.9363 (1.1566) lr 1.0628e-03 eta 22:46:45
epoch [30/50] batch [1600/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.4644 (1.1558) lr 1.0628e-03 eta 22:46:02
epoch [30/50] batch [1620/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.6458 (1.1522) lr 1.0628e-03 eta 22:45:20
epoch [30/50] batch [1640/2000] time 2.057 (2.029) data 0.000 (0.001) loss 0.5259 (1.1516) lr 1.0628e-03 eta 22:44:42
epoch [30/50] batch [1660/2000] time 2.057 (2.029) data 0.000 (0.001) loss 0.7641 (1.1541) lr 1.0628e-03 eta 22:44:03
epoch [30/50] batch [1680/2000] time 2.003 (2.029) data 0.001 (0.001) loss 1.5954 (1.1505) lr 1.0628e-03 eta 22:43:27
epoch [30/50] batch [1700/2000] time 2.031 (2.029) data 0.000 (0.001) loss 0.2582 (1.1521) lr 1.0628e-03 eta 22:42:50
epoch [30/50] batch [1720/2000] time 2.055 (2.029) data 0.000 (0.001) loss 1.2869 (1.1583) lr 1.0628e-03 eta 22:42:14
epoch [30/50] batch [1740/2000] time 2.060 (2.029) data 0.000 (0.001) loss 1.7215 (1.1568) lr 1.0628e-03 eta 22:41:38
epoch [30/50] batch [1760/2000] time 2.062 (2.029) data 0.000 (0.001) loss 0.2902 (1.1536) lr 1.0628e-03 eta 22:41:04
epoch [30/50] batch [1780/2000] time 2.062 (2.030) data 0.000 (0.001) loss 0.2181 (1.1534) lr 1.0628e-03 eta 22:40:27
epoch [30/50] batch [1800/2000] time 2.004 (2.030) data 0.000 (0.001) loss 1.5264 (1.1551) lr 1.0628e-03 eta 22:39:48
epoch [30/50] batch [1820/2000] time 2.032 (2.030) data 0.000 (0.001) loss 1.8020 (1.1576) lr 1.0628e-03 eta 22:39:09
epoch [30/50] batch [1840/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.2758 (1.1574) lr 1.0628e-03 eta 22:38:31
epoch [30/50] batch [1860/2000] time 2.036 (2.030) data 0.000 (0.001) loss 0.7234 (1.1549) lr 1.0628e-03 eta 22:37:50
epoch [30/50] batch [1880/2000] time 2.038 (2.030) data 0.000 (0.001) loss 0.9704 (1.1513) lr 1.0628e-03 eta 22:37:13
epoch [30/50] batch [1900/2000] time 2.041 (2.030) data 0.000 (0.001) loss 3.0857 (1.1535) lr 1.0628e-03 eta 22:36:39
epoch [30/50] batch [1920/2000] time 2.035 (2.030) data 0.000 (0.001) loss 0.3344 (1.1522) lr 1.0628e-03 eta 22:36:00
epoch [30/50] batch [1940/2000] time 2.033 (2.030) data 0.000 (0.001) loss 0.2545 (1.1507) lr 1.0628e-03 eta 22:35:21
epoch [30/50] batch [1960/2000] time 2.004 (2.030) data 0.000 (0.001) loss 0.1614 (1.1548) lr 1.0628e-03 eta 22:34:43
epoch [30/50] batch [1980/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.0877 (1.1547) lr 1.0628e-03 eta 22:34:02
epoch [30/50] batch [2000/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.0879 (1.1529) lr 1.0000e-03 eta 22:33:24
epoch [31/50] batch [20/2000] time 2.036 (2.063) data 0.000 (0.028) loss 1.8408 (0.9578) lr 1.0000e-03 eta 22:54:23
epoch [31/50] batch [40/2000] time 2.037 (2.047) data 0.000 (0.014) loss 0.4903 (1.0993) lr 1.0000e-03 eta 22:43:02
epoch [31/50] batch [60/2000] time 2.009 (2.043) data 0.001 (0.009) loss 0.0250 (1.1307) lr 1.0000e-03 eta 22:39:55
epoch [31/50] batch [80/2000] time 2.036 (2.045) data 0.000 (0.007) loss 1.6139 (1.1637) lr 1.0000e-03 eta 22:40:54
epoch [31/50] batch [100/2000] time 1.976 (2.042) data 0.000 (0.006) loss 1.6517 (1.1805) lr 1.0000e-03 eta 22:37:47
epoch [31/50] batch [120/2000] time 1.980 (2.042) data 0.000 (0.005) loss 1.1647 (1.2101) lr 1.0000e-03 eta 22:36:57
epoch [31/50] batch [140/2000] time 2.057 (2.041) data 0.000 (0.004) loss 1.2910 (1.2009) lr 1.0000e-03 eta 22:36:12
epoch [31/50] batch [160/2000] time 2.054 (2.040) data 0.000 (0.004) loss 0.7453 (1.2425) lr 1.0000e-03 eta 22:34:40
epoch [31/50] batch [180/2000] time 2.059 (2.040) data 0.000 (0.003) loss 1.2557 (1.2108) lr 1.0000e-03 eta 22:33:37
epoch [31/50] batch [200/2000] time 2.031 (2.039) data 0.000 (0.003) loss 0.1287 (1.2016) lr 1.0000e-03 eta 22:32:30
epoch [31/50] batch [220/2000] time 2.051 (2.038) data 0.000 (0.003) loss 0.3131 (1.1965) lr 1.0000e-03 eta 22:31:28
epoch [31/50] batch [240/2000] time 2.003 (2.038) data 0.000 (0.003) loss 0.6202 (1.1792) lr 1.0000e-03 eta 22:30:24
epoch [31/50] batch [260/2000] time 2.033 (2.037) data 0.000 (0.002) loss 3.9607 (1.1887) lr 1.0000e-03 eta 22:29:22
epoch [31/50] batch [280/2000] time 2.036 (2.037) data 0.000 (0.002) loss 0.9910 (1.1700) lr 1.0000e-03 eta 22:28:40
epoch [31/50] batch [300/2000] time 2.055 (2.037) data 0.000 (0.002) loss 0.6416 (1.1756) lr 1.0000e-03 eta 22:28:03
epoch [31/50] batch [320/2000] time 2.029 (2.037) data 0.000 (0.002) loss 1.5139 (1.1686) lr 1.0000e-03 eta 22:27:01
epoch [31/50] batch [340/2000] time 1.998 (2.037) data 0.000 (0.002) loss 0.2917 (1.1491) lr 1.0000e-03 eta 22:26:13
epoch [31/50] batch [360/2000] time 2.052 (2.036) data 0.000 (0.002) loss 0.7151 (1.1536) lr 1.0000e-03 eta 22:25:06
epoch [31/50] batch [380/2000] time 2.049 (2.036) data 0.000 (0.002) loss 0.5793 (1.1374) lr 1.0000e-03 eta 22:24:09
epoch [31/50] batch [400/2000] time 2.029 (2.035) data 0.000 (0.002) loss 2.2708 (1.1649) lr 1.0000e-03 eta 22:23:09
epoch [31/50] batch [420/2000] time 1.997 (2.035) data 0.000 (0.002) loss 2.2073 (1.1625) lr 1.0000e-03 eta 22:22:27
epoch [31/50] batch [440/2000] time 1.982 (2.035) data 0.000 (0.001) loss 2.8527 (1.1603) lr 1.0000e-03 eta 22:21:53
epoch [31/50] batch [460/2000] time 2.037 (2.035) data 0.000 (0.001) loss 0.5488 (1.1725) lr 1.0000e-03 eta 22:21:12
epoch [31/50] batch [480/2000] time 2.036 (2.035) data 0.000 (0.001) loss 2.9904 (1.1896) lr 1.0000e-03 eta 22:20:31
epoch [31/50] batch [500/2000] time 2.058 (2.036) data 0.000 (0.001) loss 0.8889 (1.1912) lr 1.0000e-03 eta 22:20:04
epoch [31/50] batch [520/2000] time 2.059 (2.036) data 0.000 (0.001) loss 3.1259 (1.2111) lr 1.0000e-03 eta 22:19:29
epoch [31/50] batch [540/2000] time 2.060 (2.036) data 0.000 (0.001) loss 0.6498 (1.2166) lr 1.0000e-03 eta 22:18:58
epoch [31/50] batch [560/2000] time 2.055 (2.036) data 0.000 (0.001) loss 0.4611 (1.2148) lr 1.0000e-03 eta 22:18:12
epoch [31/50] batch [580/2000] time 2.030 (2.036) data 0.000 (0.001) loss 0.7049 (1.2191) lr 1.0000e-03 eta 22:17:19
epoch [31/50] batch [600/2000] time 2.050 (2.035) data 0.001 (0.001) loss 1.8011 (1.2196) lr 1.0000e-03 eta 22:16:24
epoch [31/50] batch [620/2000] time 2.050 (2.035) data 0.000 (0.001) loss 0.4632 (1.2217) lr 1.0000e-03 eta 22:15:37
epoch [31/50] batch [640/2000] time 2.049 (2.035) data 0.000 (0.001) loss 1.6561 (1.2221) lr 1.0000e-03 eta 22:14:51
epoch [31/50] batch [660/2000] time 1.996 (2.035) data 0.000 (0.001) loss 0.5542 (1.2138) lr 1.0000e-03 eta 22:14:07
epoch [31/50] batch [680/2000] time 2.028 (2.035) data 0.000 (0.001) loss 2.1654 (1.2034) lr 1.0000e-03 eta 22:13:17
epoch [31/50] batch [700/2000] time 1.997 (2.034) data 0.000 (0.001) loss 1.7642 (1.1976) lr 1.0000e-03 eta 22:12:31
epoch [31/50] batch [720/2000] time 2.050 (2.034) data 0.000 (0.001) loss 1.9803 (1.1980) lr 1.0000e-03 eta 22:11:52
epoch [31/50] batch [740/2000] time 2.029 (2.034) data 0.000 (0.001) loss 1.7229 (1.2013) lr 1.0000e-03 eta 22:11:09
epoch [31/50] batch [760/2000] time 2.051 (2.034) data 0.000 (0.001) loss 1.9405 (1.1995) lr 1.0000e-03 eta 22:10:29
epoch [31/50] batch [780/2000] time 2.003 (2.034) data 0.000 (0.001) loss 3.1590 (1.1917) lr 1.0000e-03 eta 22:09:48
epoch [31/50] batch [800/2000] time 2.052 (2.034) data 0.000 (0.001) loss 0.7578 (1.1871) lr 1.0000e-03 eta 22:09:03
epoch [31/50] batch [820/2000] time 1.978 (2.034) data 0.000 (0.001) loss 0.2563 (1.1813) lr 1.0000e-03 eta 22:08:15
epoch [31/50] batch [840/2000] time 2.052 (2.034) data 0.000 (0.001) loss 0.2807 (1.1812) lr 1.0000e-03 eta 22:07:33
epoch [31/50] batch [860/2000] time 2.033 (2.034) data 0.000 (0.001) loss 0.3416 (1.1847) lr 1.0000e-03 eta 22:06:48
epoch [31/50] batch [880/2000] time 2.053 (2.034) data 0.000 (0.001) loss 0.8535 (1.1859) lr 1.0000e-03 eta 22:06:05
epoch [31/50] batch [900/2000] time 2.000 (2.034) data 0.000 (0.001) loss 0.8871 (1.1871) lr 1.0000e-03 eta 22:05:20
epoch [31/50] batch [920/2000] time 2.055 (2.034) data 0.000 (0.001) loss 0.5567 (1.1900) lr 1.0000e-03 eta 22:04:37
epoch [31/50] batch [940/2000] time 2.051 (2.034) data 0.000 (0.001) loss 2.4160 (1.1840) lr 1.0000e-03 eta 22:03:54
epoch [31/50] batch [960/2000] time 2.002 (2.033) data 0.000 (0.001) loss 0.6984 (1.1784) lr 1.0000e-03 eta 22:03:04
epoch [31/50] batch [980/2000] time 2.051 (2.033) data 0.000 (0.001) loss 0.0348 (1.1826) lr 1.0000e-03 eta 22:02:26
epoch [31/50] batch [1000/2000] time 2.027 (2.033) data 0.000 (0.001) loss 1.5357 (1.1830) lr 1.0000e-03 eta 22:01:43
epoch [31/50] batch [1020/2000] time 2.054 (2.033) data 0.000 (0.001) loss 1.0827 (1.1880) lr 1.0000e-03 eta 22:01:00
epoch [31/50] batch [1040/2000] time 2.004 (2.033) data 0.000 (0.001) loss 0.6967 (1.1833) lr 1.0000e-03 eta 22:00:21
epoch [31/50] batch [1060/2000] time 2.035 (2.034) data 0.000 (0.001) loss 0.7643 (1.1887) lr 1.0000e-03 eta 21:59:45
epoch [31/50] batch [1080/2000] time 2.055 (2.034) data 0.000 (0.001) loss 0.8873 (1.1903) lr 1.0000e-03 eta 21:59:10
epoch [31/50] batch [1100/2000] time 2.056 (2.034) data 0.000 (0.001) loss 1.0577 (1.1921) lr 1.0000e-03 eta 21:58:27
epoch [31/50] batch [1120/2000] time 2.061 (2.034) data 0.000 (0.001) loss 1.2869 (1.1877) lr 1.0000e-03 eta 21:57:51
epoch [31/50] batch [1140/2000] time 2.057 (2.034) data 0.001 (0.001) loss 2.7394 (1.1929) lr 1.0000e-03 eta 21:57:11
epoch [31/50] batch [1160/2000] time 2.005 (2.034) data 0.000 (0.001) loss 3.1306 (1.1949) lr 1.0000e-03 eta 21:56:29
epoch [31/50] batch [1180/2000] time 2.035 (2.034) data 0.000 (0.001) loss 1.3310 (1.1920) lr 1.0000e-03 eta 21:55:51
epoch [31/50] batch [1200/2000] time 2.005 (2.034) data 0.000 (0.001) loss 0.8700 (1.1905) lr 1.0000e-03 eta 21:55:08
epoch [31/50] batch [1220/2000] time 2.007 (2.034) data 0.000 (0.001) loss 2.5500 (1.1871) lr 1.0000e-03 eta 21:54:28
epoch [31/50] batch [1240/2000] time 2.057 (2.034) data 0.000 (0.001) loss 1.9674 (1.1915) lr 1.0000e-03 eta 21:53:48
epoch [31/50] batch [1260/2000] time 2.054 (2.034) data 0.000 (0.001) loss 0.4051 (1.1880) lr 1.0000e-03 eta 21:53:12
epoch [31/50] batch [1280/2000] time 2.003 (2.034) data 0.000 (0.001) loss 1.9630 (1.1879) lr 1.0000e-03 eta 21:52:30
epoch [31/50] batch [1300/2000] time 2.053 (2.034) data 0.000 (0.001) loss 0.6729 (1.1885) lr 1.0000e-03 eta 21:51:49
epoch [31/50] batch [1320/2000] time 2.054 (2.034) data 0.000 (0.001) loss 2.1811 (1.1899) lr 1.0000e-03 eta 21:51:10
epoch [31/50] batch [1340/2000] time 2.029 (2.034) data 0.000 (0.001) loss 1.2885 (1.1850) lr 1.0000e-03 eta 21:50:24
epoch [31/50] batch [1360/2000] time 2.003 (2.034) data 0.000 (0.001) loss 1.0052 (1.1859) lr 1.0000e-03 eta 21:49:36
epoch [31/50] batch [1380/2000] time 2.061 (2.034) data 0.000 (0.001) loss 3.9269 (1.1886) lr 1.0000e-03 eta 21:48:54
epoch [31/50] batch [1400/2000] time 2.000 (2.034) data 0.000 (0.001) loss 1.1307 (1.1879) lr 1.0000e-03 eta 21:48:13
epoch [31/50] batch [1420/2000] time 2.030 (2.033) data 0.000 (0.001) loss 0.8040 (1.1813) lr 1.0000e-03 eta 21:47:27
epoch [31/50] batch [1440/2000] time 2.059 (2.033) data 0.000 (0.001) loss 1.1013 (1.1799) lr 1.0000e-03 eta 21:46:45
epoch [31/50] batch [1460/2000] time 2.034 (2.033) data 0.000 (0.001) loss 2.2603 (1.1897) lr 1.0000e-03 eta 21:46:03
epoch [31/50] batch [1480/2000] time 2.034 (2.033) data 0.000 (0.001) loss 1.5578 (1.1877) lr 1.0000e-03 eta 21:45:26
epoch [31/50] batch [1500/2000] time 1.975 (2.033) data 0.000 (0.001) loss 1.8979 (1.1839) lr 1.0000e-03 eta 21:44:43
epoch [31/50] batch [1520/2000] time 2.058 (2.033) data 0.000 (0.001) loss 1.3053 (1.1836) lr 1.0000e-03 eta 21:44:02
epoch [31/50] batch [1540/2000] time 2.002 (2.033) data 0.000 (0.001) loss 0.0240 (1.1829) lr 1.0000e-03 eta 21:43:18
epoch [31/50] batch [1560/2000] time 2.055 (2.033) data 0.000 (0.001) loss 0.0756 (1.1792) lr 1.0000e-03 eta 21:42:36
epoch [31/50] batch [1580/2000] time 1.999 (2.033) data 0.000 (0.001) loss 1.7884 (1.1804) lr 1.0000e-03 eta 21:41:52
epoch [31/50] batch [1600/2000] time 2.061 (2.033) data 0.000 (0.001) loss 1.8680 (1.1814) lr 1.0000e-03 eta 21:41:15
epoch [31/50] batch [1620/2000] time 2.065 (2.033) data 0.000 (0.001) loss 0.2052 (1.1828) lr 1.0000e-03 eta 21:40:37
epoch [31/50] batch [1640/2000] time 2.060 (2.033) data 0.000 (0.001) loss 0.4921 (1.1820) lr 1.0000e-03 eta 21:39:56
epoch [31/50] batch [1660/2000] time 2.061 (2.033) data 0.000 (0.001) loss 0.0083 (1.1828) lr 1.0000e-03 eta 21:39:17
epoch [31/50] batch [1680/2000] time 2.063 (2.033) data 0.001 (0.001) loss 1.8953 (1.1833) lr 1.0000e-03 eta 21:38:36
epoch [31/50] batch [1700/2000] time 2.067 (2.033) data 0.000 (0.001) loss 0.6778 (1.1801) lr 1.0000e-03 eta 21:37:59
epoch [31/50] batch [1720/2000] time 1.976 (2.033) data 0.000 (0.001) loss 0.8169 (1.1798) lr 1.0000e-03 eta 21:37:15
epoch [31/50] batch [1740/2000] time 2.001 (2.033) data 0.000 (0.001) loss 1.2293 (1.1767) lr 1.0000e-03 eta 21:36:30
epoch [31/50] batch [1760/2000] time 2.033 (2.033) data 0.000 (0.001) loss 0.4114 (1.1756) lr 1.0000e-03 eta 21:35:51
epoch [31/50] batch [1780/2000] time 2.003 (2.033) data 0.000 (0.001) loss 1.2937 (1.1765) lr 1.0000e-03 eta 21:35:09
epoch [31/50] batch [1800/2000] time 2.036 (2.033) data 0.000 (0.001) loss 0.5894 (1.1780) lr 1.0000e-03 eta 21:34:31
epoch [31/50] batch [1820/2000] time 2.034 (2.033) data 0.000 (0.001) loss 2.9684 (1.1791) lr 1.0000e-03 eta 21:33:52
epoch [31/50] batch [1840/2000] time 2.060 (2.033) data 0.000 (0.001) loss 0.3593 (1.1824) lr 1.0000e-03 eta 21:33:09
epoch [31/50] batch [1860/2000] time 2.033 (2.033) data 0.000 (0.001) loss 1.9182 (1.1867) lr 1.0000e-03 eta 21:32:30
epoch [31/50] batch [1880/2000] time 2.058 (2.033) data 0.000 (0.001) loss 1.8140 (1.1871) lr 1.0000e-03 eta 21:31:50
epoch [31/50] batch [1900/2000] time 2.002 (2.033) data 0.000 (0.001) loss 1.7694 (1.1921) lr 1.0000e-03 eta 21:31:10
epoch [31/50] batch [1920/2000] time 2.059 (2.033) data 0.000 (0.001) loss 0.5241 (1.1957) lr 1.0000e-03 eta 21:30:32
epoch [31/50] batch [1940/2000] time 2.005 (2.033) data 0.000 (0.001) loss 2.4217 (1.1944) lr 1.0000e-03 eta 21:29:51
epoch [31/50] batch [1960/2000] time 2.063 (2.034) data 0.000 (0.001) loss 0.9567 (1.1933) lr 1.0000e-03 eta 21:29:17
epoch [31/50] batch [1980/2000] time 2.005 (2.034) data 0.000 (0.001) loss 2.7300 (1.1941) lr 1.0000e-03 eta 21:28:41
epoch [31/50] batch [2000/2000] time 2.062 (2.034) data 0.000 (0.001) loss 2.9835 (1.1971) lr 9.3721e-04 eta 21:28:03
epoch [32/50] batch [20/2000] time 2.064 (2.070) data 0.000 (0.034) loss 2.3390 (1.1822) lr 9.3721e-04 eta 21:50:28
epoch [32/50] batch [40/2000] time 2.031 (2.049) data 0.000 (0.017) loss 1.0490 (1.1840) lr 9.3721e-04 eta 21:36:12
epoch [32/50] batch [60/2000] time 2.055 (2.044) data 0.001 (0.012) loss 1.0207 (1.1070) lr 9.3721e-04 eta 21:32:17
epoch [32/50] batch [80/2000] time 2.058 (2.043) data 0.000 (0.009) loss 1.7717 (1.0913) lr 9.3721e-04 eta 21:30:59
epoch [32/50] batch [100/2000] time 2.038 (2.042) data 0.000 (0.007) loss 2.1427 (1.1530) lr 9.3721e-04 eta 21:29:50
epoch [32/50] batch [120/2000] time 2.004 (2.042) data 0.000 (0.006) loss 2.2350 (1.1178) lr 9.3721e-04 eta 21:29:09
epoch [32/50] batch [140/2000] time 2.059 (2.041) data 0.000 (0.005) loss 0.7759 (1.1847) lr 9.3721e-04 eta 21:27:50
epoch [32/50] batch [160/2000] time 1.998 (2.039) data 0.000 (0.004) loss 0.3295 (1.1665) lr 9.3721e-04 eta 21:26:09
epoch [32/50] batch [180/2000] time 2.006 (2.039) data 0.000 (0.004) loss 2.3055 (1.1639) lr 9.3721e-04 eta 21:25:28
epoch [32/50] batch [200/2000] time 2.033 (2.039) data 0.000 (0.004) loss 2.4695 (1.1471) lr 9.3721e-04 eta 21:24:22
epoch [32/50] batch [220/2000] time 2.037 (2.039) data 0.000 (0.003) loss 1.1400 (1.2100) lr 9.3721e-04 eta 21:23:55
epoch [32/50] batch [240/2000] time 1.998 (2.039) data 0.000 (0.003) loss 0.5287 (1.2177) lr 9.3721e-04 eta 21:23:01
epoch [32/50] batch [260/2000] time 1.999 (2.038) data 0.000 (0.003) loss 0.6605 (1.2060) lr 9.3721e-04 eta 21:21:51
epoch [32/50] batch [280/2000] time 2.000 (2.038) data 0.000 (0.003) loss 0.8065 (1.2024) lr 9.3721e-04 eta 21:21:04
epoch [32/50] batch [300/2000] time 2.037 (2.038) data 0.000 (0.003) loss 1.4042 (1.1948) lr 9.3721e-04 eta 21:20:23
epoch [32/50] batch [320/2000] time 2.000 (2.037) data 0.000 (0.002) loss 0.7581 (1.1976) lr 9.3721e-04 eta 21:19:22
epoch [32/50] batch [340/2000] time 2.003 (2.037) data 0.000 (0.002) loss 1.3701 (1.2222) lr 9.3721e-04 eta 21:18:27
epoch [32/50] batch [360/2000] time 2.060 (2.037) data 0.000 (0.002) loss 2.0810 (1.2268) lr 9.3721e-04 eta 21:17:44
epoch [32/50] batch [380/2000] time 2.003 (2.036) data 0.000 (0.002) loss 0.4815 (1.2270) lr 9.3721e-04 eta 21:16:50
epoch [32/50] batch [400/2000] time 2.057 (2.036) data 0.000 (0.002) loss 1.2922 (1.2314) lr 9.3721e-04 eta 21:16:01
epoch [32/50] batch [420/2000] time 2.034 (2.036) data 0.000 (0.002) loss 0.2345 (1.2345) lr 9.3721e-04 eta 21:15:30
epoch [32/50] batch [440/2000] time 2.057 (2.036) data 0.000 (0.002) loss 1.9153 (1.2356) lr 9.3721e-04 eta 21:14:42
epoch [32/50] batch [460/2000] time 2.057 (2.036) data 0.000 (0.002) loss 0.9212 (1.2412) lr 9.3721e-04 eta 21:13:50
epoch [32/50] batch [480/2000] time 2.054 (2.036) data 0.000 (0.002) loss 0.3432 (1.2375) lr 9.3721e-04 eta 21:12:59
epoch [32/50] batch [500/2000] time 2.030 (2.036) data 0.000 (0.002) loss 1.2206 (1.2412) lr 9.3721e-04 eta 21:12:18
epoch [32/50] batch [520/2000] time 1.998 (2.035) data 0.000 (0.002) loss 1.1544 (1.2426) lr 9.3721e-04 eta 21:11:29
epoch [32/50] batch [540/2000] time 1.998 (2.035) data 0.000 (0.002) loss 0.3225 (1.2426) lr 9.3721e-04 eta 21:10:39
epoch [32/50] batch [560/2000] time 2.033 (2.035) data 0.000 (0.001) loss 2.7139 (1.2383) lr 9.3721e-04 eta 21:09:51
epoch [32/50] batch [580/2000] time 2.055 (2.035) data 0.000 (0.001) loss 0.4640 (1.2507) lr 9.3721e-04 eta 21:09:10
epoch [32/50] batch [600/2000] time 2.055 (2.035) data 0.001 (0.001) loss 1.3821 (1.2484) lr 9.3721e-04 eta 21:08:25
epoch [32/50] batch [620/2000] time 2.059 (2.035) data 0.000 (0.001) loss 0.9182 (1.2516) lr 9.3721e-04 eta 21:07:49
epoch [32/50] batch [640/2000] time 2.037 (2.035) data 0.000 (0.001) loss 0.8629 (1.2488) lr 9.3721e-04 eta 21:07:02
epoch [32/50] batch [660/2000] time 1.999 (2.035) data 0.000 (0.001) loss 0.5528 (1.2483) lr 9.3721e-04 eta 21:06:15
epoch [32/50] batch [680/2000] time 2.060 (2.035) data 0.000 (0.001) loss 0.9231 (1.2536) lr 9.3721e-04 eta 21:05:43
epoch [32/50] batch [700/2000] time 2.058 (2.035) data 0.000 (0.001) loss 0.9104 (1.2473) lr 9.3721e-04 eta 21:05:01
epoch [32/50] batch [720/2000] time 2.063 (2.035) data 0.000 (0.001) loss 1.4389 (1.2432) lr 9.3721e-04 eta 21:04:19
epoch [32/50] batch [740/2000] time 2.055 (2.035) data 0.000 (0.001) loss 1.7445 (1.2476) lr 9.3721e-04 eta 21:03:35
epoch [32/50] batch [760/2000] time 2.037 (2.035) data 0.000 (0.001) loss 0.1284 (1.2489) lr 9.3721e-04 eta 21:02:55
epoch [32/50] batch [780/2000] time 2.062 (2.035) data 0.000 (0.001) loss 1.2531 (1.2481) lr 9.3721e-04 eta 21:02:20
epoch [32/50] batch [800/2000] time 2.039 (2.035) data 0.000 (0.001) loss 2.3477 (1.2514) lr 9.3721e-04 eta 21:01:43
epoch [32/50] batch [820/2000] time 2.005 (2.035) data 0.000 (0.001) loss 0.6834 (1.2538) lr 9.3721e-04 eta 21:01:01
epoch [32/50] batch [840/2000] time 2.058 (2.035) data 0.000 (0.001) loss 0.6668 (1.2502) lr 9.3721e-04 eta 21:00:25
epoch [32/50] batch [860/2000] time 2.056 (2.035) data 0.000 (0.001) loss 0.7682 (1.2440) lr 9.3721e-04 eta 20:59:43
epoch [32/50] batch [880/2000] time 2.031 (2.035) data 0.000 (0.001) loss 3.1743 (1.2423) lr 9.3721e-04 eta 20:59:03
epoch [32/50] batch [900/2000] time 1.995 (2.035) data 0.000 (0.001) loss 0.0973 (1.2393) lr 9.3721e-04 eta 20:58:16
epoch [32/50] batch [920/2000] time 2.002 (2.035) data 0.000 (0.001) loss 0.8955 (1.2407) lr 9.3721e-04 eta 20:57:34
epoch [32/50] batch [940/2000] time 2.033 (2.035) data 0.000 (0.001) loss 1.0562 (1.2464) lr 9.3721e-04 eta 20:56:51
epoch [32/50] batch [960/2000] time 2.036 (2.035) data 0.000 (0.001) loss 0.8915 (1.2450) lr 9.3721e-04 eta 20:56:09
epoch [32/50] batch [980/2000] time 1.981 (2.035) data 0.000 (0.001) loss 1.4813 (1.2458) lr 9.3721e-04 eta 20:55:29
epoch [32/50] batch [1000/2000] time 2.063 (2.035) data 0.000 (0.001) loss 0.3290 (1.2412) lr 9.3721e-04 eta 20:54:54
epoch [32/50] batch [1020/2000] time 2.058 (2.035) data 0.000 (0.001) loss 0.9456 (1.2402) lr 9.3721e-04 eta 20:54:14
epoch [32/50] batch [1040/2000] time 2.030 (2.035) data 0.000 (0.001) loss 1.5235 (1.2375) lr 9.3721e-04 eta 20:53:31
epoch [32/50] batch [1060/2000] time 2.065 (2.035) data 0.000 (0.001) loss 0.6740 (1.2352) lr 9.3721e-04 eta 20:52:48
epoch [32/50] batch [1080/2000] time 2.066 (2.035) data 0.000 (0.001) loss 0.1372 (1.2280) lr 9.3721e-04 eta 20:52:13
epoch [32/50] batch [1100/2000] time 2.038 (2.035) data 0.000 (0.001) loss 2.5743 (1.2285) lr 9.3721e-04 eta 20:51:38
epoch [32/50] batch [1120/2000] time 2.033 (2.035) data 0.000 (0.001) loss 0.5761 (1.2204) lr 9.3721e-04 eta 20:50:57
epoch [32/50] batch [1140/2000] time 2.001 (2.035) data 0.001 (0.001) loss 0.6292 (1.2227) lr 9.3721e-04 eta 20:50:11
epoch [32/50] batch [1160/2000] time 2.060 (2.035) data 0.000 (0.001) loss 0.6390 (1.2222) lr 9.3721e-04 eta 20:49:31
epoch [32/50] batch [1180/2000] time 2.057 (2.035) data 0.000 (0.001) loss 0.2109 (1.2169) lr 9.3721e-04 eta 20:48:53
epoch [32/50] batch [1200/2000] time 2.003 (2.035) data 0.000 (0.001) loss 0.6847 (1.2191) lr 9.3721e-04 eta 20:48:10
epoch [32/50] batch [1220/2000] time 2.034 (2.035) data 0.000 (0.001) loss 0.7709 (1.2166) lr 9.3721e-04 eta 20:47:28
epoch [32/50] batch [1240/2000] time 2.062 (2.035) data 0.000 (0.001) loss 1.8174 (1.2170) lr 9.3721e-04 eta 20:46:46
epoch [32/50] batch [1260/2000] time 2.057 (2.035) data 0.000 (0.001) loss 2.3892 (1.2175) lr 9.3721e-04 eta 20:46:04
epoch [32/50] batch [1280/2000] time 2.034 (2.035) data 0.000 (0.001) loss 2.0856 (1.2202) lr 9.3721e-04 eta 20:45:25
epoch [32/50] batch [1300/2000] time 2.059 (2.035) data 0.000 (0.001) loss 0.5259 (1.2218) lr 9.3721e-04 eta 20:44:40
epoch [32/50] batch [1320/2000] time 2.059 (2.035) data 0.000 (0.001) loss 2.7083 (1.2200) lr 9.3721e-04 eta 20:43:57
epoch [32/50] batch [1340/2000] time 2.034 (2.035) data 0.000 (0.001) loss 1.2591 (1.2267) lr 9.3721e-04 eta 20:43:15
epoch [32/50] batch [1360/2000] time 2.001 (2.035) data 0.000 (0.001) loss 0.4072 (1.2192) lr 9.3721e-04 eta 20:42:37
epoch [32/50] batch [1380/2000] time 2.002 (2.035) data 0.000 (0.001) loss 0.0974 (1.2181) lr 9.3721e-04 eta 20:41:54
epoch [32/50] batch [1400/2000] time 2.029 (2.035) data 0.000 (0.001) loss 0.8388 (1.2179) lr 9.3721e-04 eta 20:41:15
epoch [32/50] batch [1420/2000] time 2.029 (2.035) data 0.000 (0.001) loss 1.2682 (1.2212) lr 9.3721e-04 eta 20:40:34
epoch [32/50] batch [1440/2000] time 2.055 (2.035) data 0.000 (0.001) loss 0.8301 (1.2177) lr 9.3721e-04 eta 20:39:53
epoch [32/50] batch [1460/2000] time 2.057 (2.035) data 0.000 (0.001) loss 0.8580 (1.2166) lr 9.3721e-04 eta 20:39:12
epoch [32/50] batch [1480/2000] time 2.060 (2.035) data 0.000 (0.001) loss 1.3942 (1.2127) lr 9.3721e-04 eta 20:38:32
epoch [32/50] batch [1500/2000] time 2.008 (2.035) data 0.000 (0.001) loss 1.2784 (1.2132) lr 9.3721e-04 eta 20:37:57
epoch [32/50] batch [1520/2000] time 2.067 (2.035) data 0.000 (0.001) loss 0.3729 (1.2112) lr 9.3721e-04 eta 20:37:16
epoch [32/50] batch [1540/2000] time 2.007 (2.035) data 0.000 (0.001) loss 1.0883 (1.2128) lr 9.3721e-04 eta 20:36:36
epoch [32/50] batch [1560/2000] time 2.034 (2.035) data 0.000 (0.001) loss 0.1220 (1.2155) lr 9.3721e-04 eta 20:35:57
epoch [32/50] batch [1580/2000] time 1.998 (2.035) data 0.000 (0.001) loss 1.5672 (1.2112) lr 9.3721e-04 eta 20:35:14
epoch [32/50] batch [1600/2000] time 2.061 (2.035) data 0.000 (0.001) loss 1.1237 (1.2178) lr 9.3721e-04 eta 20:34:35
epoch [32/50] batch [1620/2000] time 2.033 (2.035) data 0.000 (0.001) loss 1.5287 (1.2145) lr 9.3721e-04 eta 20:33:54
epoch [32/50] batch [1640/2000] time 2.054 (2.035) data 0.000 (0.001) loss 0.8522 (1.2147) lr 9.3721e-04 eta 20:33:21
epoch [32/50] batch [1660/2000] time 1.976 (2.035) data 0.000 (0.001) loss 2.8541 (1.2173) lr 9.3721e-04 eta 20:32:40
epoch [32/50] batch [1680/2000] time 2.000 (2.035) data 0.001 (0.001) loss 0.2887 (1.2149) lr 9.3721e-04 eta 20:31:57
epoch [32/50] batch [1700/2000] time 2.041 (2.035) data 0.000 (0.001) loss 2.0017 (1.2172) lr 9.3721e-04 eta 20:31:17
epoch [32/50] batch [1720/2000] time 2.031 (2.035) data 0.000 (0.001) loss 1.1118 (1.2200) lr 9.3721e-04 eta 20:30:33
epoch [32/50] batch [1740/2000] time 2.052 (2.035) data 0.000 (0.001) loss 1.2205 (1.2189) lr 9.3721e-04 eta 20:29:50
epoch [32/50] batch [1760/2000] time 2.060 (2.035) data 0.000 (0.001) loss 2.2368 (1.2168) lr 9.3721e-04 eta 20:29:09
epoch [32/50] batch [1780/2000] time 2.056 (2.035) data 0.000 (0.001) loss 2.0132 (1.2189) lr 9.3721e-04 eta 20:28:26
epoch [32/50] batch [1800/2000] time 2.035 (2.035) data 0.000 (0.001) loss 0.9198 (1.2198) lr 9.3721e-04 eta 20:27:43
epoch [32/50] batch [1820/2000] time 2.004 (2.035) data 0.000 (0.001) loss 0.8477 (1.2195) lr 9.3721e-04 eta 20:27:01
epoch [32/50] batch [1840/2000] time 2.057 (2.035) data 0.000 (0.001) loss 1.2511 (1.2209) lr 9.3721e-04 eta 20:26:22
epoch [32/50] batch [1860/2000] time 2.001 (2.035) data 0.000 (0.001) loss 0.6178 (1.2227) lr 9.3721e-04 eta 20:25:39
epoch [32/50] batch [1880/2000] time 2.031 (2.035) data 0.000 (0.001) loss 0.5367 (1.2221) lr 9.3721e-04 eta 20:24:58
epoch [32/50] batch [1900/2000] time 2.055 (2.035) data 0.000 (0.001) loss 1.7819 (1.2237) lr 9.3721e-04 eta 20:24:20
epoch [32/50] batch [1920/2000] time 1.999 (2.035) data 0.000 (0.001) loss 1.5260 (1.2245) lr 9.3721e-04 eta 20:23:38
epoch [32/50] batch [1940/2000] time 2.055 (2.035) data 0.000 (0.001) loss 1.7920 (1.2215) lr 9.3721e-04 eta 20:22:57
epoch [32/50] batch [1960/2000] time 2.033 (2.035) data 0.000 (0.001) loss 0.2423 (1.2204) lr 9.3721e-04 eta 20:22:16
epoch [32/50] batch [1980/2000] time 2.033 (2.035) data 0.000 (0.001) loss 0.7532 (1.2209) lr 9.3721e-04 eta 20:21:34
epoch [32/50] batch [2000/2000] time 2.059 (2.035) data 0.000 (0.001) loss 0.3658 (1.2227) lr 8.7467e-04 eta 20:20:54
epoch [33/50] batch [20/2000] time 2.035 (2.070) data 0.000 (0.034) loss 1.3843 (1.4944) lr 8.7467e-04 eta 20:41:00
epoch [33/50] batch [40/2000] time 2.032 (2.052) data 0.000 (0.017) loss 0.7534 (1.3683) lr 8.7467e-04 eta 20:29:48
epoch [33/50] batch [60/2000] time 1.999 (2.046) data 0.001 (0.012) loss 0.2237 (1.3306) lr 8.7467e-04 eta 20:25:34
epoch [33/50] batch [80/2000] time 2.052 (2.041) data 0.000 (0.009) loss 0.7804 (1.2142) lr 8.7467e-04 eta 20:22:02
epoch [33/50] batch [100/2000] time 1.999 (2.040) data 0.000 (0.007) loss 0.6284 (1.2363) lr 8.7467e-04 eta 20:20:48
epoch [33/50] batch [120/2000] time 1.999 (2.039) data 0.000 (0.006) loss 0.5512 (1.2287) lr 8.7467e-04 eta 20:19:03
epoch [33/50] batch [140/2000] time 2.055 (2.039) data 0.000 (0.005) loss 0.3059 (1.2223) lr 8.7467e-04 eta 20:18:45
epoch [33/50] batch [160/2000] time 2.051 (2.038) data 0.000 (0.004) loss 0.8852 (1.2231) lr 8.7467e-04 eta 20:17:20
epoch [33/50] batch [180/2000] time 2.029 (2.037) data 0.000 (0.004) loss 1.8195 (1.2350) lr 8.7467e-04 eta 20:16:11
epoch [33/50] batch [200/2000] time 2.052 (2.036) data 0.000 (0.004) loss 0.4101 (1.2507) lr 8.7467e-04 eta 20:15:05
epoch [33/50] batch [220/2000] time 2.035 (2.036) data 0.000 (0.003) loss 0.5055 (1.2356) lr 8.7467e-04 eta 20:14:20
epoch [33/50] batch [240/2000] time 2.056 (2.036) data 0.000 (0.003) loss 0.6015 (1.2160) lr 8.7467e-04 eta 20:13:30
epoch [33/50] batch [260/2000] time 2.036 (2.036) data 0.000 (0.003) loss 0.2916 (1.2206) lr 8.7467e-04 eta 20:12:54
epoch [33/50] batch [280/2000] time 1.978 (2.036) data 0.000 (0.003) loss 0.0394 (1.2241) lr 8.7467e-04 eta 20:12:12
epoch [33/50] batch [300/2000] time 2.055 (2.036) data 0.000 (0.002) loss 2.6075 (1.2191) lr 8.7467e-04 eta 20:11:29
epoch [33/50] batch [320/2000] time 2.050 (2.036) data 0.000 (0.002) loss 0.5774 (1.2179) lr 8.7467e-04 eta 20:10:31
epoch [33/50] batch [340/2000] time 2.054 (2.035) data 0.000 (0.002) loss 1.1334 (1.2016) lr 8.7467e-04 eta 20:09:31
epoch [33/50] batch [360/2000] time 2.053 (2.035) data 0.000 (0.002) loss 0.5924 (1.2194) lr 8.7467e-04 eta 20:08:52
epoch [33/50] batch [380/2000] time 2.053 (2.035) data 0.000 (0.002) loss 3.0040 (1.2127) lr 8.7467e-04 eta 20:08:01
epoch [33/50] batch [400/2000] time 2.030 (2.034) data 0.000 (0.002) loss 0.9650 (1.2081) lr 8.7467e-04 eta 20:07:08
epoch [33/50] batch [420/2000] time 1.999 (2.035) data 0.000 (0.002) loss 0.4922 (1.2186) lr 8.7467e-04 eta 20:06:36
epoch [33/50] batch [440/2000] time 2.032 (2.034) data 0.000 (0.002) loss 1.1194 (1.2242) lr 8.7467e-04 eta 20:05:44
epoch [33/50] batch [460/2000] time 2.059 (2.034) data 0.000 (0.002) loss 1.2346 (1.2277) lr 8.7467e-04 eta 20:04:57
epoch [33/50] batch [480/2000] time 2.001 (2.034) data 0.000 (0.002) loss 1.7561 (1.2212) lr 8.7467e-04 eta 20:04:12
epoch [33/50] batch [500/2000] time 2.000 (2.034) data 0.000 (0.002) loss 0.5237 (1.2091) lr 8.7467e-04 eta 20:03:23
epoch [33/50] batch [520/2000] time 2.001 (2.034) data 0.000 (0.002) loss 0.5386 (1.2050) lr 8.7467e-04 eta 20:02:39
epoch [33/50] batch [540/2000] time 2.028 (2.034) data 0.000 (0.001) loss 0.5261 (1.1945) lr 8.7467e-04 eta 20:01:57
epoch [33/50] batch [560/2000] time 2.056 (2.034) data 0.000 (0.001) loss 1.4692 (1.1877) lr 8.7467e-04 eta 20:01:08
epoch [33/50] batch [580/2000] time 2.052 (2.033) data 0.000 (0.001) loss 1.8659 (1.1942) lr 8.7467e-04 eta 20:00:25
epoch [33/50] batch [600/2000] time 2.056 (2.033) data 0.001 (0.001) loss 0.4050 (1.1881) lr 8.7467e-04 eta 19:59:35
epoch [33/50] batch [620/2000] time 2.053 (2.033) data 0.000 (0.001) loss 0.3033 (1.1860) lr 8.7467e-04 eta 19:58:59
epoch [33/50] batch [640/2000] time 2.054 (2.033) data 0.000 (0.001) loss 0.6434 (1.1911) lr 8.7467e-04 eta 19:58:13
epoch [33/50] batch [660/2000] time 2.001 (2.033) data 0.000 (0.001) loss 1.5060 (1.1973) lr 8.7467e-04 eta 19:57:26
epoch [33/50] batch [680/2000] time 2.053 (2.033) data 0.000 (0.001) loss 0.2696 (1.1910) lr 8.7467e-04 eta 19:56:47
epoch [33/50] batch [700/2000] time 2.056 (2.033) data 0.000 (0.001) loss 0.7211 (1.1910) lr 8.7467e-04 eta 19:56:05
epoch [33/50] batch [720/2000] time 2.050 (2.033) data 0.000 (0.001) loss 0.2377 (1.1852) lr 8.7467e-04 eta 19:55:22
epoch [33/50] batch [740/2000] time 1.977 (2.033) data 0.000 (0.001) loss 0.6747 (1.1795) lr 8.7467e-04 eta 19:54:35
epoch [33/50] batch [760/2000] time 1.976 (2.033) data 0.000 (0.001) loss 0.3892 (1.1737) lr 8.7467e-04 eta 19:53:55
epoch [33/50] batch [780/2000] time 2.029 (2.033) data 0.000 (0.001) loss 0.8029 (1.1676) lr 8.7467e-04 eta 19:53:20
epoch [33/50] batch [800/2000] time 2.057 (2.033) data 0.000 (0.001) loss 0.7174 (1.1690) lr 8.7467e-04 eta 19:52:41
epoch [33/50] batch [820/2000] time 1.976 (2.033) data 0.000 (0.001) loss 0.4402 (1.1673) lr 8.7467e-04 eta 19:52:01
epoch [33/50] batch [840/2000] time 2.002 (2.033) data 0.000 (0.001) loss 0.2991 (1.1704) lr 8.7467e-04 eta 19:51:21
epoch [33/50] batch [860/2000] time 2.027 (2.033) data 0.000 (0.001) loss 0.4722 (1.1757) lr 8.7467e-04 eta 19:50:39
epoch [33/50] batch [880/2000] time 1.978 (2.033) data 0.000 (0.001) loss 0.4211 (1.1713) lr 8.7467e-04 eta 19:49:57
epoch [33/50] batch [900/2000] time 1.999 (2.033) data 0.000 (0.001) loss 1.1457 (1.1718) lr 8.7467e-04 eta 19:49:15
epoch [33/50] batch [920/2000] time 2.054 (2.033) data 0.000 (0.001) loss 1.8529 (1.1854) lr 8.7467e-04 eta 19:48:33
epoch [33/50] batch [940/2000] time 2.051 (2.033) data 0.000 (0.001) loss 1.7389 (1.1851) lr 8.7467e-04 eta 19:47:50
epoch [33/50] batch [960/2000] time 2.028 (2.033) data 0.000 (0.001) loss 1.4994 (1.1859) lr 8.7467e-04 eta 19:47:11
epoch [33/50] batch [980/2000] time 1.998 (2.033) data 0.000 (0.001) loss 0.8183 (1.1888) lr 8.7467e-04 eta 19:46:25
epoch [33/50] batch [1000/2000] time 2.032 (2.033) data 0.000 (0.001) loss 0.2367 (1.1833) lr 8.7467e-04 eta 19:45:46
epoch [33/50] batch [1020/2000] time 2.030 (2.033) data 0.000 (0.001) loss 0.0990 (1.1777) lr 8.7467e-04 eta 19:45:08
epoch [33/50] batch [1040/2000] time 2.055 (2.033) data 0.000 (0.001) loss 3.0775 (1.1729) lr 8.7467e-04 eta 19:44:24
epoch [33/50] batch [1060/2000] time 2.063 (2.033) data 0.000 (0.001) loss 2.4574 (1.1710) lr 8.7467e-04 eta 19:43:41
epoch [33/50] batch [1080/2000] time 2.028 (2.033) data 0.000 (0.001) loss 4.5687 (1.1770) lr 8.7467e-04 eta 19:42:55
epoch [33/50] batch [1100/2000] time 2.052 (2.032) data 0.000 (0.001) loss 0.3117 (1.1850) lr 8.7467e-04 eta 19:42:14
epoch [33/50] batch [1120/2000] time 2.056 (2.032) data 0.000 (0.001) loss 0.4033 (1.1776) lr 8.7467e-04 eta 19:41:32
epoch [33/50] batch [1140/2000] time 2.029 (2.032) data 0.001 (0.001) loss 1.0781 (1.1751) lr 8.7467e-04 eta 19:40:51
epoch [33/50] batch [1160/2000] time 2.052 (2.032) data 0.000 (0.001) loss 1.3107 (1.1753) lr 8.7467e-04 eta 19:40:12
epoch [33/50] batch [1180/2000] time 2.029 (2.032) data 0.000 (0.001) loss 0.0916 (1.1742) lr 8.7467e-04 eta 19:39:29
epoch [33/50] batch [1200/2000] time 2.000 (2.032) data 0.000 (0.001) loss 0.1472 (1.1743) lr 8.7467e-04 eta 19:38:47
epoch [33/50] batch [1220/2000] time 2.029 (2.032) data 0.000 (0.001) loss 0.2042 (1.1785) lr 8.7467e-04 eta 19:38:05
epoch [33/50] batch [1240/2000] time 2.002 (2.032) data 0.000 (0.001) loss 0.4852 (1.1728) lr 8.7467e-04 eta 19:37:19
epoch [33/50] batch [1260/2000] time 1.998 (2.032) data 0.000 (0.001) loss 1.5506 (1.1730) lr 8.7467e-04 eta 19:36:36
epoch [33/50] batch [1280/2000] time 2.053 (2.032) data 0.000 (0.001) loss 0.5722 (1.1718) lr 8.7467e-04 eta 19:35:56
epoch [33/50] batch [1300/2000] time 2.030 (2.032) data 0.000 (0.001) loss 1.0736 (1.1752) lr 8.7467e-04 eta 19:35:18
epoch [33/50] batch [1320/2000] time 2.052 (2.032) data 0.000 (0.001) loss 2.2159 (1.1742) lr 8.7467e-04 eta 19:34:35
epoch [33/50] batch [1340/2000] time 2.000 (2.032) data 0.000 (0.001) loss 1.6196 (1.1722) lr 8.7467e-04 eta 19:33:51
epoch [33/50] batch [1360/2000] time 2.050 (2.032) data 0.000 (0.001) loss 2.6082 (1.1731) lr 8.7467e-04 eta 19:33:11
epoch [33/50] batch [1380/2000] time 2.032 (2.032) data 0.000 (0.001) loss 1.1222 (1.1721) lr 8.7467e-04 eta 19:32:31
epoch [33/50] batch [1400/2000] time 2.027 (2.032) data 0.000 (0.001) loss 1.8248 (1.1684) lr 8.7467e-04 eta 19:31:47
epoch [33/50] batch [1420/2000] time 2.032 (2.032) data 0.000 (0.001) loss 0.6055 (1.1659) lr 8.7467e-04 eta 19:31:06
epoch [33/50] batch [1440/2000] time 2.053 (2.032) data 0.000 (0.001) loss 0.6809 (1.1651) lr 8.7467e-04 eta 19:30:25
epoch [33/50] batch [1460/2000] time 2.055 (2.032) data 0.000 (0.001) loss 1.7413 (1.1684) lr 8.7467e-04 eta 19:29:42
epoch [33/50] batch [1480/2000] time 2.050 (2.032) data 0.000 (0.001) loss 0.1861 (1.1716) lr 8.7467e-04 eta 19:29:04
epoch [33/50] batch [1500/2000] time 2.030 (2.032) data 0.000 (0.001) loss 1.4370 (1.1730) lr 8.7467e-04 eta 19:28:22
epoch [33/50] batch [1520/2000] time 1.998 (2.032) data 0.000 (0.001) loss 1.0342 (1.1720) lr 8.7467e-04 eta 19:27:42
epoch [33/50] batch [1540/2000] time 2.031 (2.032) data 0.000 (0.001) loss 0.2336 (1.1721) lr 8.7467e-04 eta 19:27:00
epoch [33/50] batch [1560/2000] time 2.029 (2.032) data 0.000 (0.001) loss 0.7631 (1.1693) lr 8.7467e-04 eta 19:26:20
epoch [33/50] batch [1580/2000] time 2.001 (2.032) data 0.000 (0.001) loss 0.0649 (1.1717) lr 8.7467e-04 eta 19:25:38
epoch [33/50] batch [1600/2000] time 2.051 (2.032) data 0.000 (0.001) loss 0.7243 (1.1736) lr 8.7467e-04 eta 19:24:55
epoch [33/50] batch [1620/2000] time 2.031 (2.032) data 0.000 (0.001) loss 1.4894 (1.1698) lr 8.7467e-04 eta 19:24:16
epoch [33/50] batch [1640/2000] time 2.004 (2.032) data 0.000 (0.001) loss 2.3067 (1.1744) lr 8.7467e-04 eta 19:23:34
epoch [33/50] batch [1660/2000] time 2.031 (2.032) data 0.000 (0.001) loss 0.3580 (1.1721) lr 8.7467e-04 eta 19:22:52
epoch [33/50] batch [1680/2000] time 2.053 (2.032) data 0.001 (0.001) loss 0.2495 (1.1702) lr 8.7467e-04 eta 19:22:13
epoch [33/50] batch [1700/2000] time 2.056 (2.032) data 0.000 (0.001) loss 2.5616 (1.1657) lr 8.7467e-04 eta 19:21:33
epoch [33/50] batch [1720/2000] time 2.001 (2.032) data 0.000 (0.001) loss 1.0242 (1.1674) lr 8.7467e-04 eta 19:20:50
epoch [33/50] batch [1740/2000] time 2.053 (2.032) data 0.000 (0.001) loss 1.3188 (1.1669) lr 8.7467e-04 eta 19:20:10
epoch [33/50] batch [1760/2000] time 1.998 (2.032) data 0.000 (0.001) loss 0.4777 (1.1653) lr 8.7467e-04 eta 19:19:29
epoch [33/50] batch [1780/2000] time 2.056 (2.032) data 0.000 (0.001) loss 0.3381 (1.1650) lr 8.7467e-04 eta 19:18:50
epoch [33/50] batch [1800/2000] time 2.008 (2.032) data 0.000 (0.001) loss 1.7863 (1.1659) lr 8.7467e-04 eta 19:18:10
epoch [33/50] batch [1820/2000] time 2.030 (2.032) data 0.000 (0.001) loss 1.9083 (1.1698) lr 8.7467e-04 eta 19:17:31
epoch [33/50] batch [1840/2000] time 1.996 (2.032) data 0.000 (0.001) loss 0.7476 (1.1722) lr 8.7467e-04 eta 19:16:50
epoch [33/50] batch [1860/2000] time 2.028 (2.032) data 0.000 (0.001) loss 1.3419 (1.1704) lr 8.7467e-04 eta 19:16:09
epoch [33/50] batch [1880/2000] time 2.051 (2.032) data 0.000 (0.001) loss 1.5416 (1.1697) lr 8.7467e-04 eta 19:15:29
epoch [33/50] batch [1900/2000] time 2.030 (2.032) data 0.000 (0.001) loss 3.4978 (1.1728) lr 8.7467e-04 eta 19:14:51
epoch [33/50] batch [1920/2000] time 2.050 (2.032) data 0.000 (0.001) loss 1.6652 (1.1753) lr 8.7467e-04 eta 19:14:09
epoch [33/50] batch [1940/2000] time 2.031 (2.032) data 0.000 (0.001) loss 0.4123 (1.1737) lr 8.7467e-04 eta 19:13:26
epoch [33/50] batch [1960/2000] time 2.006 (2.032) data 0.000 (0.001) loss 0.8137 (1.1746) lr 8.7467e-04 eta 19:12:46
epoch [33/50] batch [1980/2000] time 2.059 (2.032) data 0.000 (0.001) loss 2.2105 (1.1735) lr 8.7467e-04 eta 19:12:09
epoch [33/50] batch [2000/2000] time 2.054 (2.032) data 0.000 (0.001) loss 0.4001 (1.1742) lr 8.1262e-04 eta 19:11:27
epoch [34/50] batch [20/2000] time 2.053 (2.064) data 0.000 (0.029) loss 3.1876 (1.6781) lr 8.1262e-04 eta 19:28:55
epoch [34/50] batch [40/2000] time 2.032 (2.048) data 0.000 (0.014) loss 1.7189 (1.6036) lr 8.1262e-04 eta 19:19:15
epoch [34/50] batch [60/2000] time 2.030 (2.043) data 0.001 (0.010) loss 1.6396 (1.5237) lr 8.1262e-04 eta 19:15:39
epoch [34/50] batch [80/2000] time 1.999 (2.042) data 0.000 (0.007) loss 2.1588 (1.4013) lr 8.1262e-04 eta 19:14:07
epoch [34/50] batch [100/2000] time 2.000 (2.040) data 0.000 (0.006) loss 0.7298 (1.3660) lr 8.1262e-04 eta 19:12:19
epoch [34/50] batch [120/2000] time 2.062 (2.039) data 0.000 (0.005) loss 1.5596 (1.2779) lr 8.1262e-04 eta 19:11:09
epoch [34/50] batch [140/2000] time 2.055 (2.037) data 0.000 (0.004) loss 0.5435 (1.1936) lr 8.1262e-04 eta 19:09:31
epoch [34/50] batch [160/2000] time 2.029 (2.036) data 0.000 (0.004) loss 0.2861 (1.1619) lr 8.1262e-04 eta 19:08:04
epoch [34/50] batch [180/2000] time 1.974 (2.035) data 0.000 (0.003) loss 1.7377 (1.1339) lr 8.1262e-04 eta 19:07:13
epoch [34/50] batch [200/2000] time 2.057 (2.034) data 0.000 (0.003) loss 1.4538 (1.1514) lr 8.1262e-04 eta 19:05:58
epoch [34/50] batch [220/2000] time 2.001 (2.034) data 0.000 (0.003) loss 2.0087 (1.1540) lr 8.1262e-04 eta 19:05:03
epoch [34/50] batch [240/2000] time 2.038 (2.034) data 0.000 (0.003) loss 1.6556 (1.1304) lr 8.1262e-04 eta 19:04:21
epoch [34/50] batch [260/2000] time 2.059 (2.034) data 0.000 (0.002) loss 0.0362 (1.1379) lr 8.1262e-04 eta 19:04:03
epoch [34/50] batch [280/2000] time 2.042 (2.035) data 0.000 (0.002) loss 0.7411 (1.1483) lr 8.1262e-04 eta 19:03:24
epoch [34/50] batch [300/2000] time 2.061 (2.035) data 0.000 (0.002) loss 0.2996 (1.1515) lr 8.1262e-04 eta 19:02:55
epoch [34/50] batch [320/2000] time 2.057 (2.035) data 0.000 (0.002) loss 0.0449 (1.1569) lr 8.1262e-04 eta 19:02:19
epoch [34/50] batch [340/2000] time 1.998 (2.035) data 0.000 (0.002) loss 0.6706 (1.1544) lr 8.1262e-04 eta 19:01:29
epoch [34/50] batch [360/2000] time 2.034 (2.035) data 0.000 (0.002) loss 0.8226 (1.1407) lr 8.1262e-04 eta 19:00:56
epoch [34/50] batch [380/2000] time 2.059 (2.035) data 0.000 (0.002) loss 0.6883 (1.1440) lr 8.1262e-04 eta 19:00:13
epoch [34/50] batch [400/2000] time 2.006 (2.035) data 0.000 (0.002) loss 1.8670 (1.1444) lr 8.1262e-04 eta 18:59:34
epoch [34/50] batch [420/2000] time 2.066 (2.035) data 0.000 (0.002) loss 1.8931 (1.1331) lr 8.1262e-04 eta 18:58:59
epoch [34/50] batch [440/2000] time 2.068 (2.035) data 0.000 (0.002) loss 3.2625 (1.1457) lr 8.1262e-04 eta 18:58:24
epoch [34/50] batch [460/2000] time 2.006 (2.035) data 0.000 (0.001) loss 3.3191 (1.1383) lr 8.1262e-04 eta 18:57:49
epoch [34/50] batch [480/2000] time 2.056 (2.035) data 0.000 (0.001) loss 1.3989 (1.1376) lr 8.1262e-04 eta 18:57:07
epoch [34/50] batch [500/2000] time 2.002 (2.035) data 0.000 (0.001) loss 0.7184 (1.1442) lr 8.1262e-04 eta 18:56:22
epoch [34/50] batch [520/2000] time 2.003 (2.035) data 0.000 (0.001) loss 2.6425 (1.1514) lr 8.1262e-04 eta 18:55:37
epoch [34/50] batch [540/2000] time 2.058 (2.036) data 0.000 (0.001) loss 0.7446 (1.1460) lr 8.1262e-04 eta 18:55:08
epoch [34/50] batch [560/2000] time 2.038 (2.036) data 0.000 (0.001) loss 0.2720 (1.1541) lr 8.1262e-04 eta 18:54:35
epoch [34/50] batch [580/2000] time 2.059 (2.036) data 0.000 (0.001) loss 0.4685 (1.1501) lr 8.1262e-04 eta 18:53:50
epoch [34/50] batch [600/2000] time 2.035 (2.036) data 0.001 (0.001) loss 1.8127 (1.1597) lr 8.1262e-04 eta 18:53:16
epoch [34/50] batch [620/2000] time 2.036 (2.036) data 0.000 (0.001) loss 0.6458 (1.1582) lr 8.1262e-04 eta 18:52:37
epoch [34/50] batch [640/2000] time 2.059 (2.036) data 0.000 (0.001) loss 1.6965 (1.1515) lr 8.1262e-04 eta 18:51:59
epoch [34/50] batch [660/2000] time 2.034 (2.036) data 0.000 (0.001) loss 1.5517 (1.1521) lr 8.1262e-04 eta 18:51:12
epoch [34/50] batch [680/2000] time 2.003 (2.036) data 0.000 (0.001) loss 0.0445 (1.1492) lr 8.1262e-04 eta 18:50:31
epoch [34/50] batch [700/2000] time 2.001 (2.036) data 0.000 (0.001) loss 1.0388 (1.1635) lr 8.1262e-04 eta 18:49:44
epoch [34/50] batch [720/2000] time 2.039 (2.036) data 0.000 (0.001) loss 1.3270 (1.1606) lr 8.1262e-04 eta 18:49:05
epoch [34/50] batch [740/2000] time 2.063 (2.036) data 0.000 (0.001) loss 0.6828 (1.1578) lr 8.1262e-04 eta 18:48:27
epoch [34/50] batch [760/2000] time 2.063 (2.036) data 0.000 (0.001) loss 0.1526 (1.1582) lr 8.1262e-04 eta 18:47:55
epoch [34/50] batch [780/2000] time 2.063 (2.036) data 0.000 (0.001) loss 0.0403 (1.1589) lr 8.1262e-04 eta 18:47:17
epoch [34/50] batch [800/2000] time 2.043 (2.036) data 0.000 (0.001) loss 1.2209 (1.1585) lr 8.1262e-04 eta 18:46:44
epoch [34/50] batch [820/2000] time 2.063 (2.036) data 0.000 (0.001) loss 0.9137 (1.1587) lr 8.1262e-04 eta 18:46:00
epoch [34/50] batch [840/2000] time 2.004 (2.036) data 0.000 (0.001) loss 1.8601 (1.1596) lr 8.1262e-04 eta 18:45:27
epoch [34/50] batch [860/2000] time 2.062 (2.036) data 0.000 (0.001) loss 1.5392 (1.1610) lr 8.1262e-04 eta 18:44:44
epoch [34/50] batch [880/2000] time 2.007 (2.036) data 0.000 (0.001) loss 2.0894 (1.1551) lr 8.1262e-04 eta 18:44:03
epoch [34/50] batch [900/2000] time 2.040 (2.036) data 0.000 (0.001) loss 1.8453 (1.1562) lr 8.1262e-04 eta 18:43:24
epoch [34/50] batch [920/2000] time 2.037 (2.036) data 0.000 (0.001) loss 0.5046 (1.1551) lr 8.1262e-04 eta 18:42:44
epoch [34/50] batch [940/2000] time 1.977 (2.036) data 0.000 (0.001) loss 2.7155 (1.1583) lr 8.1262e-04 eta 18:41:59
epoch [34/50] batch [960/2000] time 2.002 (2.036) data 0.000 (0.001) loss 0.0263 (1.1564) lr 8.1262e-04 eta 18:41:20
epoch [34/50] batch [980/2000] time 1.999 (2.036) data 0.000 (0.001) loss 0.0872 (1.1560) lr 8.1262e-04 eta 18:40:37
epoch [34/50] batch [1000/2000] time 2.060 (2.036) data 0.000 (0.001) loss 1.0552 (1.1546) lr 8.1262e-04 eta 18:39:56
epoch [34/50] batch [1020/2000] time 2.004 (2.036) data 0.000 (0.001) loss 1.9505 (1.1637) lr 8.1262e-04 eta 18:39:16
epoch [34/50] batch [1040/2000] time 2.059 (2.036) data 0.000 (0.001) loss 1.3065 (1.1649) lr 8.1262e-04 eta 18:38:33
epoch [34/50] batch [1060/2000] time 2.000 (2.036) data 0.000 (0.001) loss 3.2380 (1.1662) lr 8.1262e-04 eta 18:37:49
epoch [34/50] batch [1080/2000] time 2.002 (2.036) data 0.000 (0.001) loss 1.1041 (1.1636) lr 8.1262e-04 eta 18:37:07
epoch [34/50] batch [1100/2000] time 2.060 (2.036) data 0.000 (0.001) loss 0.2407 (1.1689) lr 8.1262e-04 eta 18:36:30
epoch [34/50] batch [1120/2000] time 2.066 (2.036) data 0.000 (0.001) loss 0.5374 (1.1701) lr 8.1262e-04 eta 18:35:53
epoch [34/50] batch [1140/2000] time 2.004 (2.036) data 0.001 (0.001) loss 0.1042 (1.1646) lr 8.1262e-04 eta 18:35:11
epoch [34/50] batch [1160/2000] time 2.034 (2.036) data 0.000 (0.001) loss 1.8147 (1.1640) lr 8.1262e-04 eta 18:34:32
epoch [34/50] batch [1180/2000] time 2.062 (2.036) data 0.000 (0.001) loss 1.3725 (1.1636) lr 8.1262e-04 eta 18:33:51
epoch [34/50] batch [1200/2000] time 2.002 (2.036) data 0.000 (0.001) loss 0.4661 (1.1608) lr 8.1262e-04 eta 18:33:09
epoch [34/50] batch [1220/2000] time 2.059 (2.036) data 0.000 (0.001) loss 0.1088 (1.1605) lr 8.1262e-04 eta 18:32:29
epoch [34/50] batch [1240/2000] time 2.061 (2.036) data 0.000 (0.001) loss 1.9408 (1.1624) lr 8.1262e-04 eta 18:31:45
epoch [34/50] batch [1260/2000] time 1.981 (2.036) data 0.000 (0.001) loss 2.9424 (1.1691) lr 8.1262e-04 eta 18:31:02
epoch [34/50] batch [1280/2000] time 2.036 (2.036) data 0.000 (0.001) loss 0.3784 (1.1663) lr 8.1262e-04 eta 18:30:24
epoch [34/50] batch [1300/2000] time 1.979 (2.036) data 0.000 (0.001) loss 0.7990 (1.1610) lr 8.1262e-04 eta 18:29:42
epoch [34/50] batch [1320/2000] time 2.002 (2.036) data 0.000 (0.001) loss 1.4702 (1.1572) lr 8.1262e-04 eta 18:29:00
epoch [34/50] batch [1340/2000] time 2.008 (2.036) data 0.000 (0.001) loss 0.3431 (1.1562) lr 8.1262e-04 eta 18:28:24
epoch [34/50] batch [1360/2000] time 2.063 (2.036) data 0.000 (0.001) loss 1.2697 (1.1608) lr 8.1262e-04 eta 18:27:50
epoch [34/50] batch [1380/2000] time 2.063 (2.037) data 0.000 (0.001) loss 0.4414 (1.1545) lr 8.1262e-04 eta 18:27:13
epoch [34/50] batch [1400/2000] time 2.006 (2.037) data 0.000 (0.001) loss 0.7318 (1.1562) lr 8.1262e-04 eta 18:26:36
epoch [34/50] batch [1420/2000] time 2.061 (2.037) data 0.000 (0.001) loss 0.4868 (1.1550) lr 8.1262e-04 eta 18:25:55
epoch [34/50] batch [1440/2000] time 2.062 (2.037) data 0.000 (0.001) loss 0.9023 (1.1538) lr 8.1262e-04 eta 18:25:16
epoch [34/50] batch [1460/2000] time 2.000 (2.037) data 0.000 (0.001) loss 0.3884 (1.1513) lr 8.1262e-04 eta 18:24:34
epoch [34/50] batch [1480/2000] time 2.062 (2.037) data 0.000 (0.001) loss 0.9124 (1.1478) lr 8.1262e-04 eta 18:23:55
epoch [34/50] batch [1500/2000] time 2.061 (2.037) data 0.000 (0.001) loss 1.2686 (1.1457) lr 8.1262e-04 eta 18:23:15
epoch [34/50] batch [1520/2000] time 2.060 (2.037) data 0.002 (0.001) loss 0.7630 (1.1459) lr 8.1262e-04 eta 18:22:33
epoch [34/50] batch [1540/2000] time 2.059 (2.037) data 0.000 (0.001) loss 0.2894 (1.1428) lr 8.1262e-04 eta 18:21:51
epoch [34/50] batch [1560/2000] time 2.037 (2.037) data 0.000 (0.001) loss 0.9876 (1.1424) lr 8.1262e-04 eta 18:21:07
epoch [34/50] batch [1580/2000] time 2.034 (2.037) data 0.000 (0.001) loss 3.8417 (1.1465) lr 8.1262e-04 eta 18:20:28
epoch [34/50] batch [1600/2000] time 2.058 (2.037) data 0.000 (0.001) loss 2.1610 (1.1449) lr 8.1262e-04 eta 18:19:47
epoch [34/50] batch [1620/2000] time 2.032 (2.037) data 0.000 (0.001) loss 0.1411 (1.1436) lr 8.1262e-04 eta 18:19:06
epoch [34/50] batch [1640/2000] time 2.034 (2.037) data 0.000 (0.001) loss 0.0481 (1.1420) lr 8.1262e-04 eta 18:18:25
epoch [34/50] batch [1660/2000] time 2.033 (2.037) data 0.000 (0.001) loss 1.0857 (1.1437) lr 8.1262e-04 eta 18:17:42
epoch [34/50] batch [1680/2000] time 2.036 (2.037) data 0.001 (0.001) loss 1.5518 (1.1479) lr 8.1262e-04 eta 18:17:01
epoch [34/50] batch [1700/2000] time 2.034 (2.037) data 0.000 (0.001) loss 1.2702 (1.1512) lr 8.1262e-04 eta 18:16:21
epoch [34/50] batch [1720/2000] time 1.980 (2.037) data 0.000 (0.001) loss 1.1088 (1.1527) lr 8.1262e-04 eta 18:15:39
epoch [34/50] batch [1740/2000] time 2.034 (2.036) data 0.000 (0.001) loss 1.2339 (1.1546) lr 8.1262e-04 eta 18:14:57
epoch [34/50] batch [1760/2000] time 2.002 (2.036) data 0.000 (0.001) loss 0.4524 (1.1557) lr 8.1262e-04 eta 18:14:16
epoch [34/50] batch [1780/2000] time 2.062 (2.037) data 0.000 (0.001) loss 1.8445 (1.1592) lr 8.1262e-04 eta 18:13:37
epoch [34/50] batch [1800/2000] time 1.999 (2.036) data 0.000 (0.001) loss 1.6638 (1.1587) lr 8.1262e-04 eta 18:12:54
epoch [34/50] batch [1820/2000] time 2.042 (2.036) data 0.000 (0.001) loss 0.0361 (1.1591) lr 8.1262e-04 eta 18:12:11
epoch [34/50] batch [1840/2000] time 2.064 (2.036) data 0.000 (0.001) loss 1.3694 (1.1588) lr 8.1262e-04 eta 18:11:29
epoch [34/50] batch [1860/2000] time 2.008 (2.036) data 0.000 (0.001) loss 0.0286 (1.1550) lr 8.1262e-04 eta 18:10:50
epoch [34/50] batch [1880/2000] time 2.063 (2.036) data 0.000 (0.001) loss 1.6713 (1.1525) lr 8.1262e-04 eta 18:10:11
epoch [34/50] batch [1900/2000] time 2.058 (2.037) data 0.000 (0.001) loss 0.4167 (1.1531) lr 8.1262e-04 eta 18:09:33
epoch [34/50] batch [1920/2000] time 2.059 (2.037) data 0.000 (0.001) loss 0.4635 (1.1527) lr 8.1262e-04 eta 18:08:53
epoch [34/50] batch [1940/2000] time 2.057 (2.037) data 0.000 (0.001) loss 0.2606 (1.1515) lr 8.1262e-04 eta 18:08:14
epoch [34/50] batch [1960/2000] time 2.058 (2.037) data 0.000 (0.001) loss 2.0085 (1.1593) lr 8.1262e-04 eta 18:07:33
epoch [34/50] batch [1980/2000] time 2.032 (2.037) data 0.000 (0.001) loss 1.1656 (1.1620) lr 8.1262e-04 eta 18:06:52
epoch [34/50] batch [2000/2000] time 2.058 (2.037) data 0.000 (0.001) loss 0.9343 (1.1616) lr 7.5131e-04 eta 18:06:09
epoch [35/50] batch [20/2000] time 2.038 (2.071) data 0.000 (0.036) loss 0.7613 (1.0491) lr 7.5131e-04 eta 18:23:55
epoch [35/50] batch [40/2000] time 2.055 (2.056) data 0.000 (0.018) loss 0.2472 (1.1138) lr 7.5131e-04 eta 18:14:57
epoch [35/50] batch [60/2000] time 2.037 (2.046) data 0.001 (0.012) loss 2.8617 (1.2124) lr 7.5131e-04 eta 18:09:23
epoch [35/50] batch [80/2000] time 1.998 (2.043) data 0.000 (0.009) loss 2.7715 (1.2611) lr 7.5131e-04 eta 18:07:05
epoch [35/50] batch [100/2000] time 2.038 (2.042) data 0.000 (0.007) loss 0.9603 (1.2664) lr 7.5131e-04 eta 18:05:54
epoch [35/50] batch [120/2000] time 1.978 (2.040) data 0.000 (0.006) loss 1.9691 (1.1822) lr 7.5131e-04 eta 18:04:04
epoch [35/50] batch [140/2000] time 2.057 (2.039) data 0.000 (0.005) loss 0.5789 (1.1683) lr 7.5131e-04 eta 18:02:57
epoch [35/50] batch [160/2000] time 2.057 (2.039) data 0.000 (0.005) loss 0.3870 (1.2046) lr 7.5131e-04 eta 18:02:17
epoch [35/50] batch [180/2000] time 2.058 (2.040) data 0.000 (0.004) loss 1.7962 (1.1920) lr 7.5131e-04 eta 18:01:43
epoch [35/50] batch [200/2000] time 2.058 (2.039) data 0.000 (0.004) loss 2.2652 (1.1931) lr 7.5131e-04 eta 18:00:32
epoch [35/50] batch [220/2000] time 2.059 (2.038) data 0.000 (0.003) loss 0.5704 (1.2022) lr 7.5131e-04 eta 17:59:26
epoch [35/50] batch [240/2000] time 2.055 (2.037) data 0.000 (0.003) loss 0.1298 (1.2102) lr 7.5131e-04 eta 17:58:27
epoch [35/50] batch [260/2000] time 2.057 (2.038) data 0.000 (0.003) loss 0.2254 (1.1935) lr 7.5131e-04 eta 17:58:02
epoch [35/50] batch [280/2000] time 2.030 (2.038) data 0.000 (0.003) loss 0.4909 (1.1842) lr 7.5131e-04 eta 17:57:13
epoch [35/50] batch [300/2000] time 2.056 (2.037) data 0.000 (0.003) loss 0.2806 (1.1799) lr 7.5131e-04 eta 17:56:25
epoch [35/50] batch [320/2000] time 2.001 (2.037) data 0.000 (0.002) loss 0.5051 (1.1788) lr 7.5131e-04 eta 17:55:39
epoch [35/50] batch [340/2000] time 2.059 (2.037) data 0.000 (0.002) loss 1.3459 (1.1911) lr 7.5131e-04 eta 17:55:01
epoch [35/50] batch [360/2000] time 2.000 (2.037) data 0.000 (0.002) loss 1.9484 (1.1845) lr 7.5131e-04 eta 17:54:11
epoch [35/50] batch [380/2000] time 2.002 (2.037) data 0.000 (0.002) loss 0.3081 (1.1739) lr 7.5131e-04 eta 17:53:24
epoch [35/50] batch [400/2000] time 2.000 (2.037) data 0.000 (0.002) loss 1.2004 (1.1758) lr 7.5131e-04 eta 17:52:40
epoch [35/50] batch [420/2000] time 2.069 (2.037) data 0.000 (0.002) loss 0.3066 (1.1779) lr 7.5131e-04 eta 17:51:57
epoch [35/50] batch [440/2000] time 2.040 (2.037) data 0.000 (0.002) loss 0.1721 (1.1661) lr 7.5131e-04 eta 17:51:21
epoch [35/50] batch [460/2000] time 2.036 (2.037) data 0.000 (0.002) loss 0.6363 (1.1524) lr 7.5131e-04 eta 17:50:37
epoch [35/50] batch [480/2000] time 2.033 (2.037) data 0.000 (0.002) loss 1.7636 (1.1596) lr 7.5131e-04 eta 17:49:54
epoch [35/50] batch [500/2000] time 2.056 (2.036) data 0.000 (0.002) loss 4.7189 (1.1732) lr 7.5131e-04 eta 17:49:07
epoch [35/50] batch [520/2000] time 2.060 (2.036) data 0.000 (0.002) loss 1.9119 (1.1819) lr 7.5131e-04 eta 17:48:16
epoch [35/50] batch [540/2000] time 1.974 (2.036) data 0.000 (0.002) loss 1.1085 (1.1877) lr 7.5131e-04 eta 17:47:35
epoch [35/50] batch [560/2000] time 1.998 (2.036) data 0.000 (0.002) loss 0.9817 (1.1748) lr 7.5131e-04 eta 17:46:40
epoch [35/50] batch [580/2000] time 2.034 (2.036) data 0.000 (0.001) loss 1.7634 (1.1811) lr 7.5131e-04 eta 17:45:58
epoch [35/50] batch [600/2000] time 2.059 (2.036) data 0.001 (0.001) loss 2.8883 (1.1821) lr 7.5131e-04 eta 17:45:23
epoch [35/50] batch [620/2000] time 1.996 (2.036) data 0.000 (0.001) loss 1.2171 (1.1847) lr 7.5131e-04 eta 17:44:40
epoch [35/50] batch [640/2000] time 2.038 (2.036) data 0.000 (0.001) loss 0.5659 (1.1780) lr 7.5131e-04 eta 17:43:57
epoch [35/50] batch [660/2000] time 2.059 (2.036) data 0.000 (0.001) loss 1.7939 (1.1840) lr 7.5131e-04 eta 17:43:15
epoch [35/50] batch [680/2000] time 2.004 (2.035) data 0.000 (0.001) loss 0.7046 (1.1791) lr 7.5131e-04 eta 17:42:27
epoch [35/50] batch [700/2000] time 2.059 (2.035) data 0.000 (0.001) loss 1.9023 (1.1839) lr 7.5131e-04 eta 17:41:50
epoch [35/50] batch [720/2000] time 2.059 (2.036) data 0.000 (0.001) loss 0.7357 (1.1843) lr 7.5131e-04 eta 17:41:11
epoch [35/50] batch [740/2000] time 1.981 (2.036) data 0.000 (0.001) loss 1.3393 (1.1837) lr 7.5131e-04 eta 17:40:32
epoch [35/50] batch [760/2000] time 2.063 (2.036) data 0.000 (0.001) loss 3.0286 (1.1936) lr 7.5131e-04 eta 17:39:55
epoch [35/50] batch [780/2000] time 2.034 (2.036) data 0.000 (0.001) loss 0.7253 (1.1887) lr 7.5131e-04 eta 17:39:18
epoch [35/50] batch [800/2000] time 2.057 (2.036) data 0.000 (0.001) loss 0.4386 (1.1939) lr 7.5131e-04 eta 17:38:28
epoch [35/50] batch [820/2000] time 2.059 (2.035) data 0.000 (0.001) loss 0.2481 (1.1914) lr 7.5131e-04 eta 17:37:45
epoch [35/50] batch [840/2000] time 2.033 (2.035) data 0.000 (0.001) loss 1.3201 (1.1874) lr 7.5131e-04 eta 17:37:00
epoch [35/50] batch [860/2000] time 2.060 (2.035) data 0.000 (0.001) loss 0.2633 (1.1913) lr 7.5131e-04 eta 17:36:19
epoch [35/50] batch [880/2000] time 2.058 (2.035) data 0.000 (0.001) loss 2.2637 (1.1941) lr 7.5131e-04 eta 17:35:38
epoch [35/50] batch [900/2000] time 2.033 (2.035) data 0.000 (0.001) loss 2.1491 (1.1910) lr 7.5131e-04 eta 17:34:54
epoch [35/50] batch [920/2000] time 2.059 (2.035) data 0.000 (0.001) loss 1.6386 (1.1897) lr 7.5131e-04 eta 17:34:17
epoch [35/50] batch [940/2000] time 2.039 (2.035) data 0.000 (0.001) loss 0.5213 (1.1920) lr 7.5131e-04 eta 17:33:39
epoch [35/50] batch [960/2000] time 2.038 (2.036) data 0.000 (0.001) loss 1.7808 (1.1935) lr 7.5131e-04 eta 17:33:02
epoch [35/50] batch [980/2000] time 2.060 (2.035) data 0.000 (0.001) loss 1.2126 (1.1964) lr 7.5131e-04 eta 17:32:20
epoch [35/50] batch [1000/2000] time 2.001 (2.035) data 0.000 (0.001) loss 1.2329 (1.1955) lr 7.5131e-04 eta 17:31:35
epoch [35/50] batch [1020/2000] time 2.002 (2.035) data 0.000 (0.001) loss 3.1890 (1.1944) lr 7.5131e-04 eta 17:30:57
epoch [35/50] batch [1040/2000] time 2.035 (2.035) data 0.000 (0.001) loss 0.9942 (1.1990) lr 7.5131e-04 eta 17:30:18
epoch [35/50] batch [1060/2000] time 2.038 (2.036) data 0.000 (0.001) loss 1.9759 (1.2040) lr 7.5131e-04 eta 17:29:40
epoch [35/50] batch [1080/2000] time 2.034 (2.036) data 0.000 (0.001) loss 0.2744 (1.1981) lr 7.5131e-04 eta 17:29:00
epoch [35/50] batch [1100/2000] time 2.005 (2.036) data 0.000 (0.001) loss 0.5226 (1.1902) lr 7.5131e-04 eta 17:28:18
epoch [35/50] batch [1120/2000] time 2.063 (2.036) data 0.000 (0.001) loss 2.8943 (1.1838) lr 7.5131e-04 eta 17:27:36
epoch [35/50] batch [1140/2000] time 2.061 (2.036) data 0.001 (0.001) loss 1.4476 (1.1897) lr 7.5131e-04 eta 17:26:57
epoch [35/50] batch [1160/2000] time 2.035 (2.036) data 0.000 (0.001) loss 1.0439 (1.1931) lr 7.5131e-04 eta 17:26:15
epoch [35/50] batch [1180/2000] time 2.037 (2.036) data 0.000 (0.001) loss 1.7113 (1.1959) lr 7.5131e-04 eta 17:25:35
epoch [35/50] batch [1200/2000] time 1.997 (2.035) data 0.000 (0.001) loss 2.8835 (1.1942) lr 7.5131e-04 eta 17:24:52
epoch [35/50] batch [1220/2000] time 2.002 (2.035) data 0.000 (0.001) loss 0.1229 (1.1895) lr 7.5131e-04 eta 17:24:09
epoch [35/50] batch [1240/2000] time 2.001 (2.035) data 0.000 (0.001) loss 2.0824 (1.1877) lr 7.5131e-04 eta 17:23:27
epoch [35/50] batch [1260/2000] time 1.999 (2.035) data 0.000 (0.001) loss 0.0710 (1.1883) lr 7.5131e-04 eta 17:22:45
epoch [35/50] batch [1280/2000] time 2.002 (2.035) data 0.000 (0.001) loss 0.4174 (1.1848) lr 7.5131e-04 eta 17:22:04
epoch [35/50] batch [1300/2000] time 2.034 (2.035) data 0.000 (0.001) loss 0.3698 (1.1843) lr 7.5131e-04 eta 17:21:25
epoch [35/50] batch [1320/2000] time 1.977 (2.035) data 0.000 (0.001) loss 0.0704 (1.1811) lr 7.5131e-04 eta 17:20:42
epoch [35/50] batch [1340/2000] time 2.061 (2.035) data 0.000 (0.001) loss 0.2595 (1.1784) lr 7.5131e-04 eta 17:20:01
epoch [35/50] batch [1360/2000] time 2.059 (2.035) data 0.000 (0.001) loss 0.1331 (1.1774) lr 7.5131e-04 eta 17:19:21
epoch [35/50] batch [1380/2000] time 2.057 (2.035) data 0.000 (0.001) loss 1.2915 (1.1771) lr 7.5131e-04 eta 17:18:43
epoch [35/50] batch [1400/2000] time 2.030 (2.035) data 0.000 (0.001) loss 2.8978 (1.1817) lr 7.5131e-04 eta 17:17:58
epoch [35/50] batch [1420/2000] time 2.057 (2.035) data 0.000 (0.001) loss 1.4694 (1.1809) lr 7.5131e-04 eta 17:17:18
epoch [35/50] batch [1440/2000] time 2.004 (2.035) data 0.000 (0.001) loss 0.2148 (1.1766) lr 7.5131e-04 eta 17:16:37
epoch [35/50] batch [1460/2000] time 2.036 (2.035) data 0.000 (0.001) loss 2.2953 (1.1805) lr 7.5131e-04 eta 17:15:58
epoch [35/50] batch [1480/2000] time 2.059 (2.035) data 0.000 (0.001) loss 0.8993 (1.1835) lr 7.5131e-04 eta 17:15:19
epoch [35/50] batch [1500/2000] time 2.038 (2.035) data 0.000 (0.001) loss 0.1378 (1.1846) lr 7.5131e-04 eta 17:14:39
epoch [35/50] batch [1520/2000] time 2.031 (2.035) data 0.000 (0.001) loss 0.7631 (1.1799) lr 7.5131e-04 eta 17:14:00
epoch [35/50] batch [1540/2000] time 2.032 (2.035) data 0.000 (0.001) loss 2.4962 (1.1824) lr 7.5131e-04 eta 17:13:20
epoch [35/50] batch [1560/2000] time 2.035 (2.035) data 0.000 (0.001) loss 0.4681 (1.1839) lr 7.5131e-04 eta 17:12:40
epoch [35/50] batch [1580/2000] time 2.031 (2.035) data 0.000 (0.001) loss 0.2296 (1.1834) lr 7.5131e-04 eta 17:11:57
epoch [35/50] batch [1600/2000] time 2.056 (2.035) data 0.000 (0.001) loss 0.5104 (1.1790) lr 7.5131e-04 eta 17:11:18
epoch [35/50] batch [1620/2000] time 2.060 (2.035) data 0.000 (0.001) loss 0.6888 (1.1771) lr 7.5131e-04 eta 17:10:36
epoch [35/50] batch [1640/2000] time 2.061 (2.035) data 0.000 (0.001) loss 0.9495 (1.1722) lr 7.5131e-04 eta 17:09:57
epoch [35/50] batch [1660/2000] time 2.005 (2.035) data 0.000 (0.001) loss 0.8681 (1.1699) lr 7.5131e-04 eta 17:09:16
epoch [35/50] batch [1680/2000] time 2.007 (2.036) data 0.001 (0.001) loss 2.3701 (1.1720) lr 7.5131e-04 eta 17:08:36
epoch [35/50] batch [1700/2000] time 2.037 (2.035) data 0.000 (0.001) loss 1.6612 (1.1703) lr 7.5131e-04 eta 17:07:55
epoch [35/50] batch [1720/2000] time 2.030 (2.035) data 0.000 (0.001) loss 0.7818 (1.1745) lr 7.5131e-04 eta 17:07:14
epoch [35/50] batch [1740/2000] time 1.999 (2.035) data 0.000 (0.001) loss 0.3430 (1.1745) lr 7.5131e-04 eta 17:06:33
epoch [35/50] batch [1760/2000] time 2.000 (2.035) data 0.000 (0.001) loss 1.2739 (1.1724) lr 7.5131e-04 eta 17:05:50
epoch [35/50] batch [1780/2000] time 2.037 (2.035) data 0.000 (0.001) loss 2.8190 (1.1776) lr 7.5131e-04 eta 17:05:10
epoch [35/50] batch [1800/2000] time 2.042 (2.035) data 0.000 (0.001) loss 0.6014 (1.1770) lr 7.5131e-04 eta 17:04:30
epoch [35/50] batch [1820/2000] time 2.040 (2.035) data 0.000 (0.001) loss 0.0879 (1.1782) lr 7.5131e-04 eta 17:03:49
epoch [35/50] batch [1840/2000] time 2.059 (2.035) data 0.000 (0.001) loss 1.2996 (1.1760) lr 7.5131e-04 eta 17:03:09
epoch [35/50] batch [1860/2000] time 2.039 (2.035) data 0.000 (0.001) loss 0.3503 (1.1772) lr 7.5131e-04 eta 17:02:26
epoch [35/50] batch [1880/2000] time 2.057 (2.035) data 0.000 (0.001) loss 0.7008 (1.1781) lr 7.5131e-04 eta 17:01:44
epoch [35/50] batch [1900/2000] time 2.004 (2.035) data 0.000 (0.001) loss 1.6787 (1.1832) lr 7.5131e-04 eta 17:01:00
epoch [35/50] batch [1920/2000] time 2.059 (2.035) data 0.000 (0.001) loss 0.3217 (1.1828) lr 7.5131e-04 eta 17:00:18
epoch [35/50] batch [1940/2000] time 2.061 (2.035) data 0.000 (0.001) loss 0.2612 (1.1867) lr 7.5131e-04 eta 16:59:36
epoch [35/50] batch [1960/2000] time 2.061 (2.035) data 0.000 (0.001) loss 0.4792 (1.1845) lr 7.5131e-04 eta 16:58:56
epoch [35/50] batch [1980/2000] time 2.055 (2.035) data 0.000 (0.001) loss 1.7556 (1.1839) lr 7.5131e-04 eta 16:58:14
epoch [35/50] batch [2000/2000] time 2.031 (2.035) data 0.000 (0.001) loss 0.1339 (1.1825) lr 6.9098e-04 eta 16:57:33
epoch [36/50] batch [20/2000] time 2.053 (2.059) data 0.000 (0.028) loss 0.1209 (1.6175) lr 6.9098e-04 eta 17:08:45
epoch [36/50] batch [40/2000] time 2.010 (2.039) data 0.000 (0.014) loss 1.5357 (1.4185) lr 6.9098e-04 eta 16:58:14
epoch [36/50] batch [60/2000] time 2.053 (2.038) data 0.001 (0.010) loss 2.6216 (1.3346) lr 6.9098e-04 eta 16:56:57
epoch [36/50] batch [80/2000] time 2.055 (2.036) data 0.000 (0.007) loss 3.0183 (1.3431) lr 6.9098e-04 eta 16:55:08
epoch [36/50] batch [100/2000] time 2.029 (2.034) data 0.000 (0.006) loss 0.6840 (1.2658) lr 6.9098e-04 eta 16:53:40
epoch [36/50] batch [120/2000] time 2.055 (2.033) data 0.000 (0.005) loss 2.3049 (1.2881) lr 6.9098e-04 eta 16:52:32
epoch [36/50] batch [140/2000] time 2.000 (2.033) data 0.000 (0.004) loss 1.8568 (1.2983) lr 6.9098e-04 eta 16:51:36
epoch [36/50] batch [160/2000] time 1.999 (2.032) data 0.000 (0.004) loss 0.3548 (1.2887) lr 6.9098e-04 eta 16:50:42
epoch [36/50] batch [180/2000] time 1.999 (2.033) data 0.000 (0.003) loss 1.3145 (1.2937) lr 6.9098e-04 eta 16:50:12
epoch [36/50] batch [200/2000] time 2.052 (2.032) data 0.000 (0.003) loss 0.1961 (1.2950) lr 6.9098e-04 eta 16:49:15
epoch [36/50] batch [220/2000] time 2.002 (2.030) data 0.000 (0.003) loss 1.3381 (1.3024) lr 6.9098e-04 eta 16:47:45
epoch [36/50] batch [240/2000] time 2.007 (2.031) data 0.000 (0.003) loss 1.8510 (1.3446) lr 6.9098e-04 eta 16:47:27
epoch [36/50] batch [260/2000] time 2.005 (2.032) data 0.000 (0.002) loss 0.0628 (1.3524) lr 6.9098e-04 eta 16:47:02
epoch [36/50] batch [280/2000] time 2.057 (2.032) data 0.000 (0.002) loss 1.4393 (1.3246) lr 6.9098e-04 eta 16:46:38
epoch [36/50] batch [300/2000] time 2.058 (2.032) data 0.000 (0.002) loss 0.4711 (1.3209) lr 6.9098e-04 eta 16:45:50
epoch [36/50] batch [320/2000] time 2.054 (2.032) data 0.000 (0.002) loss 0.2065 (1.3107) lr 6.9098e-04 eta 16:45:20
epoch [36/50] batch [340/2000] time 2.003 (2.033) data 0.000 (0.002) loss 1.4116 (1.3049) lr 6.9098e-04 eta 16:44:50
epoch [36/50] batch [360/2000] time 2.029 (2.033) data 0.000 (0.002) loss 0.4358 (1.2989) lr 6.9098e-04 eta 16:44:21
epoch [36/50] batch [380/2000] time 2.030 (2.033) data 0.000 (0.002) loss 0.9574 (1.2958) lr 6.9098e-04 eta 16:43:28
epoch [36/50] batch [400/2000] time 2.053 (2.033) data 0.000 (0.002) loss 1.2009 (1.2777) lr 6.9098e-04 eta 16:42:47
epoch [36/50] batch [420/2000] time 2.054 (2.033) data 0.000 (0.002) loss 2.1155 (1.2836) lr 6.9098e-04 eta 16:42:10
epoch [36/50] batch [440/2000] time 2.055 (2.033) data 0.000 (0.002) loss 0.7816 (1.2952) lr 6.9098e-04 eta 16:41:25
epoch [36/50] batch [460/2000] time 2.049 (2.033) data 0.000 (0.001) loss 1.0287 (1.2990) lr 6.9098e-04 eta 16:40:43
epoch [36/50] batch [480/2000] time 2.002 (2.032) data 0.000 (0.001) loss 2.2744 (1.2893) lr 6.9098e-04 eta 16:39:57
epoch [36/50] batch [500/2000] time 2.050 (2.032) data 0.000 (0.001) loss 1.2988 (1.2713) lr 6.9098e-04 eta 16:39:07
epoch [36/50] batch [520/2000] time 2.057 (2.032) data 0.000 (0.001) loss 0.9459 (1.2574) lr 6.9098e-04 eta 16:38:30
epoch [36/50] batch [540/2000] time 2.032 (2.032) data 0.000 (0.001) loss 0.2857 (1.2558) lr 6.9098e-04 eta 16:37:44
epoch [36/50] batch [560/2000] time 2.050 (2.032) data 0.000 (0.001) loss 0.0310 (1.2576) lr 6.9098e-04 eta 16:36:59
epoch [36/50] batch [580/2000] time 1.999 (2.032) data 0.000 (0.001) loss 0.1083 (1.2584) lr 6.9098e-04 eta 16:36:08
epoch [36/50] batch [600/2000] time 2.056 (2.032) data 0.001 (0.001) loss 0.9648 (1.2578) lr 6.9098e-04 eta 16:35:26
epoch [36/50] batch [620/2000] time 2.060 (2.032) data 0.000 (0.001) loss 0.5375 (1.2637) lr 6.9098e-04 eta 16:34:52
epoch [36/50] batch [640/2000] time 1.977 (2.032) data 0.000 (0.001) loss 1.6156 (1.2581) lr 6.9098e-04 eta 16:34:12
epoch [36/50] batch [660/2000] time 2.051 (2.032) data 0.000 (0.001) loss 1.7585 (1.2535) lr 6.9098e-04 eta 16:33:37
epoch [36/50] batch [680/2000] time 2.002 (2.032) data 0.000 (0.001) loss 1.0476 (1.2501) lr 6.9098e-04 eta 16:32:53
epoch [36/50] batch [700/2000] time 2.055 (2.032) data 0.000 (0.001) loss 0.5404 (1.2431) lr 6.9098e-04 eta 16:32:14
epoch [36/50] batch [720/2000] time 2.055 (2.032) data 0.000 (0.001) loss 0.0815 (1.2477) lr 6.9098e-04 eta 16:31:36
epoch [36/50] batch [740/2000] time 2.054 (2.032) data 0.000 (0.001) loss 0.3837 (1.2413) lr 6.9098e-04 eta 16:30:58
epoch [36/50] batch [760/2000] time 2.030 (2.032) data 0.000 (0.001) loss 3.0268 (1.2420) lr 6.9098e-04 eta 16:30:11
epoch [36/50] batch [780/2000] time 2.030 (2.032) data 0.000 (0.001) loss 0.2919 (1.2374) lr 6.9098e-04 eta 16:29:32
epoch [36/50] batch [800/2000] time 2.053 (2.032) data 0.000 (0.001) loss 0.7850 (1.2375) lr 6.9098e-04 eta 16:28:42
epoch [36/50] batch [820/2000] time 2.051 (2.032) data 0.000 (0.001) loss 2.4701 (1.2369) lr 6.9098e-04 eta 16:28:05
epoch [36/50] batch [840/2000] time 2.052 (2.032) data 0.000 (0.001) loss 0.2711 (1.2375) lr 6.9098e-04 eta 16:27:24
epoch [36/50] batch [860/2000] time 2.054 (2.032) data 0.000 (0.001) loss 1.2039 (1.2323) lr 6.9098e-04 eta 16:26:46
epoch [36/50] batch [880/2000] time 2.030 (2.032) data 0.000 (0.001) loss 1.0709 (1.2305) lr 6.9098e-04 eta 16:26:03
epoch [36/50] batch [900/2000] time 2.052 (2.032) data 0.000 (0.001) loss 0.1832 (1.2264) lr 6.9098e-04 eta 16:25:28
epoch [36/50] batch [920/2000] time 2.051 (2.032) data 0.000 (0.001) loss 0.7060 (1.2186) lr 6.9098e-04 eta 16:24:48
epoch [36/50] batch [940/2000] time 2.031 (2.032) data 0.000 (0.001) loss 0.6116 (1.2099) lr 6.9098e-04 eta 16:24:05
epoch [36/50] batch [960/2000] time 1.999 (2.032) data 0.000 (0.001) loss 0.2789 (1.2096) lr 6.9098e-04 eta 16:23:22
epoch [36/50] batch [980/2000] time 1.999 (2.032) data 0.000 (0.001) loss 0.8742 (1.2075) lr 6.9098e-04 eta 16:22:39
epoch [36/50] batch [1000/2000] time 1.976 (2.032) data 0.000 (0.001) loss 0.6226 (1.2050) lr 6.9098e-04 eta 16:21:58
epoch [36/50] batch [1020/2000] time 1.998 (2.032) data 0.000 (0.001) loss 0.0261 (1.1982) lr 6.9098e-04 eta 16:21:17
epoch [36/50] batch [1040/2000] time 2.052 (2.032) data 0.000 (0.001) loss 0.8147 (1.1998) lr 6.9098e-04 eta 16:20:36
epoch [36/50] batch [1060/2000] time 2.050 (2.032) data 0.000 (0.001) loss 0.2887 (1.1958) lr 6.9098e-04 eta 16:19:56
epoch [36/50] batch [1080/2000] time 2.053 (2.032) data 0.000 (0.001) loss 0.1777 (1.1884) lr 6.9098e-04 eta 16:19:16
epoch [36/50] batch [1100/2000] time 2.028 (2.032) data 0.000 (0.001) loss 0.6461 (1.1874) lr 6.9098e-04 eta 16:18:34
epoch [36/50] batch [1120/2000] time 2.034 (2.032) data 0.000 (0.001) loss 0.3377 (1.1878) lr 6.9098e-04 eta 16:17:50
epoch [36/50] batch [1140/2000] time 2.054 (2.032) data 0.001 (0.001) loss 1.7464 (1.1927) lr 6.9098e-04 eta 16:17:12
epoch [36/50] batch [1160/2000] time 2.031 (2.032) data 0.000 (0.001) loss 1.4547 (1.1888) lr 6.9098e-04 eta 16:16:34
epoch [36/50] batch [1180/2000] time 1.998 (2.032) data 0.000 (0.001) loss 1.0069 (1.1923) lr 6.9098e-04 eta 16:15:50
epoch [36/50] batch [1200/2000] time 2.053 (2.032) data 0.000 (0.001) loss 0.7509 (1.1930) lr 6.9098e-04 eta 16:15:10
epoch [36/50] batch [1220/2000] time 2.051 (2.032) data 0.000 (0.001) loss 0.2958 (1.1884) lr 6.9098e-04 eta 16:14:29
epoch [36/50] batch [1240/2000] time 1.998 (2.032) data 0.000 (0.001) loss 1.5510 (1.1890) lr 6.9098e-04 eta 16:13:52
epoch [36/50] batch [1260/2000] time 2.054 (2.032) data 0.000 (0.001) loss 0.3054 (1.1921) lr 6.9098e-04 eta 16:13:11
epoch [36/50] batch [1280/2000] time 2.051 (2.032) data 0.000 (0.001) loss 0.9185 (1.1924) lr 6.9098e-04 eta 16:12:32
epoch [36/50] batch [1300/2000] time 1.998 (2.032) data 0.000 (0.001) loss 1.6417 (1.1916) lr 6.9098e-04 eta 16:11:47
epoch [36/50] batch [1320/2000] time 2.027 (2.032) data 0.000 (0.001) loss 0.4020 (1.1902) lr 6.9098e-04 eta 16:11:08
epoch [36/50] batch [1340/2000] time 2.051 (2.032) data 0.000 (0.001) loss 0.7927 (1.1941) lr 6.9098e-04 eta 16:10:29
epoch [36/50] batch [1360/2000] time 2.050 (2.032) data 0.000 (0.001) loss 0.2391 (1.1913) lr 6.9098e-04 eta 16:09:48
epoch [36/50] batch [1380/2000] time 2.001 (2.032) data 0.000 (0.001) loss 0.3084 (1.1887) lr 6.9098e-04 eta 16:09:03
epoch [36/50] batch [1400/2000] time 2.003 (2.032) data 0.000 (0.001) loss 1.0326 (1.1868) lr 6.9098e-04 eta 16:08:24
epoch [36/50] batch [1420/2000] time 2.058 (2.032) data 0.000 (0.001) loss 0.8915 (1.1871) lr 6.9098e-04 eta 16:07:45
epoch [36/50] batch [1440/2000] time 2.034 (2.032) data 0.000 (0.001) loss 0.0617 (1.1851) lr 6.9098e-04 eta 16:07:07
epoch [36/50] batch [1460/2000] time 2.054 (2.032) data 0.000 (0.001) loss 1.3403 (1.1856) lr 6.9098e-04 eta 16:06:27
epoch [36/50] batch [1480/2000] time 2.053 (2.032) data 0.000 (0.001) loss 1.2715 (1.1835) lr 6.9098e-04 eta 16:05:44
epoch [36/50] batch [1500/2000] time 2.031 (2.032) data 0.000 (0.001) loss 0.8814 (1.1816) lr 6.9098e-04 eta 16:05:03
epoch [36/50] batch [1520/2000] time 2.056 (2.032) data 0.000 (0.001) loss 0.0150 (1.1822) lr 6.9098e-04 eta 16:04:21
epoch [36/50] batch [1540/2000] time 1.998 (2.032) data 0.000 (0.001) loss 1.4792 (1.1812) lr 6.9098e-04 eta 16:03:42
epoch [36/50] batch [1560/2000] time 2.029 (2.032) data 0.000 (0.001) loss 0.4754 (1.1773) lr 6.9098e-04 eta 16:03:00
epoch [36/50] batch [1580/2000] time 2.003 (2.032) data 0.000 (0.001) loss 1.5077 (1.1781) lr 6.9098e-04 eta 16:02:20
epoch [36/50] batch [1600/2000] time 1.998 (2.032) data 0.000 (0.001) loss 0.0156 (1.1764) lr 6.9098e-04 eta 16:01:43
epoch [36/50] batch [1620/2000] time 2.054 (2.032) data 0.000 (0.001) loss 0.3415 (1.1716) lr 6.9098e-04 eta 16:01:02
epoch [36/50] batch [1640/2000] time 2.000 (2.032) data 0.000 (0.001) loss 0.0222 (1.1711) lr 6.9098e-04 eta 16:00:23
epoch [36/50] batch [1660/2000] time 2.052 (2.032) data 0.000 (0.001) loss 1.1019 (1.1707) lr 6.9098e-04 eta 15:59:42
epoch [36/50] batch [1680/2000] time 2.049 (2.032) data 0.001 (0.001) loss 1.1099 (1.1694) lr 6.9098e-04 eta 15:59:01
epoch [36/50] batch [1700/2000] time 2.049 (2.032) data 0.000 (0.001) loss 0.3279 (1.1715) lr 6.9098e-04 eta 15:58:21
epoch [36/50] batch [1720/2000] time 2.050 (2.032) data 0.000 (0.001) loss 0.9408 (1.1674) lr 6.9098e-04 eta 15:57:41
epoch [36/50] batch [1740/2000] time 2.028 (2.032) data 0.000 (0.001) loss 0.2528 (1.1676) lr 6.9098e-04 eta 15:56:59
epoch [36/50] batch [1760/2000] time 2.050 (2.032) data 0.000 (0.001) loss 0.5061 (1.1672) lr 6.9098e-04 eta 15:56:16
epoch [36/50] batch [1780/2000] time 2.002 (2.032) data 0.001 (0.001) loss 0.3844 (1.1666) lr 6.9098e-04 eta 15:55:33
epoch [36/50] batch [1800/2000] time 1.998 (2.032) data 0.000 (0.001) loss 0.4938 (1.1658) lr 6.9098e-04 eta 15:54:54
epoch [36/50] batch [1820/2000] time 1.998 (2.032) data 0.000 (0.001) loss 1.0965 (1.1682) lr 6.9098e-04 eta 15:54:11
epoch [36/50] batch [1840/2000] time 1.999 (2.032) data 0.000 (0.001) loss 2.0180 (1.1681) lr 6.9098e-04 eta 15:53:31
epoch [36/50] batch [1860/2000] time 2.052 (2.032) data 0.000 (0.001) loss 0.0423 (1.1662) lr 6.9098e-04 eta 15:52:49
epoch [36/50] batch [1880/2000] time 2.029 (2.032) data 0.000 (0.001) loss 1.0626 (1.1690) lr 6.9098e-04 eta 15:52:06
epoch [36/50] batch [1900/2000] time 2.052 (2.032) data 0.000 (0.001) loss 2.0156 (1.1705) lr 6.9098e-04 eta 15:51:26
epoch [36/50] batch [1920/2000] time 2.000 (2.032) data 0.000 (0.001) loss 0.6690 (1.1674) lr 6.9098e-04 eta 15:50:44
epoch [36/50] batch [1940/2000] time 2.055 (2.031) data 0.000 (0.001) loss 0.8328 (1.1691) lr 6.9098e-04 eta 15:50:03
epoch [36/50] batch [1960/2000] time 2.055 (2.031) data 0.000 (0.001) loss 1.1611 (1.1731) lr 6.9098e-04 eta 15:49:20
epoch [36/50] batch [1980/2000] time 2.000 (2.031) data 0.000 (0.001) loss 1.8755 (1.1743) lr 6.9098e-04 eta 15:48:40
epoch [36/50] batch [2000/2000] time 2.032 (2.032) data 0.000 (0.001) loss 0.0630 (1.1741) lr 6.3188e-04 eta 15:48:02
epoch [37/50] batch [20/2000] time 2.059 (2.068) data 0.000 (0.035) loss 0.4802 (1.4690) lr 6.3188e-04 eta 16:04:27
epoch [37/50] batch [40/2000] time 1.979 (2.049) data 0.000 (0.018) loss 0.3112 (1.1900) lr 6.3188e-04 eta 15:55:03
epoch [37/50] batch [60/2000] time 2.061 (2.043) data 0.001 (0.012) loss 0.1159 (1.1139) lr 6.3188e-04 eta 15:51:29
epoch [37/50] batch [80/2000] time 1.999 (2.041) data 0.000 (0.009) loss 1.8658 (1.1713) lr 6.3188e-04 eta 15:49:35
epoch [37/50] batch [100/2000] time 2.004 (2.038) data 0.000 (0.007) loss 1.6349 (1.1556) lr 6.3188e-04 eta 15:47:49
epoch [37/50] batch [120/2000] time 2.035 (2.039) data 0.000 (0.006) loss 0.2083 (1.1665) lr 6.3188e-04 eta 15:47:15
epoch [37/50] batch [140/2000] time 1.978 (2.038) data 0.000 (0.005) loss 0.2080 (1.1637) lr 6.3188e-04 eta 15:46:10
epoch [37/50] batch [160/2000] time 2.063 (2.039) data 0.000 (0.005) loss 0.2946 (1.1504) lr 6.3188e-04 eta 15:46:03
epoch [37/50] batch [180/2000] time 2.003 (2.039) data 0.000 (0.004) loss 3.6560 (1.1639) lr 6.3188e-04 eta 15:45:24
epoch [37/50] batch [200/2000] time 2.059 (2.039) data 0.000 (0.004) loss 0.4307 (1.1284) lr 6.3188e-04 eta 15:44:33
epoch [37/50] batch [220/2000] time 2.052 (2.038) data 0.000 (0.003) loss 0.6265 (1.1443) lr 6.3188e-04 eta 15:43:39
epoch [37/50] batch [240/2000] time 2.036 (2.038) data 0.000 (0.003) loss 1.2485 (1.1595) lr 6.3188e-04 eta 15:42:46
epoch [37/50] batch [260/2000] time 2.002 (2.037) data 0.000 (0.003) loss 0.3282 (1.1485) lr 6.3188e-04 eta 15:41:57
epoch [37/50] batch [280/2000] time 2.032 (2.038) data 0.000 (0.003) loss 0.1966 (1.1551) lr 6.3188e-04 eta 15:41:19
epoch [37/50] batch [300/2000] time 2.029 (2.037) data 0.000 (0.003) loss 0.4357 (1.1402) lr 6.3188e-04 eta 15:40:23
epoch [37/50] batch [320/2000] time 2.034 (2.037) data 0.000 (0.002) loss 0.9113 (1.1614) lr 6.3188e-04 eta 15:39:49
epoch [37/50] batch [340/2000] time 2.002 (2.037) data 0.000 (0.002) loss 0.0165 (1.1529) lr 6.3188e-04 eta 15:39:02
epoch [37/50] batch [360/2000] time 2.036 (2.037) data 0.000 (0.002) loss 5.3535 (1.1522) lr 6.3188e-04 eta 15:38:28
epoch [37/50] batch [380/2000] time 2.060 (2.037) data 0.000 (0.002) loss 0.6252 (1.1506) lr 6.3188e-04 eta 15:37:48
epoch [37/50] batch [400/2000] time 2.004 (2.037) data 0.000 (0.002) loss 1.8318 (1.1546) lr 6.3188e-04 eta 15:37:02
epoch [37/50] batch [420/2000] time 2.037 (2.037) data 0.000 (0.002) loss 1.4886 (1.1496) lr 6.3188e-04 eta 15:36:14
epoch [37/50] batch [440/2000] time 2.033 (2.037) data 0.000 (0.002) loss 0.4686 (1.1454) lr 6.3188e-04 eta 15:35:37
epoch [37/50] batch [460/2000] time 2.059 (2.037) data 0.000 (0.002) loss 0.5601 (1.1307) lr 6.3188e-04 eta 15:34:55
epoch [37/50] batch [480/2000] time 2.034 (2.037) data 0.000 (0.002) loss 3.9298 (1.1493) lr 6.3188e-04 eta 15:34:06
epoch [37/50] batch [500/2000] time 2.057 (2.037) data 0.000 (0.002) loss 2.4300 (1.1485) lr 6.3188e-04 eta 15:33:28
epoch [37/50] batch [520/2000] time 2.059 (2.037) data 0.000 (0.002) loss 1.7189 (1.1555) lr 6.3188e-04 eta 15:32:45
epoch [37/50] batch [540/2000] time 2.060 (2.037) data 0.000 (0.002) loss 0.4815 (1.1580) lr 6.3188e-04 eta 15:32:06
epoch [37/50] batch [560/2000] time 2.080 (2.037) data 0.000 (0.002) loss 0.8482 (1.1608) lr 6.3188e-04 eta 15:31:42
epoch [37/50] batch [580/2000] time 2.064 (2.037) data 0.000 (0.001) loss 1.2629 (1.1511) lr 6.3188e-04 eta 15:30:59
epoch [37/50] batch [600/2000] time 2.058 (2.037) data 0.001 (0.001) loss 3.0760 (1.1532) lr 6.3188e-04 eta 15:30:20
epoch [37/50] batch [620/2000] time 2.033 (2.037) data 0.000 (0.001) loss 1.8084 (1.1576) lr 6.3188e-04 eta 15:29:42
epoch [37/50] batch [640/2000] time 2.036 (2.037) data 0.000 (0.001) loss 0.7984 (1.1510) lr 6.3188e-04 eta 15:28:58
epoch [37/50] batch [660/2000] time 2.001 (2.037) data 0.000 (0.001) loss 1.5283 (1.1421) lr 6.3188e-04 eta 15:28:13
epoch [37/50] batch [680/2000] time 2.060 (2.037) data 0.000 (0.001) loss 0.3263 (1.1441) lr 6.3188e-04 eta 15:27:30
epoch [37/50] batch [700/2000] time 2.038 (2.037) data 0.000 (0.001) loss 1.0641 (1.1435) lr 6.3188e-04 eta 15:26:49
epoch [37/50] batch [720/2000] time 2.060 (2.037) data 0.000 (0.001) loss 1.8609 (1.1393) lr 6.3188e-04 eta 15:26:08
epoch [37/50] batch [740/2000] time 2.064 (2.037) data 0.000 (0.001) loss 2.3760 (1.1477) lr 6.3188e-04 eta 15:25:21
epoch [37/50] batch [760/2000] time 2.062 (2.037) data 0.000 (0.001) loss 1.0907 (1.1460) lr 6.3188e-04 eta 15:24:39
epoch [37/50] batch [780/2000] time 2.061 (2.037) data 0.000 (0.001) loss 0.4147 (1.1496) lr 6.3188e-04 eta 15:23:59
epoch [37/50] batch [800/2000] time 2.033 (2.037) data 0.000 (0.001) loss 0.9689 (1.1437) lr 6.3188e-04 eta 15:23:17
epoch [37/50] batch [820/2000] time 2.000 (2.037) data 0.000 (0.001) loss 0.5920 (1.1448) lr 6.3188e-04 eta 15:22:38
epoch [37/50] batch [840/2000] time 1.998 (2.037) data 0.000 (0.001) loss 1.7777 (1.1537) lr 6.3188e-04 eta 15:21:57
epoch [37/50] batch [860/2000] time 2.000 (2.037) data 0.000 (0.001) loss 1.2828 (1.1502) lr 6.3188e-04 eta 15:21:13
epoch [37/50] batch [880/2000] time 2.060 (2.037) data 0.000 (0.001) loss 1.8793 (1.1548) lr 6.3188e-04 eta 15:20:32
epoch [37/50] batch [900/2000] time 2.060 (2.037) data 0.000 (0.001) loss 0.4367 (1.1505) lr 6.3188e-04 eta 15:19:51
epoch [37/50] batch [920/2000] time 2.060 (2.037) data 0.000 (0.001) loss 0.1938 (1.1513) lr 6.3188e-04 eta 15:19:10
epoch [37/50] batch [940/2000] time 2.062 (2.036) data 0.000 (0.001) loss 0.4911 (1.1482) lr 6.3188e-04 eta 15:18:27
epoch [37/50] batch [960/2000] time 2.037 (2.036) data 0.000 (0.001) loss 0.5255 (1.1498) lr 6.3188e-04 eta 15:17:44
epoch [37/50] batch [980/2000] time 2.042 (2.036) data 0.000 (0.001) loss 0.4465 (1.1491) lr 6.3188e-04 eta 15:17:03
epoch [37/50] batch [1000/2000] time 2.064 (2.036) data 0.000 (0.001) loss 1.5753 (1.1465) lr 6.3188e-04 eta 15:16:24
epoch [37/50] batch [1020/2000] time 2.044 (2.037) data 0.000 (0.001) loss 2.8847 (1.1490) lr 6.3188e-04 eta 15:15:49
epoch [37/50] batch [1040/2000] time 2.065 (2.036) data 0.000 (0.001) loss 0.0845 (1.1457) lr 6.3188e-04 eta 15:15:03
epoch [37/50] batch [1060/2000] time 2.067 (2.036) data 0.000 (0.001) loss 0.0363 (1.1480) lr 6.3188e-04 eta 15:14:22
epoch [37/50] batch [1080/2000] time 2.005 (2.037) data 0.000 (0.001) loss 0.4230 (1.1494) lr 6.3188e-04 eta 15:13:45
epoch [37/50] batch [1100/2000] time 2.041 (2.037) data 0.000 (0.001) loss 0.8404 (1.1481) lr 6.3188e-04 eta 15:13:07
epoch [37/50] batch [1120/2000] time 2.066 (2.037) data 0.000 (0.001) loss 2.0926 (1.1523) lr 6.3188e-04 eta 15:12:27
epoch [37/50] batch [1140/2000] time 2.066 (2.037) data 0.001 (0.001) loss 1.5122 (1.1532) lr 6.3188e-04 eta 15:11:45
epoch [37/50] batch [1160/2000] time 2.066 (2.037) data 0.000 (0.001) loss 1.7740 (1.1507) lr 6.3188e-04 eta 15:11:06
epoch [37/50] batch [1180/2000] time 2.008 (2.037) data 0.000 (0.001) loss 1.6753 (1.1530) lr 6.3188e-04 eta 15:10:24
epoch [37/50] batch [1200/2000] time 2.056 (2.037) data 0.000 (0.001) loss 0.7013 (1.1499) lr 6.3188e-04 eta 15:09:42
epoch [37/50] batch [1220/2000] time 1.976 (2.037) data 0.000 (0.001) loss 0.6062 (1.1482) lr 6.3188e-04 eta 15:08:58
epoch [37/50] batch [1240/2000] time 2.055 (2.037) data 0.000 (0.001) loss 0.7315 (1.1583) lr 6.3188e-04 eta 15:08:19
epoch [37/50] batch [1260/2000] time 2.002 (2.036) data 0.000 (0.001) loss 0.0858 (1.1579) lr 6.3188e-04 eta 15:07:35
epoch [37/50] batch [1280/2000] time 2.056 (2.037) data 0.000 (0.001) loss 0.6074 (1.1598) lr 6.3188e-04 eta 15:06:55
epoch [37/50] batch [1300/2000] time 2.001 (2.036) data 0.000 (0.001) loss 0.2070 (1.1646) lr 6.3188e-04 eta 15:06:11
epoch [37/50] batch [1320/2000] time 2.060 (2.036) data 0.000 (0.001) loss 1.1526 (1.1648) lr 6.3188e-04 eta 15:05:28
epoch [37/50] batch [1340/2000] time 2.065 (2.036) data 0.000 (0.001) loss 0.9781 (1.1660) lr 6.3188e-04 eta 15:04:48
epoch [37/50] batch [1360/2000] time 2.041 (2.036) data 0.000 (0.001) loss 1.3882 (1.1646) lr 6.3188e-04 eta 15:04:09
epoch [37/50] batch [1380/2000] time 2.063 (2.036) data 0.000 (0.001) loss 2.3535 (1.1619) lr 6.3188e-04 eta 15:03:31
epoch [37/50] batch [1400/2000] time 2.007 (2.037) data 0.001 (0.001) loss 0.3904 (1.1668) lr 6.3188e-04 eta 15:02:55
epoch [37/50] batch [1420/2000] time 2.059 (2.037) data 0.000 (0.001) loss 2.0221 (1.1655) lr 6.3188e-04 eta 15:02:16
epoch [37/50] batch [1440/2000] time 2.036 (2.037) data 0.000 (0.001) loss 0.8524 (1.1647) lr 6.3188e-04 eta 15:01:33
epoch [37/50] batch [1460/2000] time 2.059 (2.037) data 0.000 (0.001) loss 0.6546 (1.1627) lr 6.3188e-04 eta 15:00:53
epoch [37/50] batch [1480/2000] time 2.037 (2.037) data 0.001 (0.001) loss 0.6065 (1.1615) lr 6.3188e-04 eta 15:00:09
epoch [37/50] batch [1500/2000] time 2.060 (2.037) data 0.000 (0.001) loss 0.0841 (1.1607) lr 6.3188e-04 eta 14:59:28
epoch [37/50] batch [1520/2000] time 1.980 (2.036) data 0.000 (0.001) loss 1.5466 (1.1597) lr 6.3188e-04 eta 14:58:45
epoch [37/50] batch [1540/2000] time 2.058 (2.036) data 0.000 (0.001) loss 1.3472 (1.1583) lr 6.3188e-04 eta 14:58:01
epoch [37/50] batch [1560/2000] time 2.036 (2.036) data 0.000 (0.001) loss 0.3955 (1.1558) lr 6.3188e-04 eta 14:57:22
epoch [37/50] batch [1580/2000] time 2.002 (2.036) data 0.000 (0.001) loss 0.1656 (1.1580) lr 6.3188e-04 eta 14:56:38
epoch [37/50] batch [1600/2000] time 2.006 (2.036) data 0.000 (0.001) loss 1.0790 (1.1636) lr 6.3188e-04 eta 14:55:57
epoch [37/50] batch [1620/2000] time 2.062 (2.036) data 0.000 (0.001) loss 0.2964 (1.1605) lr 6.3188e-04 eta 14:55:21
epoch [37/50] batch [1640/2000] time 2.063 (2.036) data 0.000 (0.001) loss 2.0280 (1.1587) lr 6.3188e-04 eta 14:54:42
epoch [37/50] batch [1660/2000] time 2.059 (2.036) data 0.000 (0.001) loss 1.3437 (1.1604) lr 6.3188e-04 eta 14:53:59
epoch [37/50] batch [1680/2000] time 2.039 (2.037) data 0.001 (0.001) loss 3.0506 (1.1601) lr 6.3188e-04 eta 14:53:22
epoch [37/50] batch [1700/2000] time 2.059 (2.037) data 0.000 (0.001) loss 3.1831 (1.1640) lr 6.3188e-04 eta 14:52:42
epoch [37/50] batch [1720/2000] time 2.000 (2.037) data 0.000 (0.001) loss 0.3537 (1.1582) lr 6.3188e-04 eta 14:52:01
epoch [37/50] batch [1740/2000] time 2.062 (2.037) data 0.000 (0.001) loss 0.1327 (1.1558) lr 6.3188e-04 eta 14:51:20
epoch [37/50] batch [1760/2000] time 2.059 (2.037) data 0.000 (0.001) loss 0.8465 (1.1546) lr 6.3188e-04 eta 14:50:38
epoch [37/50] batch [1780/2000] time 2.058 (2.037) data 0.000 (0.001) loss 0.9769 (1.1495) lr 6.3188e-04 eta 14:49:58
epoch [37/50] batch [1800/2000] time 2.031 (2.037) data 0.000 (0.001) loss 0.1986 (1.1505) lr 6.3188e-04 eta 14:49:16
epoch [37/50] batch [1820/2000] time 2.003 (2.036) data 0.000 (0.001) loss 2.1407 (1.1553) lr 6.3188e-04 eta 14:48:33
epoch [37/50] batch [1840/2000] time 2.041 (2.036) data 0.000 (0.001) loss 0.4511 (1.1552) lr 6.3188e-04 eta 14:47:54
epoch [37/50] batch [1860/2000] time 2.062 (2.037) data 0.000 (0.001) loss 0.2714 (1.1552) lr 6.3188e-04 eta 14:47:15
epoch [37/50] batch [1880/2000] time 2.063 (2.037) data 0.000 (0.001) loss 2.0121 (1.1557) lr 6.3188e-04 eta 14:46:35
epoch [37/50] batch [1900/2000] time 2.008 (2.037) data 0.001 (0.001) loss 1.2632 (1.1551) lr 6.3188e-04 eta 14:45:55
epoch [37/50] batch [1920/2000] time 2.066 (2.037) data 0.000 (0.001) loss 0.8600 (1.1524) lr 6.3188e-04 eta 14:45:16
epoch [37/50] batch [1940/2000] time 2.064 (2.037) data 0.000 (0.001) loss 0.9699 (1.1522) lr 6.3188e-04 eta 14:44:37
epoch [37/50] batch [1960/2000] time 2.008 (2.037) data 0.000 (0.001) loss 0.8207 (1.1529) lr 6.3188e-04 eta 14:43:58
epoch [37/50] batch [1980/2000] time 1.983 (2.037) data 0.000 (0.001) loss 1.1055 (1.1565) lr 6.3188e-04 eta 14:43:19
epoch [37/50] batch [2000/2000] time 2.037 (2.037) data 0.000 (0.001) loss 1.6245 (1.1544) lr 5.7422e-04 eta 14:42:39
epoch [38/50] batch [20/2000] time 2.064 (2.079) data 0.000 (0.034) loss 0.6253 (1.1640) lr 5.7422e-04 eta 15:00:22
epoch [38/50] batch [40/2000] time 2.004 (2.058) data 0.000 (0.017) loss 1.2519 (1.0775) lr 5.7422e-04 eta 14:50:28
epoch [38/50] batch [60/2000] time 2.062 (2.053) data 0.001 (0.012) loss 0.0890 (1.1221) lr 5.7422e-04 eta 14:47:38
epoch [38/50] batch [80/2000] time 1.984 (2.051) data 0.000 (0.009) loss 0.2177 (1.1118) lr 5.7422e-04 eta 14:45:52
epoch [38/50] batch [100/2000] time 2.001 (2.050) data 0.000 (0.007) loss 0.9049 (1.1466) lr 5.7422e-04 eta 14:44:45
epoch [38/50] batch [120/2000] time 2.005 (2.049) data 0.000 (0.006) loss 0.3572 (1.1450) lr 5.7422e-04 eta 14:43:37
epoch [38/50] batch [140/2000] time 2.062 (2.047) data 0.000 (0.005) loss 0.6689 (1.1417) lr 5.7422e-04 eta 14:42:23
epoch [38/50] batch [160/2000] time 2.042 (2.046) data 0.000 (0.004) loss 1.2668 (1.1377) lr 5.7422e-04 eta 14:41:04
epoch [38/50] batch [180/2000] time 2.000 (2.045) data 0.000 (0.004) loss 0.6774 (1.1266) lr 5.7422e-04 eta 14:39:55
epoch [38/50] batch [200/2000] time 2.036 (2.044) data 0.000 (0.004) loss 1.6828 (1.1037) lr 5.7422e-04 eta 14:38:43
epoch [38/50] batch [220/2000] time 2.063 (2.042) data 0.000 (0.003) loss 0.5804 (1.0945) lr 5.7422e-04 eta 14:37:32
epoch [38/50] batch [240/2000] time 2.039 (2.042) data 0.000 (0.003) loss 1.7147 (1.1127) lr 5.7422e-04 eta 14:36:31
epoch [38/50] batch [260/2000] time 2.063 (2.042) data 0.000 (0.003) loss 1.0119 (1.1152) lr 5.7422e-04 eta 14:35:54
epoch [38/50] batch [280/2000] time 2.061 (2.042) data 0.000 (0.003) loss 1.7653 (1.1108) lr 5.7422e-04 eta 14:35:19
epoch [38/50] batch [300/2000] time 2.063 (2.042) data 0.000 (0.002) loss 0.3383 (1.0949) lr 5.7422e-04 eta 14:34:48
epoch [38/50] batch [320/2000] time 2.066 (2.042) data 0.000 (0.002) loss 1.3420 (1.1007) lr 5.7422e-04 eta 14:33:50
epoch [38/50] batch [340/2000] time 2.005 (2.042) data 0.000 (0.002) loss 0.1830 (1.1139) lr 5.7422e-04 eta 14:33:06
epoch [38/50] batch [360/2000] time 2.063 (2.042) data 0.000 (0.002) loss 0.0247 (1.1252) lr 5.7422e-04 eta 14:32:39
epoch [38/50] batch [380/2000] time 1.978 (2.042) data 0.000 (0.002) loss 0.1441 (1.1245) lr 5.7422e-04 eta 14:31:51
epoch [38/50] batch [400/2000] time 2.035 (2.041) data 0.000 (0.002) loss 1.6102 (1.1159) lr 5.7422e-04 eta 14:30:58
epoch [38/50] batch [420/2000] time 2.001 (2.041) data 0.000 (0.002) loss 1.6598 (1.1345) lr 5.7422e-04 eta 14:30:08
epoch [38/50] batch [440/2000] time 2.061 (2.041) data 0.000 (0.002) loss 0.1522 (1.1333) lr 5.7422e-04 eta 14:29:16
epoch [38/50] batch [460/2000] time 2.056 (2.040) data 0.000 (0.002) loss 0.7496 (1.1347) lr 5.7422e-04 eta 14:28:31
epoch [38/50] batch [480/2000] time 2.005 (2.040) data 0.000 (0.002) loss 1.1578 (1.1299) lr 5.7422e-04 eta 14:27:48
epoch [38/50] batch [500/2000] time 2.056 (2.040) data 0.000 (0.002) loss 0.1551 (1.1452) lr 5.7422e-04 eta 14:27:05
epoch [38/50] batch [520/2000] time 1.981 (2.040) data 0.000 (0.002) loss 1.2079 (1.1460) lr 5.7422e-04 eta 14:26:21
epoch [38/50] batch [540/2000] time 2.006 (2.040) data 0.000 (0.001) loss 2.6728 (1.1485) lr 5.7422e-04 eta 14:25:44
epoch [38/50] batch [560/2000] time 2.034 (2.040) data 0.000 (0.001) loss 0.3434 (1.1513) lr 5.7422e-04 eta 14:25:07
epoch [38/50] batch [580/2000] time 2.059 (2.040) data 0.000 (0.001) loss 3.1496 (1.1435) lr 5.7422e-04 eta 14:24:19
epoch [38/50] batch [600/2000] time 2.001 (2.040) data 0.001 (0.001) loss 1.4070 (1.1387) lr 5.7422e-04 eta 14:23:28
epoch [38/50] batch [620/2000] time 2.062 (2.040) data 0.000 (0.001) loss 0.6214 (1.1445) lr 5.7422e-04 eta 14:22:43
epoch [38/50] batch [640/2000] time 2.001 (2.039) data 0.000 (0.001) loss 0.9791 (1.1374) lr 5.7422e-04 eta 14:21:52
epoch [38/50] batch [660/2000] time 2.002 (2.039) data 0.000 (0.001) loss 0.6657 (1.1392) lr 5.7422e-04 eta 14:21:02
epoch [38/50] batch [680/2000] time 2.007 (2.039) data 0.000 (0.001) loss 0.9594 (1.1282) lr 5.7422e-04 eta 14:20:19
epoch [38/50] batch [700/2000] time 2.066 (2.039) data 0.000 (0.001) loss 0.5373 (1.1170) lr 5.7422e-04 eta 14:19:35
epoch [38/50] batch [720/2000] time 1.978 (2.039) data 0.000 (0.001) loss 0.0165 (1.1233) lr 5.7422e-04 eta 14:19:00
epoch [38/50] batch [740/2000] time 2.056 (2.039) data 0.000 (0.001) loss 0.0671 (1.1113) lr 5.7422e-04 eta 14:18:12
epoch [38/50] batch [760/2000] time 2.056 (2.038) data 0.000 (0.001) loss 1.3677 (1.1063) lr 5.7422e-04 eta 14:17:31
epoch [38/50] batch [780/2000] time 2.037 (2.038) data 0.000 (0.001) loss 1.2464 (1.1067) lr 5.7422e-04 eta 14:16:48
epoch [38/50] batch [800/2000] time 2.064 (2.038) data 0.000 (0.001) loss 1.8083 (1.1130) lr 5.7422e-04 eta 14:16:07
epoch [38/50] batch [820/2000] time 2.038 (2.038) data 0.000 (0.001) loss 2.2245 (1.1091) lr 5.7422e-04 eta 14:15:25
epoch [38/50] batch [840/2000] time 2.040 (2.038) data 0.000 (0.001) loss 1.3399 (1.1115) lr 5.7422e-04 eta 14:14:46
epoch [38/50] batch [860/2000] time 2.006 (2.038) data 0.000 (0.001) loss 0.1591 (1.1139) lr 5.7422e-04 eta 14:14:04
epoch [38/50] batch [880/2000] time 2.040 (2.038) data 0.000 (0.001) loss 0.4843 (1.1163) lr 5.7422e-04 eta 14:13:23
epoch [38/50] batch [900/2000] time 2.005 (2.038) data 0.000 (0.001) loss 0.5398 (1.1229) lr 5.7422e-04 eta 14:12:45
epoch [38/50] batch [920/2000] time 2.065 (2.039) data 0.000 (0.001) loss 0.1775 (1.1168) lr 5.7422e-04 eta 14:12:05
epoch [38/50] batch [940/2000] time 2.065 (2.038) data 0.000 (0.001) loss 1.6345 (1.1263) lr 5.7422e-04 eta 14:11:24
epoch [38/50] batch [960/2000] time 2.066 (2.039) data 0.000 (0.001) loss 1.1194 (1.1293) lr 5.7422e-04 eta 14:10:46
epoch [38/50] batch [980/2000] time 2.005 (2.039) data 0.000 (0.001) loss 0.4660 (1.1369) lr 5.7422e-04 eta 14:10:08
epoch [38/50] batch [1000/2000] time 2.043 (2.039) data 0.000 (0.001) loss 0.6626 (1.1357) lr 5.7422e-04 eta 14:09:29
epoch [38/50] batch [1020/2000] time 2.065 (2.039) data 0.000 (0.001) loss 0.7004 (1.1361) lr 5.7422e-04 eta 14:08:52
epoch [38/50] batch [1040/2000] time 2.037 (2.039) data 0.000 (0.001) loss 1.1838 (1.1353) lr 5.7422e-04 eta 14:08:11
epoch [38/50] batch [1060/2000] time 2.061 (2.039) data 0.000 (0.001) loss 0.3340 (1.1331) lr 5.7422e-04 eta 14:07:28
epoch [38/50] batch [1080/2000] time 2.059 (2.039) data 0.000 (0.001) loss 0.3972 (1.1335) lr 5.7422e-04 eta 14:06:49
epoch [38/50] batch [1100/2000] time 2.032 (2.039) data 0.000 (0.001) loss 0.3421 (1.1310) lr 5.7422e-04 eta 14:06:08
epoch [38/50] batch [1120/2000] time 2.002 (2.039) data 0.000 (0.001) loss 2.8015 (1.1270) lr 5.7422e-04 eta 14:05:24
epoch [38/50] batch [1140/2000] time 2.037 (2.039) data 0.001 (0.001) loss 2.6802 (1.1267) lr 5.7422e-04 eta 14:04:42
epoch [38/50] batch [1160/2000] time 2.035 (2.039) data 0.000 (0.001) loss 1.7567 (1.1275) lr 5.7422e-04 eta 14:03:59
epoch [38/50] batch [1180/2000] time 2.064 (2.039) data 0.000 (0.001) loss 1.4630 (1.1269) lr 5.7422e-04 eta 14:03:19
epoch [38/50] batch [1200/2000] time 2.006 (2.039) data 0.000 (0.001) loss 3.4454 (1.1271) lr 5.7422e-04 eta 14:02:37
epoch [38/50] batch [1220/2000] time 2.065 (2.039) data 0.000 (0.001) loss 0.3058 (1.1255) lr 5.7422e-04 eta 14:01:55
epoch [38/50] batch [1240/2000] time 2.063 (2.039) data 0.000 (0.001) loss 0.4210 (1.1184) lr 5.7422e-04 eta 14:01:13
epoch [38/50] batch [1260/2000] time 2.062 (2.039) data 0.000 (0.001) loss 2.1646 (1.1187) lr 5.7422e-04 eta 14:00:33
epoch [38/50] batch [1280/2000] time 2.064 (2.039) data 0.000 (0.001) loss 2.1303 (1.1181) lr 5.7422e-04 eta 13:59:53
epoch [38/50] batch [1300/2000] time 2.007 (2.039) data 0.000 (0.001) loss 0.1684 (1.1187) lr 5.7422e-04 eta 13:59:11
epoch [38/50] batch [1320/2000] time 2.002 (2.039) data 0.000 (0.001) loss 1.2546 (1.1201) lr 5.7422e-04 eta 13:58:31
epoch [38/50] batch [1340/2000] time 2.033 (2.038) data 0.000 (0.001) loss 1.7630 (1.1207) lr 5.7422e-04 eta 13:57:46
epoch [38/50] batch [1360/2000] time 2.059 (2.038) data 0.000 (0.001) loss 1.0335 (1.1195) lr 5.7422e-04 eta 13:57:04
epoch [38/50] batch [1380/2000] time 2.035 (2.038) data 0.000 (0.001) loss 1.0108 (1.1191) lr 5.7422e-04 eta 13:56:21
epoch [38/50] batch [1400/2000] time 2.034 (2.038) data 0.000 (0.001) loss 2.4872 (1.1214) lr 5.7422e-04 eta 13:55:37
epoch [38/50] batch [1420/2000] time 2.059 (2.038) data 0.000 (0.001) loss 2.9835 (1.1196) lr 5.7422e-04 eta 13:54:56
epoch [38/50] batch [1440/2000] time 2.033 (2.038) data 0.000 (0.001) loss 1.1849 (1.1214) lr 5.7422e-04 eta 13:54:15
epoch [38/50] batch [1460/2000] time 1.981 (2.038) data 0.000 (0.001) loss 0.9801 (1.1248) lr 5.7422e-04 eta 13:53:34
epoch [38/50] batch [1480/2000] time 2.036 (2.038) data 0.000 (0.001) loss 1.7627 (1.1253) lr 5.7422e-04 eta 13:52:55
epoch [38/50] batch [1500/2000] time 2.035 (2.038) data 0.000 (0.001) loss 0.5461 (1.1276) lr 5.7422e-04 eta 13:52:13
epoch [38/50] batch [1520/2000] time 2.002 (2.038) data 0.000 (0.001) loss 0.6514 (1.1297) lr 5.7422e-04 eta 13:51:29
epoch [38/50] batch [1540/2000] time 1.975 (2.038) data 0.000 (0.001) loss 0.2321 (1.1264) lr 5.7422e-04 eta 13:50:47
epoch [38/50] batch [1560/2000] time 2.000 (2.038) data 0.000 (0.001) loss 2.3306 (1.1249) lr 5.7422e-04 eta 13:50:05
epoch [38/50] batch [1580/2000] time 2.056 (2.038) data 0.000 (0.001) loss 5.3161 (1.1313) lr 5.7422e-04 eta 13:49:22
epoch [38/50] batch [1600/2000] time 2.001 (2.038) data 0.000 (0.001) loss 1.0898 (1.1326) lr 5.7422e-04 eta 13:48:39
epoch [38/50] batch [1620/2000] time 2.058 (2.038) data 0.000 (0.001) loss 2.3810 (1.1368) lr 5.7422e-04 eta 13:47:59
epoch [38/50] batch [1640/2000] time 2.003 (2.038) data 0.000 (0.001) loss 1.1424 (1.1383) lr 5.7422e-04 eta 13:47:18
epoch [38/50] batch [1660/2000] time 2.057 (2.038) data 0.000 (0.001) loss 1.2549 (1.1357) lr 5.7422e-04 eta 13:46:38
epoch [38/50] batch [1680/2000] time 2.037 (2.038) data 0.001 (0.001) loss 3.2692 (1.1389) lr 5.7422e-04 eta 13:45:54
epoch [38/50] batch [1700/2000] time 2.039 (2.038) data 0.000 (0.001) loss 1.5176 (1.1455) lr 5.7422e-04 eta 13:45:12
epoch [38/50] batch [1720/2000] time 1.974 (2.037) data 0.000 (0.001) loss 1.2377 (1.1497) lr 5.7422e-04 eta 13:44:30
epoch [38/50] batch [1740/2000] time 2.037 (2.037) data 0.000 (0.001) loss 0.3083 (1.1510) lr 5.7422e-04 eta 13:43:47
epoch [38/50] batch [1760/2000] time 2.065 (2.037) data 0.000 (0.001) loss 1.0829 (1.1545) lr 5.7422e-04 eta 13:43:05
epoch [38/50] batch [1780/2000] time 2.040 (2.037) data 0.000 (0.001) loss 0.5071 (1.1531) lr 5.7422e-04 eta 13:42:23
epoch [38/50] batch [1800/2000] time 2.007 (2.037) data 0.000 (0.001) loss 0.1143 (1.1547) lr 5.7422e-04 eta 13:41:43
epoch [38/50] batch [1820/2000] time 2.062 (2.037) data 0.000 (0.001) loss 0.2973 (1.1508) lr 5.7422e-04 eta 13:41:04
epoch [38/50] batch [1840/2000] time 2.057 (2.037) data 0.000 (0.001) loss 2.3408 (1.1522) lr 5.7422e-04 eta 13:40:24
epoch [38/50] batch [1860/2000] time 2.003 (2.037) data 0.000 (0.001) loss 0.5868 (1.1482) lr 5.7422e-04 eta 13:39:41
epoch [38/50] batch [1880/2000] time 2.060 (2.037) data 0.000 (0.001) loss 0.6340 (1.1470) lr 5.7422e-04 eta 13:39:01
epoch [38/50] batch [1900/2000] time 2.004 (2.037) data 0.000 (0.001) loss 1.7539 (1.1486) lr 5.7422e-04 eta 13:38:20
epoch [38/50] batch [1920/2000] time 2.059 (2.037) data 0.000 (0.001) loss 0.2706 (1.1487) lr 5.7422e-04 eta 13:37:37
epoch [38/50] batch [1940/2000] time 2.060 (2.037) data 0.000 (0.001) loss 0.5631 (1.1492) lr 5.7422e-04 eta 13:36:59
epoch [38/50] batch [1960/2000] time 2.034 (2.037) data 0.000 (0.001) loss 1.3752 (1.1501) lr 5.7422e-04 eta 13:36:17
epoch [38/50] batch [1980/2000] time 2.057 (2.037) data 0.000 (0.001) loss 1.2710 (1.1516) lr 5.7422e-04 eta 13:35:37
epoch [38/50] batch [2000/2000] time 2.055 (2.037) data 0.000 (0.001) loss 1.8661 (1.1540) lr 5.1825e-04 eta 13:34:56
epoch [39/50] batch [20/2000] time 2.002 (2.076) data 0.000 (0.036) loss 2.3905 (0.9413) lr 5.1825e-04 eta 13:49:44
epoch [39/50] batch [40/2000] time 2.000 (2.054) data 0.000 (0.018) loss 1.5984 (1.0314) lr 5.1825e-04 eta 13:40:10
epoch [39/50] batch [60/2000] time 2.060 (2.050) data 0.001 (0.012) loss 1.2093 (1.0354) lr 5.1825e-04 eta 13:37:58
epoch [39/50] batch [80/2000] time 2.060 (2.046) data 0.000 (0.009) loss 0.8558 (1.0706) lr 5.1825e-04 eta 13:35:46
epoch [39/50] batch [100/2000] time 2.058 (2.044) data 0.000 (0.007) loss 0.4344 (1.0808) lr 5.1825e-04 eta 13:34:22
epoch [39/50] batch [120/2000] time 2.059 (2.044) data 0.000 (0.006) loss 2.7208 (1.1154) lr 5.1825e-04 eta 13:33:22
epoch [39/50] batch [140/2000] time 2.061 (2.043) data 0.000 (0.005) loss 0.1012 (1.1121) lr 5.1825e-04 eta 13:32:27
epoch [39/50] batch [160/2000] time 2.060 (2.042) data 0.000 (0.005) loss 2.4005 (1.1374) lr 5.1825e-04 eta 13:31:17
epoch [39/50] batch [180/2000] time 2.006 (2.041) data 0.000 (0.004) loss 0.2472 (1.1233) lr 5.1825e-04 eta 13:30:16
epoch [39/50] batch [200/2000] time 2.034 (2.040) data 0.000 (0.004) loss 0.9050 (1.1022) lr 5.1825e-04 eta 13:29:21
epoch [39/50] batch [220/2000] time 2.036 (2.040) data 0.000 (0.003) loss 0.6938 (1.0734) lr 5.1825e-04 eta 13:28:32
epoch [39/50] batch [240/2000] time 2.033 (2.039) data 0.000 (0.003) loss 1.3265 (1.0630) lr 5.1825e-04 eta 13:27:33
epoch [39/50] batch [260/2000] time 2.033 (2.039) data 0.000 (0.003) loss 0.3291 (1.0597) lr 5.1825e-04 eta 13:26:45
epoch [39/50] batch [280/2000] time 2.056 (2.039) data 0.000 (0.003) loss 0.7122 (1.0683) lr 5.1825e-04 eta 13:25:56
epoch [39/50] batch [300/2000] time 2.057 (2.038) data 0.000 (0.003) loss 0.3290 (1.0704) lr 5.1825e-04 eta 13:24:59
epoch [39/50] batch [320/2000] time 2.060 (2.038) data 0.000 (0.002) loss 0.6592 (1.0641) lr 5.1825e-04 eta 13:24:11
epoch [39/50] batch [340/2000] time 2.000 (2.037) data 0.000 (0.002) loss 0.2806 (1.0774) lr 5.1825e-04 eta 13:23:16
epoch [39/50] batch [360/2000] time 2.061 (2.037) data 0.000 (0.002) loss 0.4865 (1.0846) lr 5.1825e-04 eta 13:22:32
epoch [39/50] batch [380/2000] time 2.058 (2.037) data 0.000 (0.002) loss 1.6951 (1.0958) lr 5.1825e-04 eta 13:21:56
epoch [39/50] batch [400/2000] time 2.035 (2.037) data 0.000 (0.002) loss 0.4422 (1.1012) lr 5.1825e-04 eta 13:21:20
epoch [39/50] batch [420/2000] time 2.036 (2.037) data 0.000 (0.002) loss 2.5241 (1.1069) lr 5.1825e-04 eta 13:20:25
epoch [39/50] batch [440/2000] time 2.058 (2.037) data 0.000 (0.002) loss 2.3381 (1.1127) lr 5.1825e-04 eta 13:19:41
epoch [39/50] batch [460/2000] time 2.063 (2.037) data 0.000 (0.002) loss 2.7125 (1.1319) lr 5.1825e-04 eta 13:19:04
epoch [39/50] batch [480/2000] time 2.039 (2.037) data 0.000 (0.002) loss 0.4732 (1.1364) lr 5.1825e-04 eta 13:18:25
epoch [39/50] batch [500/2000] time 2.062 (2.037) data 0.000 (0.002) loss 3.0038 (1.1471) lr 5.1825e-04 eta 13:17:55
epoch [39/50] batch [520/2000] time 2.007 (2.037) data 0.000 (0.002) loss 0.1261 (1.1496) lr 5.1825e-04 eta 13:17:16
epoch [39/50] batch [540/2000] time 2.036 (2.037) data 0.000 (0.002) loss 0.4941 (1.1539) lr 5.1825e-04 eta 13:16:37
epoch [39/50] batch [560/2000] time 1.975 (2.037) data 0.000 (0.002) loss 0.3612 (1.1599) lr 5.1825e-04 eta 13:15:55
epoch [39/50] batch [580/2000] time 2.035 (2.037) data 0.000 (0.001) loss 0.7421 (1.1509) lr 5.1825e-04 eta 13:15:12
epoch [39/50] batch [600/2000] time 2.000 (2.037) data 0.001 (0.001) loss 2.4217 (1.1712) lr 5.1825e-04 eta 13:14:28
epoch [39/50] batch [620/2000] time 2.037 (2.037) data 0.000 (0.001) loss 0.3181 (1.1702) lr 5.1825e-04 eta 13:13:47
epoch [39/50] batch [640/2000] time 2.066 (2.037) data 0.000 (0.001) loss 0.4453 (1.1688) lr 5.1825e-04 eta 13:13:12
epoch [39/50] batch [660/2000] time 2.004 (2.038) data 0.000 (0.001) loss 1.0533 (1.1687) lr 5.1825e-04 eta 13:12:35
epoch [39/50] batch [680/2000] time 2.059 (2.037) data 0.000 (0.001) loss 0.8307 (1.1731) lr 5.1825e-04 eta 13:11:51
epoch [39/50] batch [700/2000] time 2.055 (2.037) data 0.000 (0.001) loss 2.3862 (1.1743) lr 5.1825e-04 eta 13:11:09
epoch [39/50] batch [720/2000] time 2.000 (2.037) data 0.000 (0.001) loss 1.1664 (1.1684) lr 5.1825e-04 eta 13:10:21
epoch [39/50] batch [740/2000] time 2.053 (2.037) data 0.000 (0.001) loss 0.8800 (1.1672) lr 5.1825e-04 eta 13:09:37
epoch [39/50] batch [760/2000] time 2.031 (2.037) data 0.000 (0.001) loss 0.7094 (1.1632) lr 5.1825e-04 eta 13:08:54
epoch [39/50] batch [780/2000] time 2.031 (2.037) data 0.000 (0.001) loss 0.5366 (1.1576) lr 5.1825e-04 eta 13:08:13
epoch [39/50] batch [800/2000] time 2.000 (2.037) data 0.000 (0.001) loss 1.3936 (1.1679) lr 5.1825e-04 eta 13:07:30
epoch [39/50] batch [820/2000] time 2.054 (2.037) data 0.000 (0.001) loss 0.2070 (1.1747) lr 5.1825e-04 eta 13:06:48
epoch [39/50] batch [840/2000] time 1.999 (2.036) data 0.000 (0.001) loss 1.0796 (1.1734) lr 5.1825e-04 eta 13:06:02
epoch [39/50] batch [860/2000] time 2.030 (2.036) data 0.000 (0.001) loss 1.1086 (1.1692) lr 5.1825e-04 eta 13:05:14
epoch [39/50] batch [880/2000] time 2.055 (2.036) data 0.000 (0.001) loss 0.3843 (1.1692) lr 5.1825e-04 eta 13:04:33
epoch [39/50] batch [900/2000] time 2.057 (2.036) data 0.000 (0.001) loss 0.2871 (1.1684) lr 5.1825e-04 eta 13:03:52
epoch [39/50] batch [920/2000] time 2.031 (2.036) data 0.000 (0.001) loss 0.1048 (1.1784) lr 5.1825e-04 eta 13:03:10
epoch [39/50] batch [940/2000] time 2.035 (2.036) data 0.000 (0.001) loss 0.8358 (1.1784) lr 5.1825e-04 eta 13:02:26
epoch [39/50] batch [960/2000] time 2.056 (2.036) data 0.000 (0.001) loss 2.3882 (1.1821) lr 5.1825e-04 eta 13:01:48
epoch [39/50] batch [980/2000] time 2.054 (2.036) data 0.000 (0.001) loss 0.4797 (1.1810) lr 5.1825e-04 eta 13:01:04
epoch [39/50] batch [1000/2000] time 2.054 (2.036) data 0.000 (0.001) loss 0.6637 (1.1773) lr 5.1825e-04 eta 13:00:23
epoch [39/50] batch [1020/2000] time 2.051 (2.036) data 0.000 (0.001) loss 0.0216 (1.1783) lr 5.1825e-04 eta 12:59:37
epoch [39/50] batch [1040/2000] time 2.057 (2.036) data 0.000 (0.001) loss 1.3318 (1.1721) lr 5.1825e-04 eta 12:58:55
epoch [39/50] batch [1060/2000] time 2.029 (2.035) data 0.000 (0.001) loss 1.3391 (1.1709) lr 5.1825e-04 eta 12:58:13
epoch [39/50] batch [1080/2000] time 2.032 (2.035) data 0.000 (0.001) loss 0.6246 (1.1699) lr 5.1825e-04 eta 12:57:31
epoch [39/50] batch [1100/2000] time 1.975 (2.035) data 0.000 (0.001) loss 0.1424 (1.1629) lr 5.1825e-04 eta 12:56:48
epoch [39/50] batch [1120/2000] time 2.054 (2.035) data 0.000 (0.001) loss 0.8296 (1.1597) lr 5.1825e-04 eta 12:56:09
epoch [39/50] batch [1140/2000] time 2.054 (2.035) data 0.001 (0.001) loss 2.5801 (1.1604) lr 5.1825e-04 eta 12:55:30
epoch [39/50] batch [1160/2000] time 2.055 (2.035) data 0.000 (0.001) loss 0.6602 (1.1568) lr 5.1825e-04 eta 12:54:47
epoch [39/50] batch [1180/2000] time 2.031 (2.035) data 0.000 (0.001) loss 0.3245 (1.1557) lr 5.1825e-04 eta 12:54:06
epoch [39/50] batch [1200/2000] time 1.996 (2.035) data 0.000 (0.001) loss 1.1459 (1.1540) lr 5.1825e-04 eta 12:53:21
epoch [39/50] batch [1220/2000] time 2.055 (2.035) data 0.000 (0.001) loss 2.0092 (1.1514) lr 5.1825e-04 eta 12:52:42
epoch [39/50] batch [1240/2000] time 2.056 (2.035) data 0.000 (0.001) loss 0.2654 (1.1525) lr 5.1825e-04 eta 12:52:01
epoch [39/50] batch [1260/2000] time 2.001 (2.035) data 0.000 (0.001) loss 1.3575 (1.1560) lr 5.1825e-04 eta 12:51:20
epoch [39/50] batch [1280/2000] time 2.058 (2.035) data 0.000 (0.001) loss 0.3364 (1.1523) lr 5.1825e-04 eta 12:50:38
epoch [39/50] batch [1300/2000] time 2.034 (2.035) data 0.000 (0.001) loss 0.2002 (1.1540) lr 5.1825e-04 eta 12:49:55
epoch [39/50] batch [1320/2000] time 2.059 (2.035) data 0.000 (0.001) loss 1.1506 (1.1507) lr 5.1825e-04 eta 12:49:14
epoch [39/50] batch [1340/2000] time 2.056 (2.035) data 0.000 (0.001) loss 0.4807 (1.1473) lr 5.1825e-04 eta 12:48:34
epoch [39/50] batch [1360/2000] time 2.059 (2.035) data 0.000 (0.001) loss 1.0962 (1.1466) lr 5.1825e-04 eta 12:47:52
epoch [39/50] batch [1380/2000] time 2.048 (2.035) data 0.000 (0.001) loss 1.1156 (1.1515) lr 5.1825e-04 eta 12:47:11
epoch [39/50] batch [1400/2000] time 2.030 (2.035) data 0.000 (0.001) loss 0.3822 (1.1514) lr 5.1825e-04 eta 12:46:28
epoch [39/50] batch [1420/2000] time 2.032 (2.035) data 0.000 (0.001) loss 0.5219 (1.1522) lr 5.1825e-04 eta 12:45:42
epoch [39/50] batch [1440/2000] time 2.050 (2.035) data 0.000 (0.001) loss 0.2454 (1.1524) lr 5.1825e-04 eta 12:45:00
epoch [39/50] batch [1460/2000] time 1.996 (2.034) data 0.000 (0.001) loss 0.1396 (1.1487) lr 5.1825e-04 eta 12:44:16
epoch [39/50] batch [1480/2000] time 2.025 (2.034) data 0.000 (0.001) loss 0.3438 (1.1529) lr 5.1825e-04 eta 12:43:33
epoch [39/50] batch [1500/2000] time 2.048 (2.034) data 0.000 (0.001) loss 0.8114 (1.1515) lr 5.1825e-04 eta 12:42:49
epoch [39/50] batch [1520/2000] time 2.031 (2.034) data 0.000 (0.001) loss 1.7260 (1.1505) lr 5.1825e-04 eta 12:42:06
epoch [39/50] batch [1540/2000] time 2.055 (2.034) data 0.000 (0.001) loss 0.3973 (1.1488) lr 5.1825e-04 eta 12:41:24
epoch [39/50] batch [1560/2000] time 2.023 (2.034) data 0.000 (0.001) loss 1.0810 (1.1470) lr 5.1825e-04 eta 12:40:43
epoch [39/50] batch [1580/2000] time 1.976 (2.034) data 0.000 (0.001) loss 3.3209 (1.1469) lr 5.1825e-04 eta 12:39:59
epoch [39/50] batch [1600/2000] time 2.034 (2.034) data 0.000 (0.001) loss 0.7049 (1.1455) lr 5.1825e-04 eta 12:39:19
epoch [39/50] batch [1620/2000] time 2.033 (2.034) data 0.000 (0.001) loss 4.1067 (1.1491) lr 5.1825e-04 eta 12:38:40
epoch [39/50] batch [1640/2000] time 2.056 (2.034) data 0.000 (0.001) loss 0.5667 (1.1450) lr 5.1825e-04 eta 12:37:59
epoch [39/50] batch [1660/2000] time 2.032 (2.034) data 0.000 (0.001) loss 1.1268 (1.1484) lr 5.1825e-04 eta 12:37:17
epoch [39/50] batch [1680/2000] time 2.053 (2.034) data 0.001 (0.001) loss 1.9403 (1.1487) lr 5.1825e-04 eta 12:36:36
epoch [39/50] batch [1700/2000] time 2.049 (2.034) data 0.000 (0.001) loss 0.2350 (1.1484) lr 5.1825e-04 eta 12:35:53
epoch [39/50] batch [1720/2000] time 1.971 (2.034) data 0.000 (0.001) loss 1.1787 (1.1465) lr 5.1825e-04 eta 12:35:09
epoch [39/50] batch [1740/2000] time 1.973 (2.034) data 0.000 (0.001) loss 1.0328 (1.1475) lr 5.1825e-04 eta 12:34:26
epoch [39/50] batch [1760/2000] time 2.046 (2.033) data 0.000 (0.001) loss 1.3078 (1.1483) lr 5.1825e-04 eta 12:33:43
epoch [39/50] batch [1780/2000] time 1.998 (2.033) data 0.000 (0.001) loss 0.6014 (1.1475) lr 5.1825e-04 eta 12:33:02
epoch [39/50] batch [1800/2000] time 2.025 (2.033) data 0.000 (0.001) loss 2.6433 (1.1491) lr 5.1825e-04 eta 12:32:20
epoch [39/50] batch [1820/2000] time 2.049 (2.033) data 0.000 (0.001) loss 1.1015 (1.1540) lr 5.1825e-04 eta 12:31:36
epoch [39/50] batch [1840/2000] time 1.994 (2.033) data 0.000 (0.001) loss 1.5657 (1.1535) lr 5.1825e-04 eta 12:30:53
epoch [39/50] batch [1860/2000] time 1.997 (2.033) data 0.000 (0.001) loss 0.8893 (1.1537) lr 5.1825e-04 eta 12:30:11
epoch [39/50] batch [1880/2000] time 1.997 (2.033) data 0.000 (0.001) loss 0.0431 (1.1531) lr 5.1825e-04 eta 12:29:28
epoch [39/50] batch [1900/2000] time 2.027 (2.033) data 0.000 (0.001) loss 0.0682 (1.1533) lr 5.1825e-04 eta 12:28:46
epoch [39/50] batch [1920/2000] time 2.049 (2.033) data 0.000 (0.001) loss 0.2716 (1.1545) lr 5.1825e-04 eta 12:28:05
epoch [39/50] batch [1940/2000] time 2.027 (2.033) data 0.000 (0.001) loss 0.7202 (1.1568) lr 5.1825e-04 eta 12:27:24
epoch [39/50] batch [1960/2000] time 1.997 (2.033) data 0.000 (0.001) loss 0.7617 (1.1575) lr 5.1825e-04 eta 12:26:42
epoch [39/50] batch [1980/2000] time 2.053 (2.033) data 0.000 (0.001) loss 2.9755 (1.1579) lr 5.1825e-04 eta 12:26:01
epoch [39/50] batch [2000/2000] time 2.051 (2.033) data 0.000 (0.001) loss 0.0715 (1.1574) lr 4.6417e-04 eta 12:25:19
epoch [40/50] batch [20/2000] time 2.001 (2.057) data 0.000 (0.028) loss 4.3916 (1.0538) lr 4.6417e-04 eta 12:33:25
epoch [40/50] batch [40/2000] time 2.048 (2.045) data 0.000 (0.014) loss 0.4729 (1.2449) lr 4.6417e-04 eta 12:28:31
epoch [40/50] batch [60/2000] time 2.050 (2.038) data 0.001 (0.010) loss 1.2754 (1.2548) lr 4.6417e-04 eta 12:25:19
epoch [40/50] batch [80/2000] time 1.996 (2.037) data 0.000 (0.007) loss 3.1663 (1.2147) lr 4.6417e-04 eta 12:24:00
epoch [40/50] batch [100/2000] time 1.972 (2.034) data 0.000 (0.006) loss 0.7834 (1.2108) lr 4.6417e-04 eta 12:22:20
epoch [40/50] batch [120/2000] time 1.993 (2.033) data 0.000 (0.005) loss 1.5107 (1.2278) lr 4.6417e-04 eta 12:21:30
epoch [40/50] batch [140/2000] time 2.027 (2.032) data 0.000 (0.004) loss 1.3254 (1.2286) lr 4.6417e-04 eta 12:20:22
epoch [40/50] batch [160/2000] time 1.995 (2.033) data 0.000 (0.004) loss 1.5341 (1.2223) lr 4.6417e-04 eta 12:19:53
epoch [40/50] batch [180/2000] time 2.048 (2.032) data 0.000 (0.003) loss 2.9184 (1.2482) lr 4.6417e-04 eta 12:18:57
epoch [40/50] batch [200/2000] time 1.973 (2.032) data 0.000 (0.003) loss 1.2161 (1.2495) lr 4.6417e-04 eta 12:18:12
epoch [40/50] batch [220/2000] time 1.998 (2.032) data 0.000 (0.003) loss 0.2640 (1.2289) lr 4.6417e-04 eta 12:17:34
epoch [40/50] batch [240/2000] time 2.050 (2.032) data 0.000 (0.003) loss 0.3392 (1.2317) lr 4.6417e-04 eta 12:16:53
epoch [40/50] batch [260/2000] time 2.051 (2.031) data 0.000 (0.002) loss 0.2797 (1.2273) lr 4.6417e-04 eta 12:16:03
epoch [40/50] batch [280/2000] time 2.051 (2.031) data 0.000 (0.002) loss 0.9138 (1.2250) lr 4.6417e-04 eta 12:15:24
epoch [40/50] batch [300/2000] time 2.051 (2.031) data 0.000 (0.002) loss 1.4652 (1.2254) lr 4.6417e-04 eta 12:14:39
epoch [40/50] batch [320/2000] time 2.048 (2.031) data 0.000 (0.002) loss 0.9951 (1.2326) lr 4.6417e-04 eta 12:14:02
epoch [40/50] batch [340/2000] time 2.048 (2.032) data 0.000 (0.002) loss 0.5484 (1.2310) lr 4.6417e-04 eta 12:13:22
epoch [40/50] batch [360/2000] time 1.972 (2.031) data 0.000 (0.002) loss 0.4450 (1.2285) lr 4.6417e-04 eta 12:12:35
epoch [40/50] batch [380/2000] time 2.001 (2.032) data 0.000 (0.002) loss 2.6752 (1.2196) lr 4.6417e-04 eta 12:12:02
epoch [40/50] batch [400/2000] time 2.052 (2.032) data 0.000 (0.002) loss 1.1935 (1.2355) lr 4.6417e-04 eta 12:11:29
epoch [40/50] batch [420/2000] time 1.974 (2.032) data 0.000 (0.002) loss 0.0191 (1.2341) lr 4.6417e-04 eta 12:10:43
epoch [40/50] batch [440/2000] time 2.051 (2.032) data 0.000 (0.001) loss 1.5180 (1.2232) lr 4.6417e-04 eta 12:10:04
epoch [40/50] batch [460/2000] time 1.996 (2.031) data 0.000 (0.001) loss 1.8526 (1.2119) lr 4.6417e-04 eta 12:09:16
epoch [40/50] batch [480/2000] time 2.000 (2.031) data 0.000 (0.001) loss 0.9947 (1.2006) lr 4.6417e-04 eta 12:08:32
epoch [40/50] batch [500/2000] time 2.054 (2.031) data 0.000 (0.001) loss 1.3931 (1.1950) lr 4.6417e-04 eta 12:07:55
epoch [40/50] batch [520/2000] time 2.052 (2.031) data 0.000 (0.001) loss 3.6587 (1.2054) lr 4.6417e-04 eta 12:07:12
epoch [40/50] batch [540/2000] time 2.032 (2.031) data 0.000 (0.001) loss 0.9803 (1.2098) lr 4.6417e-04 eta 12:06:30
epoch [40/50] batch [560/2000] time 1.996 (2.031) data 0.000 (0.001) loss 4.2486 (1.2034) lr 4.6417e-04 eta 12:05:52
epoch [40/50] batch [580/2000] time 2.050 (2.031) data 0.000 (0.001) loss 0.0929 (1.2093) lr 4.6417e-04 eta 12:05:07
epoch [40/50] batch [600/2000] time 1.999 (2.031) data 0.001 (0.001) loss 1.0841 (1.2122) lr 4.6417e-04 eta 12:04:28
epoch [40/50] batch [620/2000] time 2.049 (2.031) data 0.000 (0.001) loss 1.6963 (1.2048) lr 4.6417e-04 eta 12:03:46
epoch [40/50] batch [640/2000] time 2.031 (2.031) data 0.000 (0.001) loss 3.9931 (1.2005) lr 4.6417e-04 eta 12:03:03
epoch [40/50] batch [660/2000] time 2.048 (2.031) data 0.000 (0.001) loss 1.6441 (1.2026) lr 4.6417e-04 eta 12:02:22
epoch [40/50] batch [680/2000] time 1.996 (2.031) data 0.000 (0.001) loss 1.7327 (1.1870) lr 4.6417e-04 eta 12:01:41
epoch [40/50] batch [700/2000] time 2.025 (2.031) data 0.000 (0.001) loss 0.4157 (1.1808) lr 4.6417e-04 eta 12:01:01
epoch [40/50] batch [720/2000] time 1.995 (2.031) data 0.000 (0.001) loss 0.2505 (1.1750) lr 4.6417e-04 eta 12:00:17
epoch [40/50] batch [740/2000] time 1.975 (2.031) data 0.000 (0.001) loss 0.6944 (1.1754) lr 4.6417e-04 eta 11:59:33
epoch [40/50] batch [760/2000] time 2.048 (2.031) data 0.000 (0.001) loss 0.6012 (1.1727) lr 4.6417e-04 eta 11:58:53
epoch [40/50] batch [780/2000] time 2.025 (2.031) data 0.000 (0.001) loss 0.4821 (1.1770) lr 4.6417e-04 eta 11:58:10
epoch [40/50] batch [800/2000] time 2.046 (2.031) data 0.000 (0.001) loss 1.5184 (1.1772) lr 4.6417e-04 eta 11:57:26
epoch [40/50] batch [820/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.6335 (1.1773) lr 4.6417e-04 eta 11:56:45
epoch [40/50] batch [840/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.0457 (1.1723) lr 4.6417e-04 eta 11:56:01
epoch [40/50] batch [860/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.1858 (1.1642) lr 4.6417e-04 eta 11:55:20
epoch [40/50] batch [880/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.4929 (1.1682) lr 4.6417e-04 eta 11:54:38
epoch [40/50] batch [900/2000] time 2.053 (2.030) data 0.000 (0.001) loss 1.1674 (1.1750) lr 4.6417e-04 eta 11:53:58
epoch [40/50] batch [920/2000] time 2.046 (2.030) data 0.000 (0.001) loss 0.1489 (1.1785) lr 4.6417e-04 eta 11:53:15
epoch [40/50] batch [940/2000] time 2.046 (2.030) data 0.000 (0.001) loss 2.1608 (1.1840) lr 4.6417e-04 eta 11:52:31
epoch [40/50] batch [960/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.2077 (1.1796) lr 4.6417e-04 eta 11:51:52
epoch [40/50] batch [980/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.8001 (1.1916) lr 4.6417e-04 eta 11:51:10
epoch [40/50] batch [1000/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.5625 (1.1901) lr 4.6417e-04 eta 11:50:25
epoch [40/50] batch [1020/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.5326 (1.1883) lr 4.6417e-04 eta 11:49:43
epoch [40/50] batch [1040/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.1110 (1.1856) lr 4.6417e-04 eta 11:49:03
epoch [40/50] batch [1060/2000] time 2.002 (2.030) data 0.000 (0.001) loss 1.1086 (1.1798) lr 4.6417e-04 eta 11:48:23
epoch [40/50] batch [1080/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.3229 (1.1758) lr 4.6417e-04 eta 11:47:44
epoch [40/50] batch [1100/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.3849 (1.1804) lr 4.6417e-04 eta 11:47:02
epoch [40/50] batch [1120/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.3437 (1.1774) lr 4.6417e-04 eta 11:46:22
epoch [40/50] batch [1140/2000] time 2.031 (2.030) data 0.001 (0.001) loss 2.2929 (1.1770) lr 4.6417e-04 eta 11:45:42
epoch [40/50] batch [1160/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.4739 (1.1752) lr 4.6417e-04 eta 11:45:02
epoch [40/50] batch [1180/2000] time 2.052 (2.030) data 0.000 (0.001) loss 4.1580 (1.1779) lr 4.6417e-04 eta 11:44:19
epoch [40/50] batch [1200/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.0665 (1.1798) lr 4.6417e-04 eta 11:43:41
epoch [40/50] batch [1220/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.0808 (1.1763) lr 4.6417e-04 eta 11:42:58
epoch [40/50] batch [1240/2000] time 1.975 (2.030) data 0.000 (0.001) loss 1.2177 (1.1790) lr 4.6417e-04 eta 11:42:16
epoch [40/50] batch [1260/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.2785 (1.1811) lr 4.6417e-04 eta 11:41:34
epoch [40/50] batch [1280/2000] time 1.997 (2.030) data 0.000 (0.001) loss 1.0069 (1.1787) lr 4.6417e-04 eta 11:40:52
epoch [40/50] batch [1300/2000] time 2.048 (2.030) data 0.000 (0.001) loss 1.6293 (1.1802) lr 4.6417e-04 eta 11:40:11
epoch [40/50] batch [1320/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.9625 (1.1834) lr 4.6417e-04 eta 11:39:31
epoch [40/50] batch [1340/2000] time 2.025 (2.030) data 0.000 (0.001) loss 0.6674 (1.1809) lr 4.6417e-04 eta 11:38:50
epoch [40/50] batch [1360/2000] time 2.025 (2.030) data 0.000 (0.001) loss 0.3078 (1.1811) lr 4.6417e-04 eta 11:38:09
epoch [40/50] batch [1380/2000] time 2.050 (2.030) data 0.000 (0.001) loss 2.3980 (1.1826) lr 4.6417e-04 eta 11:37:30
epoch [40/50] batch [1400/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.3846 (1.1876) lr 4.6417e-04 eta 11:36:48
epoch [40/50] batch [1420/2000] time 1.976 (2.030) data 0.000 (0.001) loss 1.1859 (1.1846) lr 4.6417e-04 eta 11:36:07
epoch [40/50] batch [1440/2000] time 2.032 (2.029) data 0.000 (0.001) loss 0.3670 (1.1802) lr 4.6417e-04 eta 11:35:25
epoch [40/50] batch [1460/2000] time 2.030 (2.029) data 0.000 (0.001) loss 3.4129 (1.1878) lr 4.6417e-04 eta 11:34:44
epoch [40/50] batch [1480/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.9006 (1.1863) lr 4.6417e-04 eta 11:34:04
epoch [40/50] batch [1500/2000] time 2.033 (2.029) data 0.000 (0.001) loss 1.6726 (1.1814) lr 4.6417e-04 eta 11:33:23
epoch [40/50] batch [1520/2000] time 2.059 (2.030) data 0.000 (0.001) loss 0.6578 (1.1821) lr 4.6417e-04 eta 11:32:44
epoch [40/50] batch [1540/2000] time 2.028 (2.030) data 0.000 (0.001) loss 0.5700 (1.1862) lr 4.6417e-04 eta 11:32:03
epoch [40/50] batch [1560/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.1015 (1.1808) lr 4.6417e-04 eta 11:31:22
epoch [40/50] batch [1580/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.3017 (1.1809) lr 4.6417e-04 eta 11:30:40
epoch [40/50] batch [1600/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.3061 (1.1807) lr 4.6417e-04 eta 11:30:01
epoch [40/50] batch [1620/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.2232 (1.1777) lr 4.6417e-04 eta 11:29:19
epoch [40/50] batch [1640/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.0650 (1.1747) lr 4.6417e-04 eta 11:28:38
epoch [40/50] batch [1660/2000] time 1.975 (2.029) data 0.000 (0.001) loss 1.5920 (1.1727) lr 4.6417e-04 eta 11:27:56
epoch [40/50] batch [1680/2000] time 1.997 (2.029) data 0.001 (0.001) loss 0.3117 (1.1752) lr 4.6417e-04 eta 11:27:15
epoch [40/50] batch [1700/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.0905 (1.1736) lr 4.6417e-04 eta 11:26:32
epoch [40/50] batch [1720/2000] time 2.026 (2.029) data 0.000 (0.001) loss 3.2189 (1.1736) lr 4.6417e-04 eta 11:25:51
epoch [40/50] batch [1740/2000] time 2.002 (2.029) data 0.000 (0.001) loss 1.5529 (1.1747) lr 4.6417e-04 eta 11:25:11
epoch [40/50] batch [1760/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.0752 (1.1744) lr 4.6417e-04 eta 11:24:33
epoch [40/50] batch [1780/2000] time 2.037 (2.029) data 0.000 (0.001) loss 1.2133 (1.1780) lr 4.6417e-04 eta 11:23:54
epoch [40/50] batch [1800/2000] time 2.057 (2.029) data 0.000 (0.001) loss 0.2132 (1.1779) lr 4.6417e-04 eta 11:23:13
epoch [40/50] batch [1820/2000] time 2.032 (2.029) data 0.000 (0.001) loss 0.3816 (1.1831) lr 4.6417e-04 eta 11:22:33
epoch [40/50] batch [1840/2000] time 2.003 (2.029) data 0.000 (0.001) loss 3.1428 (1.1850) lr 4.6417e-04 eta 11:21:53
epoch [40/50] batch [1860/2000] time 2.058 (2.030) data 0.000 (0.001) loss 1.2459 (1.1843) lr 4.6417e-04 eta 11:21:14
epoch [40/50] batch [1880/2000] time 2.003 (2.030) data 0.000 (0.001) loss 0.6910 (1.1843) lr 4.6417e-04 eta 11:20:35
epoch [40/50] batch [1900/2000] time 2.002 (2.030) data 0.000 (0.001) loss 2.6299 (1.1842) lr 4.6417e-04 eta 11:19:55
epoch [40/50] batch [1920/2000] time 2.030 (2.030) data 0.000 (0.000) loss 1.9529 (1.1862) lr 4.6417e-04 eta 11:19:15
epoch [40/50] batch [1940/2000] time 2.048 (2.030) data 0.000 (0.000) loss 0.8689 (1.1888) lr 4.6417e-04 eta 11:18:34
epoch [40/50] batch [1960/2000] time 2.051 (2.030) data 0.000 (0.000) loss 0.8182 (1.1870) lr 4.6417e-04 eta 11:17:53
epoch [40/50] batch [1980/2000] time 2.002 (2.030) data 0.000 (0.000) loss 0.2194 (1.1861) lr 4.6417e-04 eta 11:17:12
epoch [40/50] batch [2000/2000] time 1.995 (2.030) data 0.000 (0.000) loss 1.0710 (1.1846) lr 4.1221e-04 eta 11:16:30
epoch [41/50] batch [20/2000] time 2.049 (2.060) data 0.000 (0.028) loss 1.0958 (1.3186) lr 4.1221e-04 eta 11:25:53
epoch [41/50] batch [40/2000] time 2.047 (2.042) data 0.000 (0.014) loss 0.4912 (1.4144) lr 4.1221e-04 eta 11:19:08
epoch [41/50] batch [60/2000] time 2.058 (2.037) data 0.001 (0.009) loss 1.1258 (1.3022) lr 4.1221e-04 eta 11:16:56
epoch [41/50] batch [80/2000] time 2.058 (2.038) data 0.000 (0.007) loss 2.3416 (1.3159) lr 4.1221e-04 eta 11:16:28
epoch [41/50] batch [100/2000] time 1.978 (2.036) data 0.000 (0.006) loss 0.9471 (1.3174) lr 4.1221e-04 eta 11:15:16
epoch [41/50] batch [120/2000] time 2.057 (2.035) data 0.000 (0.005) loss 0.3319 (1.3338) lr 4.1221e-04 eta 11:14:21
epoch [41/50] batch [140/2000] time 2.037 (2.034) data 0.000 (0.004) loss 1.9022 (1.2750) lr 4.1221e-04 eta 11:13:09
epoch [41/50] batch [160/2000] time 1.980 (2.034) data 0.000 (0.004) loss 0.8389 (1.2073) lr 4.1221e-04 eta 11:12:33
epoch [41/50] batch [180/2000] time 2.058 (2.034) data 0.000 (0.003) loss 0.6112 (1.2173) lr 4.1221e-04 eta 11:11:52
epoch [41/50] batch [200/2000] time 2.056 (2.034) data 0.000 (0.003) loss 2.2156 (1.1991) lr 4.1221e-04 eta 11:11:07
epoch [41/50] batch [220/2000] time 2.059 (2.034) data 0.000 (0.003) loss 1.4226 (1.1939) lr 4.1221e-04 eta 11:10:26
epoch [41/50] batch [240/2000] time 1.998 (2.033) data 0.000 (0.003) loss 1.1389 (1.1829) lr 4.1221e-04 eta 11:09:31
epoch [41/50] batch [260/2000] time 2.001 (2.033) data 0.000 (0.002) loss 1.6366 (1.2028) lr 4.1221e-04 eta 11:08:45
epoch [41/50] batch [280/2000] time 2.053 (2.032) data 0.000 (0.002) loss 0.1864 (1.1915) lr 4.1221e-04 eta 11:07:54
epoch [41/50] batch [300/2000] time 2.053 (2.032) data 0.000 (0.002) loss 1.5187 (1.1692) lr 4.1221e-04 eta 11:07:10
epoch [41/50] batch [320/2000] time 2.026 (2.032) data 0.000 (0.002) loss 0.7298 (1.1622) lr 4.1221e-04 eta 11:06:30
epoch [41/50] batch [340/2000] time 2.026 (2.032) data 0.000 (0.002) loss 0.2316 (1.1543) lr 4.1221e-04 eta 11:05:42
epoch [41/50] batch [360/2000] time 1.999 (2.032) data 0.000 (0.002) loss 0.5547 (1.1521) lr 4.1221e-04 eta 11:04:59
epoch [41/50] batch [380/2000] time 1.999 (2.032) data 0.000 (0.002) loss 3.3453 (1.1565) lr 4.1221e-04 eta 11:04:18
epoch [41/50] batch [400/2000] time 2.054 (2.032) data 0.000 (0.002) loss 0.0288 (1.1515) lr 4.1221e-04 eta 11:03:39
epoch [41/50] batch [420/2000] time 2.052 (2.032) data 0.000 (0.002) loss 2.6229 (1.1590) lr 4.1221e-04 eta 11:02:56
epoch [41/50] batch [440/2000] time 2.053 (2.031) data 0.000 (0.001) loss 2.4865 (1.1614) lr 4.1221e-04 eta 11:02:15
epoch [41/50] batch [460/2000] time 2.029 (2.031) data 0.000 (0.001) loss 1.0369 (1.1819) lr 4.1221e-04 eta 11:01:34
epoch [41/50] batch [480/2000] time 2.032 (2.031) data 0.000 (0.001) loss 1.0658 (1.1674) lr 4.1221e-04 eta 11:00:48
epoch [41/50] batch [500/2000] time 2.053 (2.031) data 0.000 (0.001) loss 3.0100 (1.1713) lr 4.1221e-04 eta 11:00:09
epoch [41/50] batch [520/2000] time 2.002 (2.031) data 0.000 (0.001) loss 1.2035 (1.1794) lr 4.1221e-04 eta 10:59:31
epoch [41/50] batch [540/2000] time 2.031 (2.032) data 0.000 (0.001) loss 0.1437 (1.1767) lr 4.1221e-04 eta 10:58:55
epoch [41/50] batch [560/2000] time 2.049 (2.031) data 0.000 (0.001) loss 1.2249 (1.1792) lr 4.1221e-04 eta 10:58:07
epoch [41/50] batch [580/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.3631 (1.1703) lr 4.1221e-04 eta 10:57:21
epoch [41/50] batch [600/2000] time 2.053 (2.031) data 0.001 (0.001) loss 0.1949 (1.1756) lr 4.1221e-04 eta 10:56:43
epoch [41/50] batch [620/2000] time 2.029 (2.031) data 0.000 (0.001) loss 3.1981 (1.1828) lr 4.1221e-04 eta 10:56:04
epoch [41/50] batch [640/2000] time 2.032 (2.031) data 0.000 (0.001) loss 0.1687 (1.1832) lr 4.1221e-04 eta 10:55:29
epoch [41/50] batch [660/2000] time 2.006 (2.032) data 0.000 (0.001) loss 1.3301 (1.1893) lr 4.1221e-04 eta 10:54:52
epoch [41/50] batch [680/2000] time 2.004 (2.032) data 0.000 (0.001) loss 2.8440 (1.1913) lr 4.1221e-04 eta 10:54:11
epoch [41/50] batch [700/2000] time 2.053 (2.032) data 0.000 (0.001) loss 0.4067 (1.1831) lr 4.1221e-04 eta 10:53:30
epoch [41/50] batch [720/2000] time 2.051 (2.032) data 0.000 (0.001) loss 0.1648 (1.1790) lr 4.1221e-04 eta 10:52:50
epoch [41/50] batch [740/2000] time 2.048 (2.032) data 0.000 (0.001) loss 1.8585 (1.1810) lr 4.1221e-04 eta 10:52:09
epoch [41/50] batch [760/2000] time 2.055 (2.032) data 0.000 (0.001) loss 0.7876 (1.1738) lr 4.1221e-04 eta 10:51:26
epoch [41/50] batch [780/2000] time 2.002 (2.031) data 0.000 (0.001) loss 0.2058 (1.1771) lr 4.1221e-04 eta 10:50:44
epoch [41/50] batch [800/2000] time 2.055 (2.031) data 0.000 (0.001) loss 0.7877 (1.1697) lr 4.1221e-04 eta 10:50:03
epoch [41/50] batch [820/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.8405 (1.1741) lr 4.1221e-04 eta 10:49:23
epoch [41/50] batch [840/2000] time 2.054 (2.031) data 0.000 (0.001) loss 0.7514 (1.1731) lr 4.1221e-04 eta 10:48:41
epoch [41/50] batch [860/2000] time 2.027 (2.031) data 0.000 (0.001) loss 0.2161 (1.1745) lr 4.1221e-04 eta 10:48:00
epoch [41/50] batch [880/2000] time 2.001 (2.031) data 0.000 (0.001) loss 3.7689 (1.1717) lr 4.1221e-04 eta 10:47:20
epoch [41/50] batch [900/2000] time 1.996 (2.031) data 0.000 (0.001) loss 2.7090 (1.1719) lr 4.1221e-04 eta 10:46:38
epoch [41/50] batch [920/2000] time 2.051 (2.031) data 0.000 (0.001) loss 1.6441 (1.1708) lr 4.1221e-04 eta 10:45:55
epoch [41/50] batch [940/2000] time 2.048 (2.031) data 0.000 (0.001) loss 1.6418 (1.1676) lr 4.1221e-04 eta 10:45:13
epoch [41/50] batch [960/2000] time 2.026 (2.031) data 0.000 (0.001) loss 0.6887 (1.1659) lr 4.1221e-04 eta 10:44:32
epoch [41/50] batch [980/2000] time 2.048 (2.031) data 0.000 (0.001) loss 1.0963 (1.1701) lr 4.1221e-04 eta 10:43:50
epoch [41/50] batch [1000/2000] time 2.051 (2.031) data 0.000 (0.001) loss 0.2040 (1.1679) lr 4.1221e-04 eta 10:43:10
epoch [41/50] batch [1020/2000] time 1.975 (2.031) data 0.000 (0.001) loss 2.2096 (1.1661) lr 4.1221e-04 eta 10:42:30
epoch [41/50] batch [1040/2000] time 2.058 (2.031) data 0.000 (0.001) loss 1.1814 (1.1702) lr 4.1221e-04 eta 10:41:52
epoch [41/50] batch [1060/2000] time 2.055 (2.031) data 0.000 (0.001) loss 2.4254 (1.1741) lr 4.1221e-04 eta 10:41:13
epoch [41/50] batch [1080/2000] time 2.058 (2.031) data 0.000 (0.001) loss 0.3140 (1.1730) lr 4.1221e-04 eta 10:40:34
epoch [41/50] batch [1100/2000] time 2.001 (2.031) data 0.000 (0.001) loss 1.0468 (1.1704) lr 4.1221e-04 eta 10:39:53
epoch [41/50] batch [1120/2000] time 1.998 (2.031) data 0.000 (0.001) loss 1.5851 (1.1747) lr 4.1221e-04 eta 10:39:13
epoch [41/50] batch [1140/2000] time 2.001 (2.031) data 0.000 (0.001) loss 0.0341 (1.1770) lr 4.1221e-04 eta 10:38:28
epoch [41/50] batch [1160/2000] time 2.003 (2.031) data 0.000 (0.001) loss 2.9066 (1.1834) lr 4.1221e-04 eta 10:37:47
epoch [41/50] batch [1180/2000] time 2.028 (2.031) data 0.000 (0.001) loss 0.7341 (1.1796) lr 4.1221e-04 eta 10:37:06
epoch [41/50] batch [1200/2000] time 2.056 (2.031) data 0.000 (0.001) loss 1.3145 (1.1811) lr 4.1221e-04 eta 10:36:23
epoch [41/50] batch [1220/2000] time 2.058 (2.031) data 0.000 (0.001) loss 2.3241 (1.1832) lr 4.1221e-04 eta 10:35:44
epoch [41/50] batch [1240/2000] time 1.998 (2.031) data 0.000 (0.001) loss 2.0973 (1.1813) lr 4.1221e-04 eta 10:35:03
epoch [41/50] batch [1260/2000] time 1.999 (2.031) data 0.000 (0.001) loss 0.2908 (1.1813) lr 4.1221e-04 eta 10:34:22
epoch [41/50] batch [1280/2000] time 2.000 (2.031) data 0.000 (0.001) loss 1.7746 (1.1806) lr 4.1221e-04 eta 10:33:40
epoch [41/50] batch [1300/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.8710 (1.1756) lr 4.1221e-04 eta 10:33:01
epoch [41/50] batch [1320/2000] time 2.027 (2.031) data 0.000 (0.001) loss 0.0457 (1.1765) lr 4.1221e-04 eta 10:32:18
epoch [41/50] batch [1340/2000] time 1.994 (2.031) data 0.000 (0.001) loss 0.1776 (1.1755) lr 4.1221e-04 eta 10:31:36
epoch [41/50] batch [1360/2000] time 2.027 (2.031) data 0.000 (0.001) loss 1.3690 (1.1740) lr 4.1221e-04 eta 10:30:54
epoch [41/50] batch [1380/2000] time 2.050 (2.031) data 0.000 (0.001) loss 1.2437 (1.1698) lr 4.1221e-04 eta 10:30:12
epoch [41/50] batch [1400/2000] time 2.056 (2.031) data 0.000 (0.001) loss 0.3023 (1.1663) lr 4.1221e-04 eta 10:29:28
epoch [41/50] batch [1420/2000] time 2.000 (2.031) data 0.000 (0.001) loss 0.5708 (1.1654) lr 4.1221e-04 eta 10:28:48
epoch [41/50] batch [1440/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.1403 (1.1628) lr 4.1221e-04 eta 10:28:06
epoch [41/50] batch [1460/2000] time 2.055 (2.031) data 0.000 (0.001) loss 1.5581 (1.1637) lr 4.1221e-04 eta 10:27:26
epoch [41/50] batch [1480/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.2446 (1.1633) lr 4.1221e-04 eta 10:26:44
epoch [41/50] batch [1500/2000] time 1.994 (2.030) data 0.000 (0.001) loss 0.6874 (1.1656) lr 4.1221e-04 eta 10:26:03
epoch [41/50] batch [1520/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.1024 (1.1659) lr 4.1221e-04 eta 10:25:22
epoch [41/50] batch [1540/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.6893 (1.1638) lr 4.1221e-04 eta 10:24:41
epoch [41/50] batch [1560/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.7248 (1.1591) lr 4.1221e-04 eta 10:24:01
epoch [41/50] batch [1580/2000] time 1.998 (2.030) data 0.000 (0.001) loss 0.7547 (1.1604) lr 4.1221e-04 eta 10:23:19
epoch [41/50] batch [1600/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.0438 (1.1606) lr 4.1221e-04 eta 10:22:38
epoch [41/50] batch [1620/2000] time 2.026 (2.030) data 0.000 (0.001) loss 2.5478 (1.1579) lr 4.1221e-04 eta 10:21:58
epoch [41/50] batch [1640/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.0544 (1.1608) lr 4.1221e-04 eta 10:21:18
epoch [41/50] batch [1660/2000] time 2.047 (2.030) data 0.000 (0.001) loss 3.7179 (1.1589) lr 4.1221e-04 eta 10:20:37
epoch [41/50] batch [1680/2000] time 2.053 (2.030) data 0.001 (0.001) loss 1.6807 (1.1602) lr 4.1221e-04 eta 10:19:56
epoch [41/50] batch [1700/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.3088 (1.1593) lr 4.1221e-04 eta 10:19:16
epoch [41/50] batch [1720/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.3688 (1.1588) lr 4.1221e-04 eta 10:18:35
epoch [41/50] batch [1740/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.3406 (1.1563) lr 4.1221e-04 eta 10:17:53
epoch [41/50] batch [1760/2000] time 2.029 (2.030) data 0.000 (0.001) loss 3.4303 (1.1546) lr 4.1221e-04 eta 10:17:13
epoch [41/50] batch [1780/2000] time 2.029 (2.030) data 0.000 (0.001) loss 1.0627 (1.1518) lr 4.1221e-04 eta 10:16:32
epoch [41/50] batch [1800/2000] time 2.000 (2.030) data 0.000 (0.001) loss 2.1547 (1.1501) lr 4.1221e-04 eta 10:15:52
epoch [41/50] batch [1820/2000] time 2.035 (2.030) data 0.000 (0.001) loss 1.7461 (1.1507) lr 4.1221e-04 eta 10:15:13
epoch [41/50] batch [1840/2000] time 2.056 (2.031) data 0.000 (0.001) loss 0.6856 (1.1531) lr 4.1221e-04 eta 10:14:34
epoch [41/50] batch [1860/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.3697 (1.1531) lr 4.1221e-04 eta 10:13:52
epoch [41/50] batch [1880/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.8393 (1.1511) lr 4.1221e-04 eta 10:13:11
epoch [41/50] batch [1900/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.6077 (1.1558) lr 4.1221e-04 eta 10:12:30
epoch [41/50] batch [1920/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.6143 (1.1607) lr 4.1221e-04 eta 10:11:49
epoch [41/50] batch [1940/2000] time 2.057 (2.030) data 0.000 (0.000) loss 1.4658 (1.1621) lr 4.1221e-04 eta 10:11:09
epoch [41/50] batch [1960/2000] time 2.029 (2.031) data 0.000 (0.000) loss 1.7790 (1.1608) lr 4.1221e-04 eta 10:10:30
epoch [41/50] batch [1980/2000] time 2.051 (2.031) data 0.000 (0.000) loss 0.1374 (1.1593) lr 4.1221e-04 eta 10:09:49
epoch [41/50] batch [2000/2000] time 2.047 (2.030) data 0.000 (0.000) loss 0.3725 (1.1606) lr 3.6258e-04 eta 10:09:08
epoch [42/50] batch [20/2000] time 2.029 (2.051) data 0.000 (0.028) loss 0.0183 (0.8308) lr 3.6258e-04 eta 10:14:44
epoch [42/50] batch [40/2000] time 2.056 (2.047) data 0.000 (0.014) loss 1.6783 (0.9511) lr 3.6258e-04 eta 10:12:51
epoch [42/50] batch [60/2000] time 2.031 (2.043) data 0.001 (0.009) loss 1.6276 (1.1099) lr 3.6258e-04 eta 10:10:53
epoch [42/50] batch [80/2000] time 2.050 (2.039) data 0.000 (0.007) loss 0.0796 (1.2012) lr 3.6258e-04 eta 10:09:06
epoch [42/50] batch [100/2000] time 2.003 (2.036) data 0.000 (0.006) loss 0.4461 (1.1405) lr 3.6258e-04 eta 10:07:16
epoch [42/50] batch [120/2000] time 1.974 (2.036) data 0.000 (0.005) loss 0.8317 (1.1766) lr 3.6258e-04 eta 10:06:38
epoch [42/50] batch [140/2000] time 2.054 (2.034) data 0.000 (0.004) loss 0.6300 (1.1246) lr 3.6258e-04 eta 10:05:35
epoch [42/50] batch [160/2000] time 2.033 (2.034) data 0.000 (0.004) loss 0.2066 (1.1633) lr 3.6258e-04 eta 10:04:49
epoch [42/50] batch [180/2000] time 2.000 (2.033) data 0.000 (0.003) loss 0.4205 (1.2055) lr 3.6258e-04 eta 10:03:53
epoch [42/50] batch [200/2000] time 2.052 (2.034) data 0.000 (0.003) loss 0.9423 (1.2073) lr 3.6258e-04 eta 10:03:21
epoch [42/50] batch [220/2000] time 1.998 (2.033) data 0.000 (0.003) loss 1.2210 (1.2261) lr 3.6258e-04 eta 10:02:21
epoch [42/50] batch [240/2000] time 1.998 (2.032) data 0.000 (0.002) loss 1.6732 (1.2430) lr 3.6258e-04 eta 10:01:30
epoch [42/50] batch [260/2000] time 2.050 (2.032) data 0.000 (0.002) loss 1.3206 (1.2360) lr 3.6258e-04 eta 10:00:47
epoch [42/50] batch [280/2000] time 2.053 (2.032) data 0.000 (0.002) loss 0.1556 (1.2590) lr 3.6258e-04 eta 10:00:01
epoch [42/50] batch [300/2000] time 2.055 (2.032) data 0.000 (0.002) loss 1.1388 (1.2717) lr 3.6258e-04 eta 9:59:17
epoch [42/50] batch [320/2000] time 1.997 (2.032) data 0.000 (0.002) loss 1.1570 (1.2583) lr 3.6258e-04 eta 9:58:40
epoch [42/50] batch [340/2000] time 2.027 (2.032) data 0.000 (0.002) loss 0.0931 (1.2348) lr 3.6258e-04 eta 9:58:01
epoch [42/50] batch [360/2000] time 2.047 (2.032) data 0.000 (0.002) loss 0.8906 (1.2351) lr 3.6258e-04 eta 9:57:19
epoch [42/50] batch [380/2000] time 2.048 (2.031) data 0.000 (0.002) loss 0.1821 (1.2182) lr 3.6258e-04 eta 9:56:31
epoch [42/50] batch [400/2000] time 2.050 (2.031) data 0.000 (0.002) loss 3.5086 (1.2272) lr 3.6258e-04 eta 9:55:47
epoch [42/50] batch [420/2000] time 1.996 (2.031) data 0.000 (0.002) loss 0.8277 (1.2174) lr 3.6258e-04 eta 9:55:05
epoch [42/50] batch [440/2000] time 2.026 (2.031) data 0.000 (0.001) loss 2.7507 (1.2262) lr 3.6258e-04 eta 9:54:18
epoch [42/50] batch [460/2000] time 2.049 (2.031) data 0.000 (0.001) loss 0.1329 (1.2037) lr 3.6258e-04 eta 9:53:40
epoch [42/50] batch [480/2000] time 1.998 (2.031) data 0.000 (0.001) loss 0.3373 (1.2057) lr 3.6258e-04 eta 9:52:55
epoch [42/50] batch [500/2000] time 2.025 (2.031) data 0.000 (0.001) loss 0.6021 (1.2136) lr 3.6258e-04 eta 9:52:18
epoch [42/50] batch [520/2000] time 2.029 (2.031) data 0.000 (0.001) loss 0.7324 (1.2188) lr 3.6258e-04 eta 9:51:33
epoch [42/50] batch [540/2000] time 2.030 (2.030) data 0.000 (0.001) loss 3.5033 (1.2123) lr 3.6258e-04 eta 9:50:50
epoch [42/50] batch [560/2000] time 2.001 (2.030) data 0.000 (0.001) loss 0.9024 (1.2085) lr 3.6258e-04 eta 9:50:10
epoch [42/50] batch [580/2000] time 2.028 (2.030) data 0.000 (0.001) loss 1.2695 (1.2133) lr 3.6258e-04 eta 9:49:25
epoch [42/50] batch [600/2000] time 2.049 (2.030) data 0.001 (0.001) loss 0.9890 (1.2024) lr 3.6258e-04 eta 9:48:41
epoch [42/50] batch [620/2000] time 2.026 (2.030) data 0.000 (0.001) loss 2.0701 (1.1963) lr 3.6258e-04 eta 9:47:57
epoch [42/50] batch [640/2000] time 2.027 (2.030) data 0.000 (0.001) loss 0.4275 (1.1821) lr 3.6258e-04 eta 9:47:13
epoch [42/50] batch [660/2000] time 1.995 (2.030) data 0.000 (0.001) loss 0.2416 (1.1825) lr 3.6258e-04 eta 9:46:32
epoch [42/50] batch [680/2000] time 1.971 (2.029) data 0.000 (0.001) loss 2.3660 (1.1812) lr 3.6258e-04 eta 9:45:46
epoch [42/50] batch [700/2000] time 2.046 (2.029) data 0.000 (0.001) loss 0.7749 (1.1815) lr 3.6258e-04 eta 9:45:05
epoch [42/50] batch [720/2000] time 2.047 (2.029) data 0.000 (0.001) loss 0.0747 (1.1786) lr 3.6258e-04 eta 9:44:22
epoch [42/50] batch [740/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.2907 (1.1754) lr 3.6258e-04 eta 9:43:42
epoch [42/50] batch [760/2000] time 2.000 (2.029) data 0.000 (0.001) loss 0.0600 (1.1673) lr 3.6258e-04 eta 9:43:02
epoch [42/50] batch [780/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.6625 (1.1628) lr 3.6258e-04 eta 9:42:22
epoch [42/50] batch [800/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.6886 (1.1570) lr 3.6258e-04 eta 9:41:42
epoch [42/50] batch [820/2000] time 2.030 (2.029) data 0.000 (0.001) loss 1.9988 (1.1586) lr 3.6258e-04 eta 9:41:00
epoch [42/50] batch [840/2000] time 2.055 (2.029) data 0.000 (0.001) loss 1.3166 (1.1552) lr 3.6258e-04 eta 9:40:23
epoch [42/50] batch [860/2000] time 2.026 (2.029) data 0.000 (0.001) loss 3.2168 (1.1591) lr 3.6258e-04 eta 9:39:44
epoch [42/50] batch [880/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.3093 (1.1570) lr 3.6258e-04 eta 9:38:59
epoch [42/50] batch [900/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.7329 (1.1517) lr 3.6258e-04 eta 9:38:21
epoch [42/50] batch [920/2000] time 2.054 (2.029) data 0.000 (0.001) loss 2.9245 (1.1518) lr 3.6258e-04 eta 9:37:40
epoch [42/50] batch [940/2000] time 1.980 (2.029) data 0.000 (0.001) loss 1.8909 (1.1478) lr 3.6258e-04 eta 9:36:59
epoch [42/50] batch [960/2000] time 2.032 (2.029) data 0.000 (0.001) loss 2.6451 (1.1530) lr 3.6258e-04 eta 9:36:18
epoch [42/50] batch [980/2000] time 1.977 (2.029) data 0.000 (0.001) loss 0.5499 (1.1588) lr 3.6258e-04 eta 9:35:39
epoch [42/50] batch [1000/2000] time 2.002 (2.029) data 0.000 (0.001) loss 0.9299 (1.1614) lr 3.6258e-04 eta 9:34:59
epoch [42/50] batch [1020/2000] time 2.001 (2.029) data 0.000 (0.001) loss 0.0256 (1.1614) lr 3.6258e-04 eta 9:34:20
epoch [42/50] batch [1040/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.3580 (1.1605) lr 3.6258e-04 eta 9:33:42
epoch [42/50] batch [1060/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.4327 (1.1610) lr 3.6258e-04 eta 9:33:02
epoch [42/50] batch [1080/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.4613 (1.1568) lr 3.6258e-04 eta 9:32:23
epoch [42/50] batch [1100/2000] time 2.057 (2.030) data 0.000 (0.001) loss 0.7642 (1.1530) lr 3.6258e-04 eta 9:31:44
epoch [42/50] batch [1120/2000] time 1.998 (2.030) data 0.000 (0.001) loss 2.6256 (1.1559) lr 3.6258e-04 eta 9:31:02
epoch [42/50] batch [1140/2000] time 2.033 (2.030) data 0.001 (0.001) loss 0.7521 (1.1617) lr 3.6258e-04 eta 9:30:22
epoch [42/50] batch [1160/2000] time 2.055 (2.030) data 0.000 (0.001) loss 1.1852 (1.1580) lr 3.6258e-04 eta 9:29:43
epoch [42/50] batch [1180/2000] time 2.062 (2.030) data 0.000 (0.001) loss 1.3061 (1.1650) lr 3.6258e-04 eta 9:29:04
epoch [42/50] batch [1200/2000] time 2.001 (2.030) data 0.000 (0.001) loss 0.7336 (1.1649) lr 3.6258e-04 eta 9:28:24
epoch [42/50] batch [1220/2000] time 2.057 (2.030) data 0.000 (0.001) loss 0.1889 (1.1621) lr 3.6258e-04 eta 9:27:45
epoch [42/50] batch [1240/2000] time 1.973 (2.030) data 0.000 (0.001) loss 1.5106 (1.1597) lr 3.6258e-04 eta 9:27:06
epoch [42/50] batch [1260/2000] time 1.976 (2.030) data 0.000 (0.001) loss 1.6138 (1.1613) lr 3.6258e-04 eta 9:26:24
epoch [42/50] batch [1280/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.2194 (1.1585) lr 3.6258e-04 eta 9:25:44
epoch [42/50] batch [1300/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.9720 (1.1573) lr 3.6258e-04 eta 9:25:02
epoch [42/50] batch [1320/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.1846 (1.1571) lr 3.6258e-04 eta 9:24:22
epoch [42/50] batch [1340/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.4863 (1.1559) lr 3.6258e-04 eta 9:23:42
epoch [42/50] batch [1360/2000] time 2.051 (2.030) data 0.000 (0.001) loss 4.1879 (1.1593) lr 3.6258e-04 eta 9:23:02
epoch [42/50] batch [1380/2000] time 1.994 (2.030) data 0.000 (0.001) loss 1.4607 (1.1581) lr 3.6258e-04 eta 9:22:20
epoch [42/50] batch [1400/2000] time 1.997 (2.030) data 0.000 (0.001) loss 2.3328 (1.1628) lr 3.6258e-04 eta 9:21:40
epoch [42/50] batch [1420/2000] time 2.055 (2.030) data 0.000 (0.001) loss 0.8890 (1.1648) lr 3.6258e-04 eta 9:20:58
epoch [42/50] batch [1440/2000] time 2.050 (2.030) data 0.000 (0.001) loss 0.8248 (1.1635) lr 3.6258e-04 eta 9:20:17
epoch [42/50] batch [1460/2000] time 1.995 (2.030) data 0.000 (0.001) loss 0.0468 (1.1593) lr 3.6258e-04 eta 9:19:34
epoch [42/50] batch [1480/2000] time 1.995 (2.030) data 0.000 (0.001) loss 1.3782 (1.1598) lr 3.6258e-04 eta 9:18:54
epoch [42/50] batch [1500/2000] time 2.052 (2.030) data 0.000 (0.001) loss 2.2913 (1.1595) lr 3.6258e-04 eta 9:18:13
epoch [42/50] batch [1520/2000] time 2.057 (2.030) data 0.000 (0.001) loss 0.7144 (1.1585) lr 3.6258e-04 eta 9:17:32
epoch [42/50] batch [1540/2000] time 2.037 (2.030) data 0.000 (0.001) loss 0.9031 (1.1586) lr 3.6258e-04 eta 9:16:51
epoch [42/50] batch [1560/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.4295 (1.1589) lr 3.6258e-04 eta 9:16:11
epoch [42/50] batch [1580/2000] time 1.973 (2.030) data 0.000 (0.001) loss 1.5417 (1.1626) lr 3.6258e-04 eta 9:15:29
epoch [42/50] batch [1600/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.1670 (1.1669) lr 3.6258e-04 eta 9:14:50
epoch [42/50] batch [1620/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.6931 (1.1668) lr 3.6258e-04 eta 9:14:09
epoch [42/50] batch [1640/2000] time 1.997 (2.030) data 0.000 (0.001) loss 3.4508 (1.1650) lr 3.6258e-04 eta 9:13:28
epoch [42/50] batch [1660/2000] time 2.054 (2.030) data 0.000 (0.001) loss 1.0725 (1.1708) lr 3.6258e-04 eta 9:12:48
epoch [42/50] batch [1680/2000] time 1.998 (2.030) data 0.001 (0.001) loss 0.1722 (1.1693) lr 3.6258e-04 eta 9:12:08
epoch [42/50] batch [1700/2000] time 1.973 (2.030) data 0.000 (0.001) loss 0.1966 (1.1732) lr 3.6258e-04 eta 9:11:27
epoch [42/50] batch [1720/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.1868 (1.1751) lr 3.6258e-04 eta 9:10:46
epoch [42/50] batch [1740/2000] time 1.998 (2.030) data 0.000 (0.001) loss 3.9371 (1.1782) lr 3.6258e-04 eta 9:10:07
epoch [42/50] batch [1760/2000] time 2.057 (2.030) data 0.000 (0.001) loss 0.8015 (1.1765) lr 3.6258e-04 eta 9:09:26
epoch [42/50] batch [1780/2000] time 2.036 (2.030) data 0.000 (0.001) loss 1.3157 (1.1761) lr 3.6258e-04 eta 9:08:47
epoch [42/50] batch [1800/2000] time 2.000 (2.030) data 0.000 (0.001) loss 2.3068 (1.1744) lr 3.6258e-04 eta 9:08:06
epoch [42/50] batch [1820/2000] time 2.057 (2.030) data 0.000 (0.001) loss 0.3350 (1.1753) lr 3.6258e-04 eta 9:07:28
epoch [42/50] batch [1840/2000] time 2.037 (2.030) data 0.000 (0.001) loss 3.0007 (1.1775) lr 3.6258e-04 eta 9:06:46
epoch [42/50] batch [1860/2000] time 2.059 (2.030) data 0.000 (0.001) loss 0.3010 (1.1748) lr 3.6258e-04 eta 9:06:07
epoch [42/50] batch [1880/2000] time 2.000 (2.030) data 0.000 (0.001) loss 0.1081 (1.1749) lr 3.6258e-04 eta 9:05:27
epoch [42/50] batch [1900/2000] time 1.999 (2.030) data 0.000 (0.001) loss 1.5523 (1.1769) lr 3.6258e-04 eta 9:04:47
epoch [42/50] batch [1920/2000] time 2.003 (2.030) data 0.000 (0.001) loss 1.8826 (1.1787) lr 3.6258e-04 eta 9:04:07
epoch [42/50] batch [1940/2000] time 2.056 (2.030) data 0.000 (0.000) loss 0.9707 (1.1755) lr 3.6258e-04 eta 9:03:28
epoch [42/50] batch [1960/2000] time 2.059 (2.030) data 0.000 (0.000) loss 2.7452 (1.1758) lr 3.6258e-04 eta 9:02:47
epoch [42/50] batch [1980/2000] time 2.002 (2.030) data 0.000 (0.000) loss 0.2684 (1.1732) lr 3.6258e-04 eta 9:02:07
epoch [42/50] batch [2000/2000] time 2.032 (2.030) data 0.000 (0.000) loss 2.1037 (1.1736) lr 3.1545e-04 eta 9:01:27
epoch [43/50] batch [20/2000] time 2.035 (2.058) data 0.000 (0.028) loss 0.1950 (1.4203) lr 3.1545e-04 eta 9:08:02
epoch [43/50] batch [40/2000] time 2.057 (2.042) data 0.000 (0.014) loss 1.8367 (1.2294) lr 3.1545e-04 eta 9:03:03
epoch [43/50] batch [60/2000] time 2.037 (2.037) data 0.001 (0.009) loss 1.3279 (1.1200) lr 3.1545e-04 eta 9:01:15
epoch [43/50] batch [80/2000] time 2.001 (2.035) data 0.000 (0.007) loss 0.3743 (1.1348) lr 3.1545e-04 eta 8:59:58
epoch [43/50] batch [100/2000] time 2.002 (2.034) data 0.000 (0.006) loss 0.7098 (1.1636) lr 3.1545e-04 eta 8:59:01
epoch [43/50] batch [120/2000] time 2.000 (2.033) data 0.000 (0.005) loss 1.3312 (1.1912) lr 3.1545e-04 eta 8:58:11
epoch [43/50] batch [140/2000] time 1.999 (2.033) data 0.000 (0.004) loss 1.2395 (1.2064) lr 3.1545e-04 eta 8:57:23
epoch [43/50] batch [160/2000] time 2.000 (2.033) data 0.000 (0.004) loss 2.0745 (1.2130) lr 3.1545e-04 eta 8:56:44
epoch [43/50] batch [180/2000] time 2.055 (2.033) data 0.000 (0.003) loss 2.4915 (1.2108) lr 3.1545e-04 eta 8:56:02
epoch [43/50] batch [200/2000] time 2.003 (2.033) data 0.000 (0.003) loss 2.0491 (1.2226) lr 3.1545e-04 eta 8:55:18
epoch [43/50] batch [220/2000] time 2.052 (2.033) data 0.000 (0.003) loss 2.3036 (1.2233) lr 3.1545e-04 eta 8:54:38
epoch [43/50] batch [240/2000] time 2.053 (2.033) data 0.000 (0.003) loss 0.8456 (1.1903) lr 3.1545e-04 eta 8:53:54
epoch [43/50] batch [260/2000] time 1.998 (2.032) data 0.000 (0.002) loss 0.5148 (1.1767) lr 3.1545e-04 eta 8:53:05
epoch [43/50] batch [280/2000] time 1.999 (2.032) data 0.000 (0.002) loss 1.7117 (1.1582) lr 3.1545e-04 eta 8:52:27
epoch [43/50] batch [300/2000] time 2.053 (2.032) data 0.000 (0.002) loss 1.6653 (1.1568) lr 3.1545e-04 eta 8:51:49
epoch [43/50] batch [320/2000] time 2.052 (2.032) data 0.000 (0.002) loss 0.5366 (1.1689) lr 3.1545e-04 eta 8:51:00
epoch [43/50] batch [340/2000] time 2.054 (2.032) data 0.000 (0.002) loss 0.8882 (1.1905) lr 3.1545e-04 eta 8:50:18
epoch [43/50] batch [360/2000] time 2.050 (2.031) data 0.000 (0.002) loss 1.9646 (1.1856) lr 3.1545e-04 eta 8:49:31
epoch [43/50] batch [380/2000] time 2.048 (2.031) data 0.000 (0.002) loss 1.4469 (1.1683) lr 3.1545e-04 eta 8:48:46
epoch [43/50] batch [400/2000] time 2.051 (2.031) data 0.000 (0.002) loss 0.6595 (1.1768) lr 3.1545e-04 eta 8:48:05
epoch [43/50] batch [420/2000] time 2.027 (2.031) data 0.000 (0.002) loss 0.6854 (1.1599) lr 3.1545e-04 eta 8:47:24
epoch [43/50] batch [440/2000] time 2.049 (2.031) data 0.000 (0.001) loss 0.1939 (1.1597) lr 3.1545e-04 eta 8:46:45
epoch [43/50] batch [460/2000] time 1.995 (2.031) data 0.000 (0.001) loss 1.3225 (1.1668) lr 3.1545e-04 eta 8:45:55
epoch [43/50] batch [480/2000] time 2.048 (2.030) data 0.000 (0.001) loss 0.2983 (1.1588) lr 3.1545e-04 eta 8:45:11
epoch [43/50] batch [500/2000] time 2.024 (2.030) data 0.000 (0.001) loss 1.1966 (1.1548) lr 3.1545e-04 eta 8:44:30
epoch [43/50] batch [520/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.4994 (1.1518) lr 3.1545e-04 eta 8:43:47
epoch [43/50] batch [540/2000] time 2.047 (2.030) data 0.000 (0.001) loss 2.1118 (1.1651) lr 3.1545e-04 eta 8:43:08
epoch [43/50] batch [560/2000] time 2.027 (2.030) data 0.000 (0.001) loss 1.6700 (1.1676) lr 3.1545e-04 eta 8:42:29
epoch [43/50] batch [580/2000] time 2.028 (2.030) data 0.000 (0.001) loss 1.7191 (1.1921) lr 3.1545e-04 eta 8:41:47
epoch [43/50] batch [600/2000] time 1.998 (2.030) data 0.001 (0.001) loss 0.8467 (1.1875) lr 3.1545e-04 eta 8:41:05
epoch [43/50] batch [620/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.4401 (1.1825) lr 3.1545e-04 eta 8:40:26
epoch [43/50] batch [640/2000] time 2.049 (2.030) data 0.000 (0.001) loss 3.8493 (1.2026) lr 3.1545e-04 eta 8:39:41
epoch [43/50] batch [660/2000] time 1.996 (2.030) data 0.000 (0.001) loss 1.6380 (1.1997) lr 3.1545e-04 eta 8:38:56
epoch [43/50] batch [680/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.9301 (1.2006) lr 3.1545e-04 eta 8:38:16
epoch [43/50] batch [700/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.2495 (1.1978) lr 3.1545e-04 eta 8:37:35
epoch [43/50] batch [720/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.7395 (1.1917) lr 3.1545e-04 eta 8:36:54
epoch [43/50] batch [740/2000] time 2.027 (2.030) data 0.000 (0.001) loss 2.8384 (1.1981) lr 3.1545e-04 eta 8:36:13
epoch [43/50] batch [760/2000] time 2.052 (2.030) data 0.000 (0.001) loss 0.3969 (1.1963) lr 3.1545e-04 eta 8:35:33
epoch [43/50] batch [780/2000] time 1.972 (2.030) data 0.000 (0.001) loss 1.5982 (1.1851) lr 3.1545e-04 eta 8:34:51
epoch [43/50] batch [800/2000] time 2.025 (2.029) data 0.000 (0.001) loss 0.3973 (1.1813) lr 3.1545e-04 eta 8:34:06
epoch [43/50] batch [820/2000] time 1.997 (2.029) data 0.000 (0.001) loss 1.5212 (1.1788) lr 3.1545e-04 eta 8:33:26
epoch [43/50] batch [840/2000] time 2.060 (2.030) data 0.000 (0.001) loss 0.6338 (1.1762) lr 3.1545e-04 eta 8:32:47
epoch [43/50] batch [860/2000] time 2.001 (2.030) data 0.000 (0.001) loss 1.6830 (1.1750) lr 3.1545e-04 eta 8:32:11
epoch [43/50] batch [880/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.8502 (1.1763) lr 3.1545e-04 eta 8:31:31
epoch [43/50] batch [900/2000] time 2.050 (2.030) data 0.000 (0.001) loss 2.0979 (1.1830) lr 3.1545e-04 eta 8:30:52
epoch [43/50] batch [920/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.3894 (1.1831) lr 3.1545e-04 eta 8:30:09
epoch [43/50] batch [940/2000] time 1.973 (2.030) data 0.000 (0.001) loss 2.2777 (1.1830) lr 3.1545e-04 eta 8:29:26
epoch [43/50] batch [960/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.7196 (1.1865) lr 3.1545e-04 eta 8:28:44
epoch [43/50] batch [980/2000] time 1.999 (2.029) data 0.000 (0.001) loss 0.3844 (1.1857) lr 3.1545e-04 eta 8:28:00
epoch [43/50] batch [1000/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.9559 (1.1836) lr 3.1545e-04 eta 8:27:19
epoch [43/50] batch [1020/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.5050 (1.1791) lr 3.1545e-04 eta 8:26:36
epoch [43/50] batch [1040/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.6505 (1.1765) lr 3.1545e-04 eta 8:25:57
epoch [43/50] batch [1060/2000] time 2.051 (2.029) data 0.000 (0.001) loss 2.1346 (1.1722) lr 3.1545e-04 eta 8:25:16
epoch [43/50] batch [1080/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.3315 (1.1757) lr 3.1545e-04 eta 8:24:35
epoch [43/50] batch [1100/2000] time 2.001 (2.029) data 0.000 (0.001) loss 0.3095 (1.1720) lr 3.1545e-04 eta 8:23:55
epoch [43/50] batch [1120/2000] time 2.055 (2.029) data 0.000 (0.001) loss 0.2629 (1.1695) lr 3.1545e-04 eta 8:23:16
epoch [43/50] batch [1140/2000] time 2.000 (2.029) data 0.001 (0.001) loss 0.7311 (1.1664) lr 3.1545e-04 eta 8:22:36
epoch [43/50] batch [1160/2000] time 2.030 (2.029) data 0.000 (0.001) loss 2.2696 (1.1704) lr 3.1545e-04 eta 8:21:55
epoch [43/50] batch [1180/2000] time 2.033 (2.029) data 0.000 (0.001) loss 0.7441 (1.1657) lr 3.1545e-04 eta 8:21:16
epoch [43/50] batch [1200/2000] time 2.033 (2.029) data 0.000 (0.001) loss 1.2208 (1.1607) lr 3.1545e-04 eta 8:20:36
epoch [43/50] batch [1220/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.6990 (1.1575) lr 3.1545e-04 eta 8:19:55
epoch [43/50] batch [1240/2000] time 1.995 (2.030) data 0.000 (0.001) loss 1.0338 (1.1546) lr 3.1545e-04 eta 8:19:15
epoch [43/50] batch [1260/2000] time 2.024 (2.029) data 0.000 (0.001) loss 2.0013 (1.1565) lr 3.1545e-04 eta 8:18:32
epoch [43/50] batch [1280/2000] time 1.999 (2.029) data 0.000 (0.001) loss 1.0573 (1.1526) lr 3.1545e-04 eta 8:17:51
epoch [43/50] batch [1300/2000] time 2.058 (2.029) data 0.000 (0.001) loss 0.4242 (1.1548) lr 3.1545e-04 eta 8:17:12
epoch [43/50] batch [1320/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.2687 (1.1509) lr 3.1545e-04 eta 8:16:32
epoch [43/50] batch [1340/2000] time 2.055 (2.029) data 0.000 (0.001) loss 2.0873 (1.1531) lr 3.1545e-04 eta 8:15:52
epoch [43/50] batch [1360/2000] time 1.976 (2.029) data 0.000 (0.001) loss 0.6989 (1.1481) lr 3.1545e-04 eta 8:15:11
epoch [43/50] batch [1380/2000] time 2.025 (2.030) data 0.000 (0.001) loss 0.0794 (1.1453) lr 3.1545e-04 eta 8:14:31
epoch [43/50] batch [1400/2000] time 2.050 (2.030) data 0.000 (0.001) loss 4.2977 (1.1462) lr 3.1545e-04 eta 8:13:50
epoch [43/50] batch [1420/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.2119 (1.1508) lr 3.1545e-04 eta 8:13:09
epoch [43/50] batch [1440/2000] time 2.050 (2.030) data 0.000 (0.001) loss 2.3648 (1.1491) lr 3.1545e-04 eta 8:12:29
epoch [43/50] batch [1460/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.3708 (1.1509) lr 3.1545e-04 eta 8:11:47
epoch [43/50] batch [1480/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.1053 (1.1548) lr 3.1545e-04 eta 8:11:07
epoch [43/50] batch [1500/2000] time 2.053 (2.029) data 0.000 (0.001) loss 2.9093 (1.1557) lr 3.1545e-04 eta 8:10:26
epoch [43/50] batch [1520/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.6795 (1.1555) lr 3.1545e-04 eta 8:09:44
epoch [43/50] batch [1540/2000] time 2.025 (2.029) data 0.000 (0.001) loss 0.9213 (1.1516) lr 3.1545e-04 eta 8:09:04
epoch [43/50] batch [1560/2000] time 1.995 (2.029) data 0.000 (0.001) loss 0.3892 (1.1501) lr 3.1545e-04 eta 8:08:22
epoch [43/50] batch [1580/2000] time 2.024 (2.029) data 0.000 (0.001) loss 0.5001 (1.1529) lr 3.1545e-04 eta 8:07:40
epoch [43/50] batch [1600/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.6578 (1.1515) lr 3.1545e-04 eta 8:06:59
epoch [43/50] batch [1620/2000] time 2.025 (2.029) data 0.000 (0.001) loss 0.4332 (1.1541) lr 3.1545e-04 eta 8:06:17
epoch [43/50] batch [1640/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.1604 (1.1546) lr 3.1545e-04 eta 8:05:36
epoch [43/50] batch [1660/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.8238 (1.1578) lr 3.1545e-04 eta 8:04:55
epoch [43/50] batch [1680/2000] time 2.050 (2.029) data 0.001 (0.001) loss 0.2807 (1.1566) lr 3.1545e-04 eta 8:04:16
epoch [43/50] batch [1700/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.4052 (1.1549) lr 3.1545e-04 eta 8:03:34
epoch [43/50] batch [1720/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.1216 (1.1513) lr 3.1545e-04 eta 8:02:52
epoch [43/50] batch [1740/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.3502 (1.1499) lr 3.1545e-04 eta 8:02:12
epoch [43/50] batch [1760/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.1206 (1.1485) lr 3.1545e-04 eta 8:01:31
epoch [43/50] batch [1780/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.2836 (1.1475) lr 3.1545e-04 eta 8:00:50
epoch [43/50] batch [1800/2000] time 2.027 (2.029) data 0.000 (0.001) loss 2.9437 (1.1516) lr 3.1545e-04 eta 8:00:08
epoch [43/50] batch [1820/2000] time 2.054 (2.029) data 0.000 (0.001) loss 2.1175 (1.1551) lr 3.1545e-04 eta 7:59:29
epoch [43/50] batch [1840/2000] time 2.032 (2.029) data 0.000 (0.001) loss 2.7633 (1.1571) lr 3.1545e-04 eta 7:58:47
epoch [43/50] batch [1860/2000] time 2.055 (2.029) data 0.000 (0.001) loss 1.8906 (1.1550) lr 3.1545e-04 eta 7:58:08
epoch [43/50] batch [1880/2000] time 2.054 (2.029) data 0.000 (0.000) loss 1.3759 (1.1540) lr 3.1545e-04 eta 7:57:28
epoch [43/50] batch [1900/2000] time 2.030 (2.029) data 0.000 (0.000) loss 1.9161 (1.1589) lr 3.1545e-04 eta 7:56:48
epoch [43/50] batch [1920/2000] time 2.048 (2.029) data 0.000 (0.000) loss 0.2615 (1.1578) lr 3.1545e-04 eta 7:56:07
epoch [43/50] batch [1940/2000] time 1.997 (2.029) data 0.000 (0.000) loss 2.8729 (1.1600) lr 3.1545e-04 eta 7:55:26
epoch [43/50] batch [1960/2000] time 2.048 (2.029) data 0.000 (0.000) loss 0.1433 (1.1558) lr 3.1545e-04 eta 7:54:46
epoch [43/50] batch [1980/2000] time 2.048 (2.029) data 0.000 (0.000) loss 0.9604 (1.1573) lr 3.1545e-04 eta 7:54:05
epoch [43/50] batch [2000/2000] time 2.050 (2.029) data 0.000 (0.000) loss 0.4347 (1.1603) lr 2.7103e-04 eta 7:53:24
epoch [44/50] batch [20/2000] time 2.047 (2.059) data 0.000 (0.028) loss 1.5791 (1.5251) lr 2.7103e-04 eta 7:59:40
epoch [44/50] batch [40/2000] time 2.048 (2.042) data 0.000 (0.014) loss 0.3897 (1.2288) lr 2.7103e-04 eta 7:55:13
epoch [44/50] batch [60/2000] time 2.027 (2.036) data 0.001 (0.009) loss 0.7434 (1.3346) lr 2.7103e-04 eta 7:53:07
epoch [44/50] batch [80/2000] time 1.996 (2.035) data 0.000 (0.007) loss 0.7498 (1.3029) lr 2.7103e-04 eta 7:52:02
epoch [44/50] batch [100/2000] time 2.054 (2.034) data 0.000 (0.006) loss 0.8930 (1.2836) lr 2.7103e-04 eta 7:51:18
epoch [44/50] batch [120/2000] time 1.996 (2.033) data 0.000 (0.005) loss 2.7349 (1.2547) lr 2.7103e-04 eta 7:50:15
epoch [44/50] batch [140/2000] time 2.028 (2.033) data 0.000 (0.004) loss 1.0207 (1.2374) lr 2.7103e-04 eta 7:49:32
epoch [44/50] batch [160/2000] time 2.052 (2.032) data 0.000 (0.004) loss 0.4461 (1.1962) lr 2.7103e-04 eta 7:48:46
epoch [44/50] batch [180/2000] time 2.048 (2.031) data 0.000 (0.003) loss 1.5238 (1.1943) lr 2.7103e-04 eta 7:47:53
epoch [44/50] batch [200/2000] time 1.972 (2.031) data 0.000 (0.003) loss 2.0183 (1.2019) lr 2.7103e-04 eta 7:47:05
epoch [44/50] batch [220/2000] time 2.050 (2.030) data 0.000 (0.003) loss 0.7378 (1.1937) lr 2.7103e-04 eta 7:46:15
epoch [44/50] batch [240/2000] time 2.046 (2.029) data 0.000 (0.003) loss 0.2917 (1.2070) lr 2.7103e-04 eta 7:45:25
epoch [44/50] batch [260/2000] time 2.048 (2.029) data 0.000 (0.002) loss 1.0114 (1.1978) lr 2.7103e-04 eta 7:44:42
epoch [44/50] batch [280/2000] time 2.048 (2.029) data 0.000 (0.002) loss 0.6837 (1.1877) lr 2.7103e-04 eta 7:43:55
epoch [44/50] batch [300/2000] time 2.027 (2.029) data 0.000 (0.002) loss 1.0815 (1.1823) lr 2.7103e-04 eta 7:43:11
epoch [44/50] batch [320/2000] time 2.050 (2.029) data 0.000 (0.002) loss 0.7690 (1.1727) lr 2.7103e-04 eta 7:42:29
epoch [44/50] batch [340/2000] time 2.055 (2.029) data 0.000 (0.002) loss 1.6328 (1.1633) lr 2.7103e-04 eta 7:41:49
epoch [44/50] batch [360/2000] time 2.051 (2.029) data 0.000 (0.002) loss 4.0815 (1.1754) lr 2.7103e-04 eta 7:41:09
epoch [44/50] batch [380/2000] time 1.998 (2.028) data 0.000 (0.002) loss 0.2602 (1.1797) lr 2.7103e-04 eta 7:40:25
epoch [44/50] batch [400/2000] time 2.049 (2.028) data 0.000 (0.002) loss 2.1476 (1.1701) lr 2.7103e-04 eta 7:39:43
epoch [44/50] batch [420/2000] time 2.047 (2.028) data 0.000 (0.002) loss 2.4215 (1.1596) lr 2.7103e-04 eta 7:39:04
epoch [44/50] batch [440/2000] time 2.028 (2.028) data 0.000 (0.001) loss 0.0431 (1.1513) lr 2.7103e-04 eta 7:38:24
epoch [44/50] batch [460/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.6344 (1.1630) lr 2.7103e-04 eta 7:37:46
epoch [44/50] batch [480/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.9320 (1.1613) lr 2.7103e-04 eta 7:37:06
epoch [44/50] batch [500/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.3210 (1.1648) lr 2.7103e-04 eta 7:36:28
epoch [44/50] batch [520/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.4626 (1.1670) lr 2.7103e-04 eta 7:35:46
epoch [44/50] batch [540/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.8819 (1.1584) lr 2.7103e-04 eta 7:35:04
epoch [44/50] batch [560/2000] time 2.032 (2.029) data 0.000 (0.001) loss 0.1049 (1.1545) lr 2.7103e-04 eta 7:34:25
epoch [44/50] batch [580/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.5043 (1.1448) lr 2.7103e-04 eta 7:33:45
epoch [44/50] batch [600/2000] time 2.051 (2.029) data 0.001 (0.001) loss 2.6061 (1.1539) lr 2.7103e-04 eta 7:33:07
epoch [44/50] batch [620/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.9741 (1.1571) lr 2.7103e-04 eta 7:32:28
epoch [44/50] batch [640/2000] time 1.973 (2.029) data 0.000 (0.001) loss 1.3421 (1.1588) lr 2.7103e-04 eta 7:31:48
epoch [44/50] batch [660/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.9419 (1.1563) lr 2.7103e-04 eta 7:31:07
epoch [44/50] batch [680/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.3058 (1.1577) lr 2.7103e-04 eta 7:30:29
epoch [44/50] batch [700/2000] time 2.049 (2.029) data 0.000 (0.001) loss 2.2481 (1.1657) lr 2.7103e-04 eta 7:29:46
epoch [44/50] batch [720/2000] time 2.031 (2.029) data 0.000 (0.001) loss 2.0573 (1.1657) lr 2.7103e-04 eta 7:29:06
epoch [44/50] batch [740/2000] time 2.050 (2.029) data 0.000 (0.001) loss 0.8504 (1.1662) lr 2.7103e-04 eta 7:28:25
epoch [44/50] batch [760/2000] time 2.049 (2.029) data 0.000 (0.001) loss 2.5204 (1.1666) lr 2.7103e-04 eta 7:27:44
epoch [44/50] batch [780/2000] time 1.995 (2.029) data 0.000 (0.001) loss 1.3986 (1.1701) lr 2.7103e-04 eta 7:27:04
epoch [44/50] batch [800/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.9976 (1.1713) lr 2.7103e-04 eta 7:26:23
epoch [44/50] batch [820/2000] time 2.001 (2.029) data 0.000 (0.001) loss 0.0884 (1.1675) lr 2.7103e-04 eta 7:25:40
epoch [44/50] batch [840/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.4998 (1.1670) lr 2.7103e-04 eta 7:24:59
epoch [44/50] batch [860/2000] time 2.032 (2.029) data 0.000 (0.001) loss 1.0260 (1.1623) lr 2.7103e-04 eta 7:24:16
epoch [44/50] batch [880/2000] time 2.055 (2.029) data 0.000 (0.001) loss 2.3703 (1.1689) lr 2.7103e-04 eta 7:23:36
epoch [44/50] batch [900/2000] time 2.033 (2.029) data 0.000 (0.001) loss 0.9106 (1.1652) lr 2.7103e-04 eta 7:22:56
epoch [44/50] batch [920/2000] time 2.057 (2.029) data 0.000 (0.001) loss 2.6195 (1.1610) lr 2.7103e-04 eta 7:22:15
epoch [44/50] batch [940/2000] time 2.056 (2.029) data 0.000 (0.001) loss 0.8286 (1.1585) lr 2.7103e-04 eta 7:21:37
epoch [44/50] batch [960/2000] time 2.055 (2.029) data 0.000 (0.001) loss 2.2693 (1.1643) lr 2.7103e-04 eta 7:20:57
epoch [44/50] batch [980/2000] time 2.052 (2.029) data 0.000 (0.001) loss 2.5269 (1.1614) lr 2.7103e-04 eta 7:20:18
epoch [44/50] batch [1000/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.1255 (1.1631) lr 2.7103e-04 eta 7:19:38
epoch [44/50] batch [1020/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.7080 (1.1684) lr 2.7103e-04 eta 7:18:56
epoch [44/50] batch [1040/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.5412 (1.1669) lr 2.7103e-04 eta 7:18:13
epoch [44/50] batch [1060/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.0729 (1.1596) lr 2.7103e-04 eta 7:17:32
epoch [44/50] batch [1080/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.7036 (1.1600) lr 2.7103e-04 eta 7:16:50
epoch [44/50] batch [1100/2000] time 2.047 (2.029) data 0.000 (0.001) loss 2.8242 (1.1589) lr 2.7103e-04 eta 7:16:09
epoch [44/50] batch [1120/2000] time 2.050 (2.029) data 0.000 (0.001) loss 1.9014 (1.1583) lr 2.7103e-04 eta 7:15:28
epoch [44/50] batch [1140/2000] time 2.049 (2.029) data 0.001 (0.001) loss 0.2248 (1.1637) lr 2.7103e-04 eta 7:14:47
epoch [44/50] batch [1160/2000] time 1.995 (2.029) data 0.000 (0.001) loss 1.1282 (1.1585) lr 2.7103e-04 eta 7:14:06
epoch [44/50] batch [1180/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.0130 (1.1567) lr 2.7103e-04 eta 7:13:24
epoch [44/50] batch [1200/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.1217 (1.1551) lr 2.7103e-04 eta 7:12:43
epoch [44/50] batch [1220/2000] time 2.025 (2.028) data 0.000 (0.001) loss 0.9962 (1.1603) lr 2.7103e-04 eta 7:12:02
epoch [44/50] batch [1240/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.8861 (1.1586) lr 2.7103e-04 eta 7:11:21
epoch [44/50] batch [1260/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.6776 (1.1581) lr 2.7103e-04 eta 7:10:42
epoch [44/50] batch [1280/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.2036 (1.1595) lr 2.7103e-04 eta 7:10:01
epoch [44/50] batch [1300/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.7409 (1.1558) lr 2.7103e-04 eta 7:09:20
epoch [44/50] batch [1320/2000] time 2.028 (2.028) data 0.000 (0.001) loss 0.5080 (1.1533) lr 2.7103e-04 eta 7:08:40
epoch [44/50] batch [1340/2000] time 1.997 (2.028) data 0.000 (0.001) loss 1.3955 (1.1565) lr 2.7103e-04 eta 7:07:57
epoch [44/50] batch [1360/2000] time 1.997 (2.028) data 0.000 (0.001) loss 1.8812 (1.1605) lr 2.7103e-04 eta 7:07:17
epoch [44/50] batch [1380/2000] time 2.029 (2.028) data 0.000 (0.001) loss 3.5359 (1.1576) lr 2.7103e-04 eta 7:06:36
epoch [44/50] batch [1400/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.3348 (1.1535) lr 2.7103e-04 eta 7:05:55
epoch [44/50] batch [1420/2000] time 2.047 (2.028) data 0.000 (0.001) loss 1.1284 (1.1567) lr 2.7103e-04 eta 7:05:13
epoch [44/50] batch [1440/2000] time 2.047 (2.028) data 0.000 (0.001) loss 0.9153 (1.1557) lr 2.7103e-04 eta 7:04:32
epoch [44/50] batch [1460/2000] time 2.024 (2.028) data 0.000 (0.001) loss 0.3103 (1.1550) lr 2.7103e-04 eta 7:03:51
epoch [44/50] batch [1480/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.7955 (1.1555) lr 2.7103e-04 eta 7:03:10
epoch [44/50] batch [1500/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.5578 (1.1601) lr 2.7103e-04 eta 7:02:28
epoch [44/50] batch [1520/2000] time 2.026 (2.028) data 0.000 (0.001) loss 3.7601 (1.1595) lr 2.7103e-04 eta 7:01:47
epoch [44/50] batch [1540/2000] time 2.051 (2.028) data 0.000 (0.001) loss 4.2477 (1.1628) lr 2.7103e-04 eta 7:01:06
epoch [44/50] batch [1560/2000] time 2.028 (2.028) data 0.000 (0.001) loss 2.9295 (1.1664) lr 2.7103e-04 eta 7:00:25
epoch [44/50] batch [1580/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.3198 (1.1666) lr 2.7103e-04 eta 6:59:44
epoch [44/50] batch [1600/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.1757 (1.1680) lr 2.7103e-04 eta 6:59:04
epoch [44/50] batch [1620/2000] time 2.003 (2.028) data 0.000 (0.001) loss 0.6793 (1.1647) lr 2.7103e-04 eta 6:58:24
epoch [44/50] batch [1640/2000] time 2.028 (2.028) data 0.000 (0.001) loss 2.3601 (1.1671) lr 2.7103e-04 eta 6:57:43
epoch [44/50] batch [1660/2000] time 1.973 (2.028) data 0.000 (0.001) loss 0.9647 (1.1662) lr 2.7103e-04 eta 6:57:02
epoch [44/50] batch [1680/2000] time 1.994 (2.028) data 0.001 (0.001) loss 0.5416 (1.1610) lr 2.7103e-04 eta 6:56:22
epoch [44/50] batch [1700/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.9241 (1.1624) lr 2.7103e-04 eta 6:55:40
epoch [44/50] batch [1720/2000] time 2.047 (2.028) data 0.000 (0.001) loss 2.2888 (1.1682) lr 2.7103e-04 eta 6:55:00
epoch [44/50] batch [1740/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.9901 (1.1724) lr 2.7103e-04 eta 6:54:19
epoch [44/50] batch [1760/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.8993 (1.1731) lr 2.7103e-04 eta 6:53:38
epoch [44/50] batch [1780/2000] time 2.051 (2.028) data 0.000 (0.001) loss 2.3432 (1.1783) lr 2.7103e-04 eta 6:52:58
epoch [44/50] batch [1800/2000] time 2.052 (2.028) data 0.000 (0.001) loss 2.4941 (1.1774) lr 2.7103e-04 eta 6:52:17
epoch [44/50] batch [1820/2000] time 2.028 (2.028) data 0.000 (0.001) loss 0.2345 (1.1766) lr 2.7103e-04 eta 6:51:37
epoch [44/50] batch [1840/2000] time 2.052 (2.028) data 0.000 (0.000) loss 0.9992 (1.1751) lr 2.7103e-04 eta 6:50:56
epoch [44/50] batch [1860/2000] time 2.029 (2.028) data 0.000 (0.000) loss 1.3586 (1.1748) lr 2.7103e-04 eta 6:50:15
epoch [44/50] batch [1880/2000] time 2.002 (2.028) data 0.000 (0.000) loss 4.8136 (1.1774) lr 2.7103e-04 eta 6:49:35
epoch [44/50] batch [1900/2000] time 2.031 (2.028) data 0.000 (0.000) loss 1.1797 (1.1805) lr 2.7103e-04 eta 6:48:55
epoch [44/50] batch [1920/2000] time 1.999 (2.028) data 0.000 (0.000) loss 1.4666 (1.1807) lr 2.7103e-04 eta 6:48:16
epoch [44/50] batch [1940/2000] time 2.028 (2.028) data 0.000 (0.000) loss 2.0082 (1.1793) lr 2.7103e-04 eta 6:47:35
epoch [44/50] batch [1960/2000] time 2.027 (2.028) data 0.000 (0.000) loss 0.4576 (1.1772) lr 2.7103e-04 eta 6:46:55
epoch [44/50] batch [1980/2000] time 1.976 (2.028) data 0.000 (0.000) loss 1.1136 (1.1787) lr 2.7103e-04 eta 6:46:14
epoch [44/50] batch [2000/2000] time 1.994 (2.028) data 0.000 (0.000) loss 0.0667 (1.1766) lr 2.2949e-04 eta 6:45:33
epoch [45/50] batch [20/2000] time 2.033 (2.056) data 0.000 (0.027) loss 1.5753 (1.0248) lr 2.2949e-04 eta 6:50:33
epoch [45/50] batch [40/2000] time 2.051 (2.042) data 0.000 (0.014) loss 2.2130 (1.2068) lr 2.2949e-04 eta 6:46:56
epoch [45/50] batch [60/2000] time 2.050 (2.040) data 0.001 (0.009) loss 0.3296 (1.3089) lr 2.2949e-04 eta 6:45:52
epoch [45/50] batch [80/2000] time 2.050 (2.037) data 0.000 (0.007) loss 0.0147 (1.2065) lr 2.2949e-04 eta 6:44:39
epoch [45/50] batch [100/2000] time 2.049 (2.034) data 0.000 (0.006) loss 2.2482 (1.1945) lr 2.2949e-04 eta 6:43:27
epoch [45/50] batch [120/2000] time 2.003 (2.035) data 0.000 (0.005) loss 0.7510 (1.2384) lr 2.2949e-04 eta 6:42:52
epoch [45/50] batch [140/2000] time 2.055 (2.034) data 0.000 (0.004) loss 2.5575 (1.2341) lr 2.2949e-04 eta 6:42:04
epoch [45/50] batch [160/2000] time 2.049 (2.032) data 0.000 (0.004) loss 0.4565 (1.2383) lr 2.2949e-04 eta 6:41:04
epoch [45/50] batch [180/2000] time 1.997 (2.033) data 0.000 (0.003) loss 1.1014 (1.2368) lr 2.2949e-04 eta 6:40:28
epoch [45/50] batch [200/2000] time 2.048 (2.032) data 0.000 (0.003) loss 1.9148 (1.2537) lr 2.2949e-04 eta 6:39:39
epoch [45/50] batch [220/2000] time 2.048 (2.031) data 0.000 (0.003) loss 1.1602 (1.2611) lr 2.2949e-04 eta 6:38:49
epoch [45/50] batch [240/2000] time 2.028 (2.031) data 0.000 (0.002) loss 0.0477 (1.2744) lr 2.2949e-04 eta 6:38:07
epoch [45/50] batch [260/2000] time 2.028 (2.031) data 0.000 (0.002) loss 0.7291 (1.2457) lr 2.2949e-04 eta 6:37:24
epoch [45/50] batch [280/2000] time 1.997 (2.030) data 0.000 (0.002) loss 1.7884 (1.2291) lr 2.2949e-04 eta 6:36:36
epoch [45/50] batch [300/2000] time 2.048 (2.030) data 0.000 (0.002) loss 0.0771 (1.2413) lr 2.2949e-04 eta 6:35:55
epoch [45/50] batch [320/2000] time 2.050 (2.030) data 0.000 (0.002) loss 2.1550 (1.2366) lr 2.2949e-04 eta 6:35:11
epoch [45/50] batch [340/2000] time 2.050 (2.030) data 0.000 (0.002) loss 1.0834 (1.2295) lr 2.2949e-04 eta 6:34:26
epoch [45/50] batch [360/2000] time 1.996 (2.030) data 0.000 (0.002) loss 0.0415 (1.2217) lr 2.2949e-04 eta 6:33:46
epoch [45/50] batch [380/2000] time 2.049 (2.029) data 0.000 (0.002) loss 1.4194 (1.2033) lr 2.2949e-04 eta 6:33:02
epoch [45/50] batch [400/2000] time 2.048 (2.029) data 0.000 (0.002) loss 0.6797 (1.1976) lr 2.2949e-04 eta 6:32:17
epoch [45/50] batch [420/2000] time 1.996 (2.029) data 0.000 (0.002) loss 0.9924 (1.1822) lr 2.2949e-04 eta 6:31:37
epoch [45/50] batch [440/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.2400 (1.1742) lr 2.2949e-04 eta 6:30:55
epoch [45/50] batch [460/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.7849 (1.1694) lr 2.2949e-04 eta 6:30:13
epoch [45/50] batch [480/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.1756 (1.1865) lr 2.2949e-04 eta 6:29:33
epoch [45/50] batch [500/2000] time 2.048 (2.029) data 0.000 (0.001) loss 2.1876 (1.1861) lr 2.2949e-04 eta 6:28:53
epoch [45/50] batch [520/2000] time 1.996 (2.029) data 0.000 (0.001) loss 1.9308 (1.1893) lr 2.2949e-04 eta 6:28:13
epoch [45/50] batch [540/2000] time 1.995 (2.029) data 0.000 (0.001) loss 1.8266 (1.1913) lr 2.2949e-04 eta 6:27:33
epoch [45/50] batch [560/2000] time 1.995 (2.029) data 0.000 (0.001) loss 1.1296 (1.1950) lr 2.2949e-04 eta 6:26:51
epoch [45/50] batch [580/2000] time 2.047 (2.029) data 0.000 (0.001) loss 1.1228 (1.1912) lr 2.2949e-04 eta 6:26:07
epoch [45/50] batch [600/2000] time 2.054 (2.029) data 0.001 (0.001) loss 1.5662 (1.1921) lr 2.2949e-04 eta 6:25:27
epoch [45/50] batch [620/2000] time 2.055 (2.029) data 0.000 (0.001) loss 0.5004 (1.2062) lr 2.2949e-04 eta 6:24:50
epoch [45/50] batch [640/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.7614 (1.2079) lr 2.2949e-04 eta 6:24:12
epoch [45/50] batch [660/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.7753 (1.2064) lr 2.2949e-04 eta 6:23:30
epoch [45/50] batch [680/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.1450 (1.1944) lr 2.2949e-04 eta 6:22:48
epoch [45/50] batch [700/2000] time 2.050 (2.029) data 0.000 (0.001) loss 4.4297 (1.1871) lr 2.2949e-04 eta 6:22:07
epoch [45/50] batch [720/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.2251 (1.1785) lr 2.2949e-04 eta 6:21:25
epoch [45/50] batch [740/2000] time 1.998 (2.029) data 0.000 (0.001) loss 0.9538 (1.1854) lr 2.2949e-04 eta 6:20:45
epoch [45/50] batch [760/2000] time 2.048 (2.029) data 0.000 (0.001) loss 2.3785 (1.1951) lr 2.2949e-04 eta 6:20:05
epoch [45/50] batch [780/2000] time 2.048 (2.029) data 0.000 (0.001) loss 2.4466 (1.1950) lr 2.2949e-04 eta 6:19:25
epoch [45/50] batch [800/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.3990 (1.1946) lr 2.2949e-04 eta 6:18:41
epoch [45/50] batch [820/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.6256 (1.1946) lr 2.2949e-04 eta 6:18:00
epoch [45/50] batch [840/2000] time 2.056 (2.029) data 0.000 (0.001) loss 2.0538 (1.1953) lr 2.2949e-04 eta 6:17:20
epoch [45/50] batch [860/2000] time 2.051 (2.029) data 0.000 (0.001) loss 0.0246 (1.1895) lr 2.2949e-04 eta 6:16:40
epoch [45/50] batch [880/2000] time 1.994 (2.029) data 0.000 (0.001) loss 0.3639 (1.1872) lr 2.2949e-04 eta 6:15:59
epoch [45/50] batch [900/2000] time 1.995 (2.029) data 0.000 (0.001) loss 2.4446 (1.1828) lr 2.2949e-04 eta 6:15:18
epoch [45/50] batch [920/2000] time 1.996 (2.029) data 0.000 (0.001) loss 0.1526 (1.1877) lr 2.2949e-04 eta 6:14:38
epoch [45/50] batch [940/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.7453 (1.1936) lr 2.2949e-04 eta 6:13:57
epoch [45/50] batch [960/2000] time 2.047 (2.029) data 0.000 (0.001) loss 0.1331 (1.1946) lr 2.2949e-04 eta 6:13:15
epoch [45/50] batch [980/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.9294 (1.1866) lr 2.2949e-04 eta 6:12:36
epoch [45/50] batch [1000/2000] time 2.046 (2.029) data 0.000 (0.001) loss 1.2998 (1.1832) lr 2.2949e-04 eta 6:11:54
epoch [45/50] batch [1020/2000] time 1.994 (2.028) data 0.000 (0.001) loss 0.1484 (1.1851) lr 2.2949e-04 eta 6:11:12
epoch [45/50] batch [1040/2000] time 2.023 (2.028) data 0.000 (0.001) loss 1.7443 (1.1922) lr 2.2949e-04 eta 6:10:30
epoch [45/50] batch [1060/2000] time 2.047 (2.028) data 0.000 (0.001) loss 0.5200 (1.1919) lr 2.2949e-04 eta 6:09:48
epoch [45/50] batch [1080/2000] time 2.029 (2.028) data 0.000 (0.001) loss 1.5970 (1.1921) lr 2.2949e-04 eta 6:09:06
epoch [45/50] batch [1100/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.4838 (1.1909) lr 2.2949e-04 eta 6:08:25
epoch [45/50] batch [1120/2000] time 2.053 (2.028) data 0.000 (0.001) loss 1.6420 (1.1967) lr 2.2949e-04 eta 6:07:45
epoch [45/50] batch [1140/2000] time 1.996 (2.028) data 0.001 (0.001) loss 1.4345 (1.1978) lr 2.2949e-04 eta 6:07:03
epoch [45/50] batch [1160/2000] time 1.993 (2.028) data 0.000 (0.001) loss 0.2598 (1.1919) lr 2.2949e-04 eta 6:06:21
epoch [45/50] batch [1180/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.6028 (1.1930) lr 2.2949e-04 eta 6:05:40
epoch [45/50] batch [1200/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.5379 (1.1923) lr 2.2949e-04 eta 6:05:00
epoch [45/50] batch [1220/2000] time 2.052 (2.028) data 0.000 (0.001) loss 1.1900 (1.1892) lr 2.2949e-04 eta 6:04:18
epoch [45/50] batch [1240/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.3764 (1.1863) lr 2.2949e-04 eta 6:03:38
epoch [45/50] batch [1260/2000] time 1.994 (2.028) data 0.000 (0.001) loss 0.5682 (1.1856) lr 2.2949e-04 eta 6:02:56
epoch [45/50] batch [1280/2000] time 1.997 (2.028) data 0.000 (0.001) loss 0.8749 (1.1841) lr 2.2949e-04 eta 6:02:15
epoch [45/50] batch [1300/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.4061 (1.1806) lr 2.2949e-04 eta 6:01:34
epoch [45/50] batch [1320/2000] time 2.049 (2.027) data 0.000 (0.001) loss 1.5856 (1.1835) lr 2.2949e-04 eta 6:00:53
epoch [45/50] batch [1340/2000] time 2.049 (2.027) data 0.000 (0.001) loss 1.4005 (1.1831) lr 2.2949e-04 eta 6:00:12
epoch [45/50] batch [1360/2000] time 2.049 (2.027) data 0.000 (0.001) loss 2.4178 (1.1886) lr 2.2949e-04 eta 5:59:31
epoch [45/50] batch [1380/2000] time 1.999 (2.027) data 0.000 (0.001) loss 0.7287 (1.1862) lr 2.2949e-04 eta 5:58:50
epoch [45/50] batch [1400/2000] time 1.996 (2.027) data 0.000 (0.001) loss 0.5175 (1.1890) lr 2.2949e-04 eta 5:58:09
epoch [45/50] batch [1420/2000] time 2.000 (2.027) data 0.000 (0.001) loss 2.3919 (1.1848) lr 2.2949e-04 eta 5:57:28
epoch [45/50] batch [1440/2000] time 2.050 (2.027) data 0.000 (0.001) loss 0.6692 (1.1856) lr 2.2949e-04 eta 5:56:47
epoch [45/50] batch [1460/2000] time 1.997 (2.027) data 0.000 (0.001) loss 3.3783 (1.1933) lr 2.2949e-04 eta 5:56:07
epoch [45/50] batch [1480/2000] time 2.051 (2.027) data 0.000 (0.001) loss 2.3098 (1.1968) lr 2.2949e-04 eta 5:55:26
epoch [45/50] batch [1500/2000] time 2.051 (2.027) data 0.000 (0.001) loss 1.0246 (1.1917) lr 2.2949e-04 eta 5:54:46
epoch [45/50] batch [1520/2000] time 2.056 (2.027) data 0.000 (0.001) loss 0.2233 (1.1892) lr 2.2949e-04 eta 5:54:05
epoch [45/50] batch [1540/2000] time 2.058 (2.027) data 0.000 (0.001) loss 1.7715 (1.1857) lr 2.2949e-04 eta 5:53:25
epoch [45/50] batch [1560/2000] time 1.999 (2.027) data 0.000 (0.001) loss 0.5091 (1.1870) lr 2.2949e-04 eta 5:52:45
epoch [45/50] batch [1580/2000] time 2.049 (2.027) data 0.000 (0.001) loss 1.0004 (1.1897) lr 2.2949e-04 eta 5:52:04
epoch [45/50] batch [1600/2000] time 2.048 (2.027) data 0.000 (0.001) loss 0.2022 (1.1893) lr 2.2949e-04 eta 5:51:23
epoch [45/50] batch [1620/2000] time 1.971 (2.027) data 0.000 (0.001) loss 0.5984 (1.1908) lr 2.2949e-04 eta 5:50:42
epoch [45/50] batch [1640/2000] time 1.972 (2.027) data 0.000 (0.001) loss 1.1147 (1.1929) lr 2.2949e-04 eta 5:50:01
epoch [45/50] batch [1660/2000] time 2.050 (2.027) data 0.000 (0.001) loss 1.1702 (1.1951) lr 2.2949e-04 eta 5:49:22
epoch [45/50] batch [1680/2000] time 2.047 (2.027) data 0.001 (0.001) loss 0.9573 (1.1945) lr 2.2949e-04 eta 5:48:41
epoch [45/50] batch [1700/2000] time 2.023 (2.027) data 0.000 (0.001) loss 1.2959 (1.1925) lr 2.2949e-04 eta 5:48:00
epoch [45/50] batch [1720/2000] time 1.993 (2.027) data 0.000 (0.001) loss 0.2492 (1.1915) lr 2.2949e-04 eta 5:47:18
epoch [45/50] batch [1740/2000] time 2.043 (2.027) data 0.000 (0.001) loss 0.5056 (1.1891) lr 2.2949e-04 eta 5:46:37
epoch [45/50] batch [1760/2000] time 1.993 (2.027) data 0.000 (0.001) loss 0.6893 (1.1924) lr 2.2949e-04 eta 5:45:56
epoch [45/50] batch [1780/2000] time 2.047 (2.027) data 0.000 (0.001) loss 0.1424 (1.1909) lr 2.2949e-04 eta 5:45:16
epoch [45/50] batch [1800/2000] time 2.045 (2.027) data 0.000 (0.001) loss 0.6786 (1.1871) lr 2.2949e-04 eta 5:44:35
epoch [45/50] batch [1820/2000] time 2.026 (2.027) data 0.000 (0.001) loss 1.4049 (1.1879) lr 2.2949e-04 eta 5:43:54
epoch [45/50] batch [1840/2000] time 2.047 (2.027) data 0.000 (0.001) loss 0.8895 (1.1849) lr 2.2949e-04 eta 5:43:13
epoch [45/50] batch [1860/2000] time 2.047 (2.027) data 0.000 (0.000) loss 1.1465 (1.1802) lr 2.2949e-04 eta 5:42:33
epoch [45/50] batch [1880/2000] time 2.025 (2.027) data 0.000 (0.000) loss 0.1161 (1.1769) lr 2.2949e-04 eta 5:41:52
epoch [45/50] batch [1900/2000] time 2.048 (2.027) data 0.000 (0.000) loss 0.4749 (1.1773) lr 2.2949e-04 eta 5:41:11
epoch [45/50] batch [1920/2000] time 2.028 (2.027) data 0.000 (0.000) loss 0.6521 (1.1775) lr 2.2949e-04 eta 5:40:31
epoch [45/50] batch [1940/2000] time 2.055 (2.027) data 0.000 (0.000) loss 1.9718 (1.1803) lr 2.2949e-04 eta 5:39:51
epoch [45/50] batch [1960/2000] time 1.973 (2.027) data 0.000 (0.000) loss 0.6540 (1.1813) lr 2.2949e-04 eta 5:39:10
epoch [45/50] batch [1980/2000] time 1.997 (2.027) data 0.000 (0.000) loss 0.9053 (1.1775) lr 2.2949e-04 eta 5:38:29
epoch [45/50] batch [2000/2000] time 2.047 (2.027) data 0.000 (0.000) loss 1.6078 (1.1750) lr 1.9098e-04 eta 5:37:48
epoch [46/50] batch [20/2000] time 2.051 (2.052) data 0.000 (0.028) loss 0.2758 (1.0604) lr 1.9098e-04 eta 5:41:14
epoch [46/50] batch [40/2000] time 2.029 (2.039) data 0.000 (0.014) loss 3.8199 (1.1576) lr 1.9098e-04 eta 5:38:32
epoch [46/50] batch [60/2000] time 2.050 (2.037) data 0.001 (0.009) loss 1.4090 (1.2283) lr 1.9098e-04 eta 5:37:27
epoch [46/50] batch [80/2000] time 1.997 (2.033) data 0.000 (0.007) loss 0.3785 (1.3291) lr 1.9098e-04 eta 5:36:08
epoch [46/50] batch [100/2000] time 1.994 (2.031) data 0.000 (0.006) loss 0.5686 (1.3433) lr 1.9098e-04 eta 5:35:10
epoch [46/50] batch [120/2000] time 2.052 (2.031) data 0.000 (0.005) loss 0.7164 (1.3096) lr 1.9098e-04 eta 5:34:30
epoch [46/50] batch [140/2000] time 2.050 (2.032) data 0.000 (0.004) loss 1.3518 (1.3470) lr 1.9098e-04 eta 5:33:57
epoch [46/50] batch [160/2000] time 2.028 (2.033) data 0.000 (0.004) loss 0.5485 (1.3012) lr 1.9098e-04 eta 5:33:20
epoch [46/50] batch [180/2000] time 2.026 (2.032) data 0.000 (0.003) loss 0.4059 (1.2433) lr 1.9098e-04 eta 5:32:30
epoch [46/50] batch [200/2000] time 2.048 (2.032) data 0.000 (0.003) loss 1.0552 (1.2520) lr 1.9098e-04 eta 5:31:50
epoch [46/50] batch [220/2000] time 1.970 (2.031) data 0.000 (0.003) loss 0.4075 (1.2279) lr 1.9098e-04 eta 5:31:01
epoch [46/50] batch [240/2000] time 2.047 (2.030) data 0.000 (0.002) loss 0.8927 (1.2172) lr 1.9098e-04 eta 5:30:17
epoch [46/50] batch [260/2000] time 2.025 (2.030) data 0.000 (0.002) loss 2.0526 (1.2109) lr 1.9098e-04 eta 5:29:35
epoch [46/50] batch [280/2000] time 1.998 (2.030) data 0.000 (0.002) loss 2.1207 (1.2287) lr 1.9098e-04 eta 5:28:49
epoch [46/50] batch [300/2000] time 2.047 (2.030) data 0.000 (0.002) loss 1.7435 (1.2231) lr 1.9098e-04 eta 5:28:06
epoch [46/50] batch [320/2000] time 2.025 (2.030) data 0.000 (0.002) loss 1.3221 (1.2256) lr 1.9098e-04 eta 5:27:27
epoch [46/50] batch [340/2000] time 2.027 (2.030) data 0.000 (0.002) loss 1.6487 (1.2142) lr 1.9098e-04 eta 5:26:45
epoch [46/50] batch [360/2000] time 1.998 (2.029) data 0.000 (0.002) loss 0.4078 (1.1928) lr 1.9098e-04 eta 5:26:03
epoch [46/50] batch [380/2000] time 2.050 (2.029) data 0.000 (0.002) loss 1.3517 (1.2041) lr 1.9098e-04 eta 5:25:19
epoch [46/50] batch [400/2000] time 2.027 (2.029) data 0.000 (0.002) loss 2.0667 (1.2081) lr 1.9098e-04 eta 5:24:36
epoch [46/50] batch [420/2000] time 2.050 (2.029) data 0.000 (0.002) loss 0.7684 (1.1837) lr 1.9098e-04 eta 5:23:56
epoch [46/50] batch [440/2000] time 2.027 (2.029) data 0.000 (0.001) loss 2.8133 (1.1920) lr 1.9098e-04 eta 5:23:16
epoch [46/50] batch [460/2000] time 2.053 (2.029) data 0.000 (0.001) loss 0.7861 (1.2055) lr 1.9098e-04 eta 5:22:35
epoch [46/50] batch [480/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.1325 (1.2023) lr 1.9098e-04 eta 5:21:54
epoch [46/50] batch [500/2000] time 2.052 (2.029) data 0.000 (0.001) loss 0.3633 (1.1979) lr 1.9098e-04 eta 5:21:11
epoch [46/50] batch [520/2000] time 2.027 (2.029) data 0.000 (0.001) loss 1.8966 (1.1936) lr 1.9098e-04 eta 5:20:32
epoch [46/50] batch [540/2000] time 2.047 (2.028) data 0.000 (0.001) loss 1.6201 (1.1873) lr 1.9098e-04 eta 5:19:48
epoch [46/50] batch [560/2000] time 2.048 (2.028) data 0.000 (0.001) loss 1.5900 (1.1817) lr 1.9098e-04 eta 5:19:07
epoch [46/50] batch [580/2000] time 2.046 (2.029) data 0.000 (0.001) loss 0.5344 (1.1776) lr 1.9098e-04 eta 5:18:29
epoch [46/50] batch [600/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.1010 (1.1889) lr 1.9098e-04 eta 5:17:48
epoch [46/50] batch [620/2000] time 2.048 (2.029) data 0.000 (0.001) loss 2.1755 (1.1868) lr 1.9098e-04 eta 5:17:08
epoch [46/50] batch [640/2000] time 2.047 (2.029) data 0.000 (0.001) loss 2.6385 (1.1943) lr 1.9098e-04 eta 5:16:26
epoch [46/50] batch [660/2000] time 1.996 (2.028) data 0.000 (0.001) loss 1.2005 (1.1914) lr 1.9098e-04 eta 5:15:45
epoch [46/50] batch [680/2000] time 2.028 (2.028) data 0.000 (0.001) loss 1.5684 (1.1815) lr 1.9098e-04 eta 5:15:04
epoch [46/50] batch [700/2000] time 1.994 (2.028) data 0.000 (0.001) loss 1.2492 (1.1756) lr 1.9098e-04 eta 5:14:23
epoch [46/50] batch [720/2000] time 2.032 (2.028) data 0.000 (0.001) loss 0.6554 (1.1752) lr 1.9098e-04 eta 5:13:41
epoch [46/50] batch [740/2000] time 2.026 (2.028) data 0.000 (0.001) loss 2.2770 (1.1792) lr 1.9098e-04 eta 5:13:01
epoch [46/50] batch [760/2000] time 2.029 (2.028) data 0.000 (0.001) loss 2.0124 (1.1700) lr 1.9098e-04 eta 5:12:20
epoch [46/50] batch [780/2000] time 1.997 (2.028) data 0.000 (0.001) loss 3.0160 (1.1703) lr 1.9098e-04 eta 5:11:39
epoch [46/50] batch [800/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.7939 (1.1616) lr 1.9098e-04 eta 5:10:59
epoch [46/50] batch [820/2000] time 1.996 (2.028) data 0.000 (0.001) loss 1.3513 (1.1651) lr 1.9098e-04 eta 5:10:17
epoch [46/50] batch [840/2000] time 2.045 (2.028) data 0.000 (0.001) loss 2.1141 (1.1740) lr 1.9098e-04 eta 5:09:37
epoch [46/50] batch [860/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.0761 (1.1684) lr 1.9098e-04 eta 5:08:57
epoch [46/50] batch [880/2000] time 2.047 (2.028) data 0.000 (0.001) loss 0.4059 (1.1724) lr 1.9098e-04 eta 5:08:16
epoch [46/50] batch [900/2000] time 2.000 (2.028) data 0.000 (0.001) loss 1.4261 (1.1686) lr 1.9098e-04 eta 5:07:35
epoch [46/50] batch [920/2000] time 2.053 (2.028) data 0.000 (0.001) loss 1.2875 (1.1722) lr 1.9098e-04 eta 5:06:55
epoch [46/50] batch [940/2000] time 2.030 (2.028) data 0.000 (0.001) loss 0.0949 (1.1649) lr 1.9098e-04 eta 5:06:15
epoch [46/50] batch [960/2000] time 2.055 (2.028) data 0.000 (0.001) loss 3.0421 (1.1702) lr 1.9098e-04 eta 5:05:36
epoch [46/50] batch [980/2000] time 2.054 (2.028) data 0.000 (0.001) loss 1.2964 (1.1760) lr 1.9098e-04 eta 5:04:55
epoch [46/50] batch [1000/2000] time 2.053 (2.028) data 0.000 (0.001) loss 0.3961 (1.1816) lr 1.9098e-04 eta 5:04:16
epoch [46/50] batch [1020/2000] time 2.032 (2.029) data 0.000 (0.001) loss 2.4277 (1.1866) lr 1.9098e-04 eta 5:03:35
epoch [46/50] batch [1040/2000] time 1.995 (2.028) data 0.000 (0.001) loss 2.7121 (1.1857) lr 1.9098e-04 eta 5:02:55
epoch [46/50] batch [1060/2000] time 2.026 (2.028) data 0.000 (0.001) loss 0.2641 (1.1865) lr 1.9098e-04 eta 5:02:14
epoch [46/50] batch [1080/2000] time 2.024 (2.028) data 0.000 (0.001) loss 0.1975 (1.1915) lr 1.9098e-04 eta 5:01:33
epoch [46/50] batch [1100/2000] time 2.047 (2.028) data 0.000 (0.001) loss 1.1008 (1.1930) lr 1.9098e-04 eta 5:00:51
epoch [46/50] batch [1120/2000] time 2.046 (2.028) data 0.000 (0.001) loss 0.2254 (1.1934) lr 1.9098e-04 eta 5:00:10
epoch [46/50] batch [1140/2000] time 2.025 (2.028) data 0.001 (0.001) loss 0.8803 (1.1922) lr 1.9098e-04 eta 4:59:30
epoch [46/50] batch [1160/2000] time 2.045 (2.028) data 0.000 (0.001) loss 0.5338 (1.1895) lr 1.9098e-04 eta 4:58:49
epoch [46/50] batch [1180/2000] time 1.994 (2.028) data 0.000 (0.001) loss 0.7274 (1.1880) lr 1.9098e-04 eta 4:58:08
epoch [46/50] batch [1200/2000] time 2.047 (2.028) data 0.000 (0.001) loss 1.6816 (1.1814) lr 1.9098e-04 eta 4:57:27
epoch [46/50] batch [1220/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.7117 (1.1823) lr 1.9098e-04 eta 4:56:47
epoch [46/50] batch [1240/2000] time 2.029 (2.028) data 0.000 (0.001) loss 0.6500 (1.1816) lr 1.9098e-04 eta 4:56:06
epoch [46/50] batch [1260/2000] time 1.996 (2.028) data 0.000 (0.001) loss 1.1629 (1.1782) lr 1.9098e-04 eta 4:55:25
epoch [46/50] batch [1280/2000] time 1.972 (2.028) data 0.000 (0.001) loss 2.3946 (1.1793) lr 1.9098e-04 eta 4:54:44
epoch [46/50] batch [1300/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.1210 (1.1804) lr 1.9098e-04 eta 4:54:03
epoch [46/50] batch [1320/2000] time 2.048 (2.028) data 0.000 (0.001) loss 0.1639 (1.1787) lr 1.9098e-04 eta 4:53:23
epoch [46/50] batch [1340/2000] time 2.003 (2.028) data 0.000 (0.001) loss 1.1152 (1.1854) lr 1.9098e-04 eta 4:52:42
epoch [46/50] batch [1360/2000] time 2.002 (2.028) data 0.000 (0.001) loss 1.2171 (1.1865) lr 1.9098e-04 eta 4:52:01
epoch [46/50] batch [1380/2000] time 2.029 (2.028) data 0.000 (0.001) loss 0.6342 (1.1937) lr 1.9098e-04 eta 4:51:21
epoch [46/50] batch [1400/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.3396 (1.1885) lr 1.9098e-04 eta 4:50:41
epoch [46/50] batch [1420/2000] time 2.027 (2.028) data 0.000 (0.001) loss 1.1517 (1.1873) lr 1.9098e-04 eta 4:50:00
epoch [46/50] batch [1440/2000] time 2.027 (2.028) data 0.000 (0.001) loss 0.1916 (1.1831) lr 1.9098e-04 eta 4:49:19
epoch [46/50] batch [1460/2000] time 2.029 (2.028) data 0.000 (0.001) loss 0.8920 (1.1839) lr 1.9098e-04 eta 4:48:38
epoch [46/50] batch [1480/2000] time 2.030 (2.028) data 0.000 (0.001) loss 0.6897 (1.1828) lr 1.9098e-04 eta 4:47:57
epoch [46/50] batch [1500/2000] time 2.028 (2.028) data 0.000 (0.001) loss 3.2715 (1.1887) lr 1.9098e-04 eta 4:47:16
epoch [46/50] batch [1520/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.4072 (1.1876) lr 1.9098e-04 eta 4:46:36
epoch [46/50] batch [1540/2000] time 1.998 (2.028) data 0.000 (0.001) loss 3.1516 (1.1889) lr 1.9098e-04 eta 4:45:54
epoch [46/50] batch [1560/2000] time 2.025 (2.028) data 0.000 (0.001) loss 0.7370 (1.1878) lr 1.9098e-04 eta 4:45:14
epoch [46/50] batch [1580/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.5410 (1.1863) lr 1.9098e-04 eta 4:44:33
epoch [46/50] batch [1600/2000] time 2.052 (2.028) data 0.000 (0.001) loss 1.2261 (1.1853) lr 1.9098e-04 eta 4:43:52
epoch [46/50] batch [1620/2000] time 1.997 (2.028) data 0.000 (0.001) loss 0.2819 (1.1862) lr 1.9098e-04 eta 4:43:11
epoch [46/50] batch [1640/2000] time 2.049 (2.028) data 0.000 (0.001) loss 2.3273 (1.1869) lr 1.9098e-04 eta 4:42:31
epoch [46/50] batch [1660/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.1327 (1.1846) lr 1.9098e-04 eta 4:41:50
epoch [46/50] batch [1680/2000] time 2.025 (2.028) data 0.001 (0.001) loss 1.0916 (1.1859) lr 1.9098e-04 eta 4:41:09
epoch [46/50] batch [1700/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.6821 (1.1848) lr 1.9098e-04 eta 4:40:28
epoch [46/50] batch [1720/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.2969 (1.1833) lr 1.9098e-04 eta 4:39:48
epoch [46/50] batch [1740/2000] time 2.026 (2.028) data 0.000 (0.001) loss 2.5029 (1.1832) lr 1.9098e-04 eta 4:39:07
epoch [46/50] batch [1760/2000] time 2.029 (2.028) data 0.000 (0.001) loss 1.2079 (1.1805) lr 1.9098e-04 eta 4:38:26
epoch [46/50] batch [1780/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.0278 (1.1822) lr 1.9098e-04 eta 4:37:46
epoch [46/50] batch [1800/2000] time 2.047 (2.028) data 0.000 (0.000) loss 0.1467 (1.1800) lr 1.9098e-04 eta 4:37:06
epoch [46/50] batch [1820/2000] time 2.028 (2.028) data 0.000 (0.000) loss 0.3037 (1.1767) lr 1.9098e-04 eta 4:36:25
epoch [46/50] batch [1840/2000] time 2.049 (2.028) data 0.000 (0.000) loss 0.1893 (1.1725) lr 1.9098e-04 eta 4:35:44
epoch [46/50] batch [1860/2000] time 1.976 (2.028) data 0.000 (0.000) loss 0.3357 (1.1722) lr 1.9098e-04 eta 4:35:04
epoch [46/50] batch [1880/2000] time 2.051 (2.028) data 0.000 (0.000) loss 1.3338 (1.1689) lr 1.9098e-04 eta 4:34:24
epoch [46/50] batch [1900/2000] time 2.054 (2.028) data 0.000 (0.000) loss 1.0920 (1.1684) lr 1.9098e-04 eta 4:33:43
epoch [46/50] batch [1920/2000] time 2.049 (2.028) data 0.000 (0.000) loss 1.1252 (1.1693) lr 1.9098e-04 eta 4:33:02
epoch [46/50] batch [1940/2000] time 2.049 (2.028) data 0.000 (0.000) loss 0.1856 (1.1721) lr 1.9098e-04 eta 4:32:21
epoch [46/50] batch [1960/2000] time 1.971 (2.027) data 0.000 (0.000) loss 0.8011 (1.1698) lr 1.9098e-04 eta 4:31:40
epoch [46/50] batch [1980/2000] time 1.997 (2.027) data 0.000 (0.000) loss 0.5658 (1.1713) lr 1.9098e-04 eta 4:31:00
epoch [46/50] batch [2000/2000] time 1.993 (2.028) data 0.000 (0.000) loss 1.1195 (1.1735) lr 1.5567e-04 eta 4:30:20
epoch [47/50] batch [20/2000] time 2.049 (2.063) data 0.000 (0.028) loss 0.1227 (1.2535) lr 1.5567e-04 eta 4:34:24
epoch [47/50] batch [40/2000] time 1.994 (2.044) data 0.000 (0.014) loss 2.1737 (1.3190) lr 1.5567e-04 eta 4:31:09
epoch [47/50] batch [60/2000] time 2.025 (2.038) data 0.001 (0.009) loss 1.7023 (1.3593) lr 1.5567e-04 eta 4:29:42
epoch [47/50] batch [80/2000] time 2.048 (2.036) data 0.000 (0.007) loss 1.2442 (1.3962) lr 1.5567e-04 eta 4:28:46
epoch [47/50] batch [100/2000] time 2.049 (2.034) data 0.000 (0.006) loss 1.1637 (1.3758) lr 1.5567e-04 eta 4:27:49
epoch [47/50] batch [120/2000] time 1.996 (2.033) data 0.000 (0.005) loss 0.9397 (1.2797) lr 1.5567e-04 eta 4:27:01
epoch [47/50] batch [140/2000] time 2.053 (2.033) data 0.000 (0.004) loss 0.3995 (1.2793) lr 1.5567e-04 eta 4:26:16
epoch [47/50] batch [160/2000] time 1.999 (2.033) data 0.000 (0.004) loss 1.4452 (1.2682) lr 1.5567e-04 eta 4:25:38
epoch [47/50] batch [180/2000] time 2.052 (2.033) data 0.000 (0.003) loss 0.0845 (1.2022) lr 1.5567e-04 eta 4:24:55
epoch [47/50] batch [200/2000] time 2.028 (2.032) data 0.000 (0.003) loss 0.4271 (1.1867) lr 1.5567e-04 eta 4:24:10
epoch [47/50] batch [220/2000] time 2.051 (2.033) data 0.000 (0.003) loss 0.6028 (1.1658) lr 1.5567e-04 eta 4:23:33
epoch [47/50] batch [240/2000] time 2.035 (2.033) data 0.000 (0.002) loss 0.9329 (1.1719) lr 1.5567e-04 eta 4:22:52
epoch [47/50] batch [260/2000] time 2.054 (2.033) data 0.000 (0.002) loss 1.2871 (1.1441) lr 1.5567e-04 eta 4:22:14
epoch [47/50] batch [280/2000] time 2.053 (2.033) data 0.000 (0.002) loss 1.9928 (1.1374) lr 1.5567e-04 eta 4:21:31
epoch [47/50] batch [300/2000] time 2.049 (2.032) data 0.000 (0.002) loss 2.1940 (1.1479) lr 1.5567e-04 eta 4:20:49
epoch [47/50] batch [320/2000] time 2.031 (2.032) data 0.000 (0.002) loss 0.5892 (1.1403) lr 1.5567e-04 eta 4:20:07
epoch [47/50] batch [340/2000] time 2.000 (2.033) data 0.000 (0.002) loss 0.7874 (1.1266) lr 1.5567e-04 eta 4:19:29
epoch [47/50] batch [360/2000] time 2.029 (2.033) data 0.000 (0.002) loss 1.6681 (1.1235) lr 1.5567e-04 eta 4:18:48
epoch [47/50] batch [380/2000] time 2.055 (2.033) data 0.000 (0.002) loss 0.3691 (1.1359) lr 1.5567e-04 eta 4:18:08
epoch [47/50] batch [400/2000] time 1.997 (2.033) data 0.000 (0.002) loss 0.6427 (1.1578) lr 1.5567e-04 eta 4:17:27
epoch [47/50] batch [420/2000] time 1.997 (2.033) data 0.000 (0.002) loss 1.5526 (1.1588) lr 1.5567e-04 eta 4:16:47
epoch [47/50] batch [440/2000] time 2.031 (2.033) data 0.000 (0.001) loss 0.6349 (1.1536) lr 1.5567e-04 eta 4:16:06
epoch [47/50] batch [460/2000] time 2.056 (2.033) data 0.000 (0.001) loss 0.5240 (1.1525) lr 1.5567e-04 eta 4:15:26
epoch [47/50] batch [480/2000] time 2.055 (2.033) data 0.000 (0.001) loss 1.3364 (1.1568) lr 1.5567e-04 eta 4:14:46
epoch [47/50] batch [500/2000] time 2.060 (2.033) data 0.000 (0.001) loss 2.0102 (1.1598) lr 1.5567e-04 eta 4:14:08
epoch [47/50] batch [520/2000] time 2.060 (2.033) data 0.000 (0.001) loss 2.4009 (1.1614) lr 1.5567e-04 eta 4:13:28
epoch [47/50] batch [540/2000] time 2.058 (2.033) data 0.000 (0.001) loss 1.4468 (1.1514) lr 1.5567e-04 eta 4:12:48
epoch [47/50] batch [560/2000] time 2.002 (2.033) data 0.000 (0.001) loss 2.3115 (1.1691) lr 1.5567e-04 eta 4:12:07
epoch [47/50] batch [580/2000] time 2.057 (2.033) data 0.000 (0.001) loss 0.0844 (1.1615) lr 1.5567e-04 eta 4:11:26
epoch [47/50] batch [600/2000] time 2.039 (2.033) data 0.001 (0.001) loss 0.0397 (1.1538) lr 1.5567e-04 eta 4:10:46
epoch [47/50] batch [620/2000] time 2.060 (2.033) data 0.000 (0.001) loss 0.3491 (1.1462) lr 1.5567e-04 eta 4:10:05
epoch [47/50] batch [640/2000] time 2.056 (2.033) data 0.000 (0.001) loss 1.8412 (1.1434) lr 1.5567e-04 eta 4:09:25
epoch [47/50] batch [660/2000] time 2.056 (2.033) data 0.000 (0.001) loss 1.1867 (1.1553) lr 1.5567e-04 eta 4:08:44
epoch [47/50] batch [680/2000] time 2.055 (2.033) data 0.000 (0.001) loss 0.9083 (1.1576) lr 1.5567e-04 eta 4:08:04
epoch [47/50] batch [700/2000] time 2.053 (2.033) data 0.000 (0.001) loss 2.9032 (1.1608) lr 1.5567e-04 eta 4:07:23
epoch [47/50] batch [720/2000] time 2.059 (2.033) data 0.000 (0.001) loss 0.0897 (1.1579) lr 1.5567e-04 eta 4:06:41
epoch [47/50] batch [740/2000] time 1.999 (2.033) data 0.000 (0.001) loss 2.0149 (1.1551) lr 1.5567e-04 eta 4:06:01
epoch [47/50] batch [760/2000] time 2.061 (2.033) data 0.000 (0.001) loss 1.0546 (1.1555) lr 1.5567e-04 eta 4:05:20
epoch [47/50] batch [780/2000] time 1.998 (2.033) data 0.000 (0.001) loss 1.1540 (1.1506) lr 1.5567e-04 eta 4:04:40
epoch [47/50] batch [800/2000] time 1.974 (2.033) data 0.000 (0.001) loss 0.7052 (1.1475) lr 1.5567e-04 eta 4:03:59
epoch [47/50] batch [820/2000] time 2.030 (2.033) data 0.000 (0.001) loss 1.4252 (1.1456) lr 1.5567e-04 eta 4:03:18
epoch [47/50] batch [840/2000] time 2.112 (2.035) data 0.000 (0.001) loss 0.8635 (1.1485) lr 1.5567e-04 eta 4:02:50
epoch [47/50] batch [860/2000] time 2.079 (2.037) data 0.000 (0.001) loss 0.7932 (1.1507) lr 1.5567e-04 eta 4:02:23
epoch [47/50] batch [880/2000] time 2.039 (2.038) data 0.000 (0.001) loss 4.1449 (1.1484) lr 1.5567e-04 eta 4:01:48
epoch [47/50] batch [900/2000] time 2.040 (2.038) data 0.000 (0.001) loss 1.3792 (1.1541) lr 1.5567e-04 eta 4:01:08
epoch [47/50] batch [920/2000] time 2.056 (2.038) data 0.000 (0.001) loss 0.9074 (1.1609) lr 1.5567e-04 eta 4:00:28
epoch [47/50] batch [940/2000] time 1.998 (2.038) data 0.000 (0.001) loss 0.2096 (1.1624) lr 1.5567e-04 eta 3:59:46
epoch [47/50] batch [960/2000] time 2.031 (2.038) data 0.000 (0.001) loss 0.8333 (1.1628) lr 1.5567e-04 eta 3:59:05
epoch [47/50] batch [980/2000] time 2.053 (2.038) data 0.000 (0.001) loss 2.0146 (1.1586) lr 1.5567e-04 eta 3:58:24
epoch [47/50] batch [1000/2000] time 2.032 (2.037) data 0.000 (0.001) loss 0.0525 (1.1575) lr 1.5567e-04 eta 3:57:41
epoch [47/50] batch [1020/2000] time 2.000 (2.037) data 0.000 (0.001) loss 2.3311 (1.1583) lr 1.5567e-04 eta 3:56:59
epoch [47/50] batch [1040/2000] time 2.034 (2.037) data 0.000 (0.001) loss 1.2356 (1.1601) lr 1.5567e-04 eta 3:56:17
epoch [47/50] batch [1060/2000] time 2.057 (2.037) data 0.000 (0.001) loss 0.2537 (1.1602) lr 1.5567e-04 eta 3:55:36
epoch [47/50] batch [1080/2000] time 2.056 (2.037) data 0.000 (0.001) loss 0.6005 (1.1582) lr 1.5567e-04 eta 3:54:53
epoch [47/50] batch [1100/2000] time 2.056 (2.037) data 0.000 (0.001) loss 1.4880 (1.1618) lr 1.5567e-04 eta 3:54:12
epoch [47/50] batch [1120/2000] time 1.973 (2.037) data 0.000 (0.001) loss 0.0656 (1.1607) lr 1.5567e-04 eta 3:53:31
epoch [47/50] batch [1140/2000] time 2.052 (2.037) data 0.001 (0.001) loss 0.9828 (1.1675) lr 1.5567e-04 eta 3:52:51
epoch [47/50] batch [1160/2000] time 2.054 (2.036) data 0.000 (0.001) loss 0.2169 (1.1663) lr 1.5567e-04 eta 3:52:09
epoch [47/50] batch [1180/2000] time 2.002 (2.036) data 0.000 (0.001) loss 1.5718 (1.1624) lr 1.5567e-04 eta 3:51:28
epoch [47/50] batch [1200/2000] time 2.031 (2.036) data 0.000 (0.001) loss 1.4055 (1.1601) lr 1.5567e-04 eta 3:50:47
epoch [47/50] batch [1220/2000] time 2.053 (2.036) data 0.000 (0.001) loss 0.1461 (1.1598) lr 1.5567e-04 eta 3:50:05
epoch [47/50] batch [1240/2000] time 2.053 (2.036) data 0.000 (0.001) loss 0.6937 (1.1590) lr 1.5567e-04 eta 3:49:23
epoch [47/50] batch [1260/2000] time 2.033 (2.036) data 0.000 (0.001) loss 0.4457 (1.1604) lr 1.5567e-04 eta 3:48:42
epoch [47/50] batch [1280/2000] time 1.995 (2.036) data 0.000 (0.001) loss 0.3702 (1.1628) lr 1.5567e-04 eta 3:48:00
epoch [47/50] batch [1300/2000] time 1.971 (2.036) data 0.000 (0.001) loss 0.0476 (1.1669) lr 1.5567e-04 eta 3:47:19
epoch [47/50] batch [1320/2000] time 2.030 (2.036) data 0.000 (0.001) loss 0.0409 (1.1639) lr 1.5567e-04 eta 3:46:37
epoch [47/50] batch [1340/2000] time 2.057 (2.036) data 0.000 (0.001) loss 0.8702 (1.1672) lr 1.5567e-04 eta 3:45:57
epoch [47/50] batch [1360/2000] time 1.997 (2.036) data 0.000 (0.001) loss 1.9854 (1.1646) lr 1.5567e-04 eta 3:45:15
epoch [47/50] batch [1380/2000] time 2.031 (2.035) data 0.000 (0.001) loss 1.1160 (1.1702) lr 1.5567e-04 eta 3:44:34
epoch [47/50] batch [1400/2000] time 2.055 (2.035) data 0.000 (0.001) loss 2.4030 (1.1707) lr 1.5567e-04 eta 3:43:53
epoch [47/50] batch [1420/2000] time 2.054 (2.035) data 0.000 (0.001) loss 0.5041 (1.1672) lr 1.5567e-04 eta 3:43:12
epoch [47/50] batch [1440/2000] time 2.054 (2.035) data 0.000 (0.001) loss 0.5630 (1.1617) lr 1.5567e-04 eta 3:42:31
epoch [47/50] batch [1460/2000] time 2.029 (2.035) data 0.000 (0.001) loss 0.8098 (1.1566) lr 1.5567e-04 eta 3:41:50
epoch [47/50] batch [1480/2000] time 2.033 (2.035) data 0.000 (0.001) loss 0.1379 (1.1583) lr 1.5567e-04 eta 3:41:09
epoch [47/50] batch [1500/2000] time 1.995 (2.035) data 0.000 (0.001) loss 0.8416 (1.1582) lr 1.5567e-04 eta 3:40:28
epoch [47/50] batch [1520/2000] time 2.054 (2.035) data 0.000 (0.001) loss 0.1027 (1.1567) lr 1.5567e-04 eta 3:39:47
epoch [47/50] batch [1540/2000] time 1.998 (2.035) data 0.000 (0.001) loss 0.0252 (1.1525) lr 1.5567e-04 eta 3:39:05
epoch [47/50] batch [1560/2000] time 2.029 (2.035) data 0.000 (0.001) loss 0.5181 (1.1552) lr 1.5567e-04 eta 3:38:25
epoch [47/50] batch [1580/2000] time 2.024 (2.035) data 0.000 (0.001) loss 1.5926 (1.1525) lr 1.5567e-04 eta 3:37:43
epoch [47/50] batch [1600/2000] time 2.052 (2.035) data 0.000 (0.001) loss 2.6633 (1.1506) lr 1.5567e-04 eta 3:37:02
epoch [47/50] batch [1620/2000] time 2.029 (2.035) data 0.000 (0.001) loss 0.4190 (1.1507) lr 1.5567e-04 eta 3:36:21
epoch [47/50] batch [1640/2000] time 2.053 (2.035) data 0.000 (0.001) loss 1.7076 (1.1480) lr 1.5567e-04 eta 3:35:40
epoch [47/50] batch [1660/2000] time 2.032 (2.035) data 0.000 (0.001) loss 1.3934 (1.1485) lr 1.5567e-04 eta 3:34:59
epoch [47/50] batch [1680/2000] time 1.976 (2.035) data 0.001 (0.001) loss 0.1293 (1.1479) lr 1.5567e-04 eta 3:34:18
epoch [47/50] batch [1700/2000] time 2.034 (2.035) data 0.000 (0.001) loss 2.7329 (1.1478) lr 1.5567e-04 eta 3:33:37
epoch [47/50] batch [1720/2000] time 2.053 (2.035) data 0.000 (0.001) loss 0.9784 (1.1514) lr 1.5567e-04 eta 3:32:57
epoch [47/50] batch [1740/2000] time 2.053 (2.035) data 0.000 (0.001) loss 1.6499 (1.1562) lr 1.5567e-04 eta 3:32:16
epoch [47/50] batch [1760/2000] time 2.053 (2.034) data 0.000 (0.001) loss 0.1278 (1.1564) lr 1.5567e-04 eta 3:31:34
epoch [47/50] batch [1780/2000] time 1.996 (2.034) data 0.000 (0.001) loss 1.0649 (1.1575) lr 1.5567e-04 eta 3:30:54
epoch [47/50] batch [1800/2000] time 2.056 (2.034) data 0.000 (0.001) loss 1.2319 (1.1552) lr 1.5567e-04 eta 3:30:12
epoch [47/50] batch [1820/2000] time 2.001 (2.034) data 0.000 (0.001) loss 0.3467 (1.1552) lr 1.5567e-04 eta 3:29:32
epoch [47/50] batch [1840/2000] time 2.001 (2.034) data 0.000 (0.001) loss 0.0688 (1.1567) lr 1.5567e-04 eta 3:28:51
epoch [47/50] batch [1860/2000] time 2.060 (2.034) data 0.000 (0.001) loss 2.8180 (1.1570) lr 1.5567e-04 eta 3:28:10
epoch [47/50] batch [1880/2000] time 2.060 (2.034) data 0.000 (0.001) loss 1.4281 (1.1544) lr 1.5567e-04 eta 3:27:30
epoch [47/50] batch [1900/2000] time 2.059 (2.034) data 0.000 (0.001) loss 0.1031 (1.1547) lr 1.5567e-04 eta 3:26:49
epoch [47/50] batch [1920/2000] time 1.998 (2.034) data 0.000 (0.001) loss 0.4915 (1.1531) lr 1.5567e-04 eta 3:26:08
epoch [47/50] batch [1940/2000] time 2.001 (2.034) data 0.000 (0.001) loss 1.4757 (1.1538) lr 1.5567e-04 eta 3:25:27
epoch [47/50] batch [1960/2000] time 2.001 (2.034) data 0.000 (0.001) loss 0.2255 (1.1532) lr 1.5567e-04 eta 3:24:46
epoch [47/50] batch [1980/2000] time 2.001 (2.034) data 0.000 (0.001) loss 0.1057 (1.1536) lr 1.5567e-04 eta 3:24:06
epoch [47/50] batch [2000/2000] time 2.034 (2.034) data 0.000 (0.000) loss 2.1209 (1.1556) lr 1.2369e-04 eta 3:23:25
epoch [48/50] batch [20/2000] time 2.036 (2.064) data 0.000 (0.029) loss 1.5749 (1.2170) lr 1.2369e-04 eta 3:25:41
epoch [48/50] batch [40/2000] time 2.035 (2.052) data 0.000 (0.015) loss 2.7857 (1.1289) lr 1.2369e-04 eta 3:23:47
epoch [48/50] batch [60/2000] time 2.019 (2.049) data 0.001 (0.010) loss 1.7304 (1.1431) lr 1.2369e-04 eta 3:22:49
epoch [48/50] batch [80/2000] time 2.057 (2.047) data 0.000 (0.007) loss 1.9347 (1.1932) lr 1.2369e-04 eta 3:21:58
epoch [48/50] batch [100/2000] time 2.083 (2.048) data 0.000 (0.006) loss 1.9017 (1.2179) lr 1.2369e-04 eta 3:21:21
epoch [48/50] batch [120/2000] time 2.003 (2.048) data 0.000 (0.005) loss 0.3025 (1.1919) lr 1.2369e-04 eta 3:20:42
epoch [48/50] batch [140/2000] time 2.057 (2.046) data 0.000 (0.004) loss 1.5574 (1.1965) lr 1.2369e-04 eta 3:19:52
epoch [48/50] batch [160/2000] time 2.056 (2.045) data 0.000 (0.004) loss 0.1744 (1.2334) lr 1.2369e-04 eta 3:19:02
epoch [48/50] batch [180/2000] time 2.003 (2.043) data 0.000 (0.003) loss 3.4877 (1.2568) lr 1.2369e-04 eta 3:18:12
epoch [48/50] batch [200/2000] time 2.034 (2.042) data 0.000 (0.003) loss 1.9792 (1.2938) lr 1.2369e-04 eta 3:17:25
epoch [48/50] batch [220/2000] time 1.996 (2.041) data 0.000 (0.003) loss 0.6841 (1.3228) lr 1.2369e-04 eta 3:16:37
epoch [48/50] batch [240/2000] time 2.049 (2.040) data 0.000 (0.003) loss 0.1842 (1.3054) lr 1.2369e-04 eta 3:15:50
epoch [48/50] batch [260/2000] time 2.000 (2.039) data 0.000 (0.002) loss 0.7363 (1.2901) lr 1.2369e-04 eta 3:15:05
epoch [48/50] batch [280/2000] time 2.051 (2.039) data 0.000 (0.002) loss 0.7315 (1.2995) lr 1.2369e-04 eta 3:14:23
epoch [48/50] batch [300/2000] time 1.972 (2.038) data 0.000 (0.002) loss 1.4187 (1.3059) lr 1.2369e-04 eta 3:13:38
epoch [48/50] batch [320/2000] time 1.997 (2.038) data 0.000 (0.002) loss 1.4634 (1.3241) lr 1.2369e-04 eta 3:12:54
epoch [48/50] batch [340/2000] time 2.028 (2.037) data 0.000 (0.002) loss 2.0869 (1.3259) lr 1.2369e-04 eta 3:12:09
epoch [48/50] batch [360/2000] time 2.028 (2.037) data 0.000 (0.002) loss 0.9608 (1.3099) lr 1.2369e-04 eta 3:11:28
epoch [48/50] batch [380/2000] time 2.028 (2.036) data 0.000 (0.002) loss 0.7497 (1.3006) lr 1.2369e-04 eta 3:10:44
epoch [48/50] batch [400/2000] time 2.025 (2.036) data 0.000 (0.002) loss 0.9755 (1.2835) lr 1.2369e-04 eta 3:10:01
epoch [48/50] batch [420/2000] time 2.002 (2.036) data 0.000 (0.002) loss 1.1975 (1.2990) lr 1.2369e-04 eta 3:09:19
epoch [48/50] batch [440/2000] time 1.997 (2.035) data 0.000 (0.002) loss 2.8937 (1.2845) lr 1.2369e-04 eta 3:08:36
epoch [48/50] batch [460/2000] time 2.027 (2.035) data 0.000 (0.001) loss 0.8975 (1.2763) lr 1.2369e-04 eta 3:07:53
epoch [48/50] batch [480/2000] time 2.050 (2.035) data 0.000 (0.001) loss 1.5063 (1.2705) lr 1.2369e-04 eta 3:07:12
epoch [48/50] batch [500/2000] time 1.996 (2.034) data 0.000 (0.001) loss 2.3598 (1.2665) lr 1.2369e-04 eta 3:06:28
epoch [48/50] batch [520/2000] time 2.027 (2.034) data 0.000 (0.001) loss 0.2672 (1.2619) lr 1.2369e-04 eta 3:05:47
epoch [48/50] batch [540/2000] time 2.048 (2.034) data 0.000 (0.001) loss 0.5704 (1.2509) lr 1.2369e-04 eta 3:05:04
epoch [48/50] batch [560/2000] time 1.974 (2.033) data 0.000 (0.001) loss 1.3388 (1.2423) lr 1.2369e-04 eta 3:04:21
epoch [48/50] batch [580/2000] time 1.998 (2.033) data 0.000 (0.001) loss 1.7230 (1.2426) lr 1.2369e-04 eta 3:03:39
epoch [48/50] batch [600/2000] time 2.026 (2.033) data 0.001 (0.001) loss 0.1122 (1.2360) lr 1.2369e-04 eta 3:02:58
epoch [48/50] batch [620/2000] time 2.054 (2.033) data 0.000 (0.001) loss 1.2871 (1.2416) lr 1.2369e-04 eta 3:02:17
epoch [48/50] batch [640/2000] time 1.996 (2.033) data 0.000 (0.001) loss 1.9410 (1.2440) lr 1.2369e-04 eta 3:01:36
epoch [48/50] batch [660/2000] time 2.048 (2.033) data 0.000 (0.001) loss 1.7599 (1.2529) lr 1.2369e-04 eta 3:00:55
epoch [48/50] batch [680/2000] time 2.049 (2.032) data 0.000 (0.001) loss 1.7856 (1.2550) lr 1.2369e-04 eta 3:00:12
epoch [48/50] batch [700/2000] time 1.996 (2.032) data 0.000 (0.001) loss 1.0493 (1.2456) lr 1.2369e-04 eta 2:59:30
epoch [48/50] batch [720/2000] time 2.030 (2.032) data 0.000 (0.001) loss 0.2889 (1.2453) lr 1.2369e-04 eta 2:58:49
epoch [48/50] batch [740/2000] time 2.051 (2.032) data 0.000 (0.001) loss 0.2235 (1.2476) lr 1.2369e-04 eta 2:58:08
epoch [48/50] batch [760/2000] time 2.053 (2.032) data 0.000 (0.001) loss 0.5200 (1.2490) lr 1.2369e-04 eta 2:57:26
epoch [48/50] batch [780/2000] time 2.050 (2.032) data 0.000 (0.001) loss 0.2694 (1.2377) lr 1.2369e-04 eta 2:56:45
epoch [48/50] batch [800/2000] time 2.064 (2.032) data 0.000 (0.001) loss 3.3167 (1.2409) lr 1.2369e-04 eta 2:56:04
epoch [48/50] batch [820/2000] time 2.047 (2.032) data 0.000 (0.001) loss 0.0344 (1.2383) lr 1.2369e-04 eta 2:55:23
epoch [48/50] batch [840/2000] time 2.025 (2.031) data 0.000 (0.001) loss 1.3015 (1.2380) lr 1.2369e-04 eta 2:54:41
epoch [48/50] batch [860/2000] time 1.995 (2.031) data 0.000 (0.001) loss 2.5861 (1.2358) lr 1.2369e-04 eta 2:54:00
epoch [48/50] batch [880/2000] time 2.025 (2.031) data 0.000 (0.001) loss 1.0823 (1.2315) lr 1.2369e-04 eta 2:53:19
epoch [48/50] batch [900/2000] time 2.056 (2.031) data 0.000 (0.001) loss 0.0972 (1.2295) lr 1.2369e-04 eta 2:52:38
epoch [48/50] batch [920/2000] time 2.003 (2.031) data 0.000 (0.001) loss 0.9108 (1.2259) lr 1.2369e-04 eta 2:51:58
epoch [48/50] batch [940/2000] time 2.052 (2.031) data 0.000 (0.001) loss 0.7478 (1.2209) lr 1.2369e-04 eta 2:51:17
epoch [48/50] batch [960/2000] time 2.000 (2.031) data 0.000 (0.001) loss 1.4761 (1.2215) lr 1.2369e-04 eta 2:50:37
epoch [48/50] batch [980/2000] time 2.049 (2.031) data 0.000 (0.001) loss 0.0818 (1.2182) lr 1.2369e-04 eta 2:49:56
epoch [48/50] batch [1000/2000] time 2.047 (2.031) data 0.000 (0.001) loss 2.8786 (1.2135) lr 1.2369e-04 eta 2:49:15
epoch [48/50] batch [1020/2000] time 2.053 (2.031) data 0.000 (0.001) loss 0.0679 (1.2201) lr 1.2369e-04 eta 2:48:35
epoch [48/50] batch [1040/2000] time 2.031 (2.031) data 0.000 (0.001) loss 0.2719 (1.2240) lr 1.2369e-04 eta 2:47:53
epoch [48/50] batch [1060/2000] time 2.000 (2.031) data 0.000 (0.001) loss 1.2555 (1.2304) lr 1.2369e-04 eta 2:47:13
epoch [48/50] batch [1080/2000] time 2.051 (2.031) data 0.000 (0.001) loss 2.3049 (1.2266) lr 1.2369e-04 eta 2:46:33
epoch [48/50] batch [1100/2000] time 2.049 (2.031) data 0.000 (0.001) loss 1.2961 (1.2178) lr 1.2369e-04 eta 2:45:51
epoch [48/50] batch [1120/2000] time 2.030 (2.031) data 0.000 (0.001) loss 1.0630 (1.2186) lr 1.2369e-04 eta 2:45:10
epoch [48/50] batch [1140/2000] time 1.996 (2.031) data 0.001 (0.001) loss 1.3019 (1.2168) lr 1.2369e-04 eta 2:44:29
epoch [48/50] batch [1160/2000] time 2.030 (2.031) data 0.000 (0.001) loss 0.9425 (1.2156) lr 1.2369e-04 eta 2:43:48
epoch [48/50] batch [1180/2000] time 2.033 (2.031) data 0.000 (0.001) loss 1.1349 (1.2158) lr 1.2369e-04 eta 2:43:07
epoch [48/50] batch [1200/2000] time 2.054 (2.031) data 0.000 (0.001) loss 1.7453 (1.2134) lr 1.2369e-04 eta 2:42:27
epoch [48/50] batch [1220/2000] time 2.030 (2.031) data 0.000 (0.001) loss 1.3713 (1.2094) lr 1.2369e-04 eta 2:41:46
epoch [48/50] batch [1240/2000] time 2.053 (2.031) data 0.000 (0.001) loss 1.1808 (1.2044) lr 1.2369e-04 eta 2:41:06
epoch [48/50] batch [1260/2000] time 2.049 (2.031) data 0.000 (0.001) loss 1.8874 (1.2017) lr 1.2369e-04 eta 2:40:25
epoch [48/50] batch [1280/2000] time 2.048 (2.031) data 0.000 (0.001) loss 2.1924 (1.1958) lr 1.2369e-04 eta 2:39:44
epoch [48/50] batch [1300/2000] time 1.973 (2.031) data 0.000 (0.001) loss 0.8643 (1.1917) lr 1.2369e-04 eta 2:39:03
epoch [48/50] batch [1320/2000] time 2.050 (2.030) data 0.000 (0.001) loss 1.4022 (1.1914) lr 1.2369e-04 eta 2:38:22
epoch [48/50] batch [1340/2000] time 1.995 (2.030) data 0.000 (0.001) loss 1.1631 (1.1854) lr 1.2369e-04 eta 2:37:41
epoch [48/50] batch [1360/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.9847 (1.1848) lr 1.2369e-04 eta 2:37:00
epoch [48/50] batch [1380/2000] time 2.049 (2.030) data 0.000 (0.001) loss 0.2696 (1.1873) lr 1.2369e-04 eta 2:36:20
epoch [48/50] batch [1400/2000] time 1.994 (2.030) data 0.000 (0.001) loss 0.3363 (1.1906) lr 1.2369e-04 eta 2:35:38
epoch [48/50] batch [1420/2000] time 2.048 (2.030) data 0.000 (0.001) loss 2.4648 (1.1888) lr 1.2369e-04 eta 2:34:57
epoch [48/50] batch [1440/2000] time 1.972 (2.030) data 0.000 (0.001) loss 1.6159 (1.1859) lr 1.2369e-04 eta 2:34:16
epoch [48/50] batch [1460/2000] time 2.025 (2.030) data 0.000 (0.001) loss 0.2638 (1.1840) lr 1.2369e-04 eta 2:33:35
epoch [48/50] batch [1480/2000] time 2.047 (2.030) data 0.000 (0.001) loss 0.8628 (1.1822) lr 1.2369e-04 eta 2:32:54
epoch [48/50] batch [1500/2000] time 1.995 (2.030) data 0.000 (0.001) loss 0.5186 (1.1803) lr 1.2369e-04 eta 2:32:13
epoch [48/50] batch [1520/2000] time 2.030 (2.030) data 0.000 (0.001) loss 0.2751 (1.1753) lr 1.2369e-04 eta 2:31:32
epoch [48/50] batch [1540/2000] time 2.051 (2.030) data 0.000 (0.001) loss 0.1945 (1.1745) lr 1.2369e-04 eta 2:30:52
epoch [48/50] batch [1560/2000] time 1.996 (2.030) data 0.000 (0.001) loss 0.2567 (1.1728) lr 1.2369e-04 eta 2:30:11
epoch [48/50] batch [1580/2000] time 1.994 (2.030) data 0.000 (0.001) loss 0.5583 (1.1726) lr 1.2369e-04 eta 2:29:30
epoch [48/50] batch [1600/2000] time 1.993 (2.030) data 0.000 (0.001) loss 0.5516 (1.1683) lr 1.2369e-04 eta 2:28:49
epoch [48/50] batch [1620/2000] time 2.051 (2.030) data 0.000 (0.001) loss 1.0960 (1.1647) lr 1.2369e-04 eta 2:28:09
epoch [48/50] batch [1640/2000] time 2.046 (2.030) data 0.000 (0.001) loss 1.1938 (1.1613) lr 1.2369e-04 eta 2:27:29
epoch [48/50] batch [1660/2000] time 2.046 (2.030) data 0.000 (0.001) loss 1.9425 (1.1624) lr 1.2369e-04 eta 2:26:48
epoch [48/50] batch [1680/2000] time 2.047 (2.030) data 0.001 (0.001) loss 1.8832 (1.1665) lr 1.2369e-04 eta 2:26:07
epoch [48/50] batch [1700/2000] time 1.974 (2.029) data 0.000 (0.001) loss 3.4728 (1.1712) lr 1.2369e-04 eta 2:25:26
epoch [48/50] batch [1720/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.9141 (1.1711) lr 1.2369e-04 eta 2:24:45
epoch [48/50] batch [1740/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.8953 (1.1673) lr 1.2369e-04 eta 2:24:04
epoch [48/50] batch [1760/2000] time 2.048 (2.029) data 0.000 (0.001) loss 2.5701 (1.1690) lr 1.2369e-04 eta 2:23:23
epoch [48/50] batch [1780/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.2200 (1.1677) lr 1.2369e-04 eta 2:22:43
epoch [48/50] batch [1800/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.9056 (1.1706) lr 1.2369e-04 eta 2:22:02
epoch [48/50] batch [1820/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.5801 (1.1668) lr 1.2369e-04 eta 2:21:22
epoch [48/50] batch [1840/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.8005 (1.1684) lr 1.2369e-04 eta 2:20:41
epoch [48/50] batch [1860/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.6632 (1.1703) lr 1.2369e-04 eta 2:20:01
epoch [48/50] batch [1880/2000] time 2.026 (2.029) data 0.000 (0.001) loss 1.8871 (1.1741) lr 1.2369e-04 eta 2:19:20
epoch [48/50] batch [1900/2000] time 2.050 (2.029) data 0.000 (0.001) loss 3.4789 (1.1752) lr 1.2369e-04 eta 2:18:39
epoch [48/50] batch [1920/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.1069 (1.1752) lr 1.2369e-04 eta 2:17:59
epoch [48/50] batch [1940/2000] time 2.029 (2.029) data 0.000 (0.001) loss 0.7176 (1.1724) lr 1.2369e-04 eta 2:17:18
epoch [48/50] batch [1960/2000] time 2.026 (2.029) data 0.000 (0.001) loss 0.8951 (1.1756) lr 1.2369e-04 eta 2:16:37
epoch [48/50] batch [1980/2000] time 2.046 (2.029) data 0.000 (0.000) loss 0.1742 (1.1718) lr 1.2369e-04 eta 2:15:57
epoch [48/50] batch [2000/2000] time 2.045 (2.029) data 0.000 (0.000) loss 1.4120 (1.1763) lr 9.5173e-05 eta 2:15:16
epoch [49/50] batch [20/2000] time 2.024 (2.049) data 0.000 (0.028) loss 1.0184 (0.9581) lr 9.5173e-05 eta 2:15:55
epoch [49/50] batch [40/2000] time 2.048 (2.033) data 0.000 (0.014) loss 2.7878 (0.9164) lr 9.5173e-05 eta 2:14:09
epoch [49/50] batch [60/2000] time 1.994 (2.030) data 0.001 (0.009) loss 1.4619 (1.0152) lr 9.5173e-05 eta 2:13:18
epoch [49/50] batch [80/2000] time 2.026 (2.030) data 0.000 (0.007) loss 0.7076 (0.9718) lr 9.5173e-05 eta 2:12:37
epoch [49/50] batch [100/2000] time 2.031 (2.030) data 0.000 (0.006) loss 1.1141 (1.0121) lr 9.5173e-05 eta 2:11:57
epoch [49/50] batch [120/2000] time 2.053 (2.031) data 0.000 (0.005) loss 1.2428 (1.0332) lr 9.5173e-05 eta 2:11:19
epoch [49/50] batch [140/2000] time 2.053 (2.031) data 0.000 (0.004) loss 1.5768 (1.0036) lr 9.5173e-05 eta 2:10:40
epoch [49/50] batch [160/2000] time 1.995 (2.032) data 0.000 (0.004) loss 0.2448 (1.0155) lr 9.5173e-05 eta 2:10:01
epoch [49/50] batch [180/2000] time 2.050 (2.031) data 0.000 (0.003) loss 2.1889 (1.0364) lr 9.5173e-05 eta 2:09:18
epoch [49/50] batch [200/2000] time 2.025 (2.031) data 0.000 (0.003) loss 1.1520 (1.0381) lr 9.5173e-05 eta 2:08:38
epoch [49/50] batch [220/2000] time 2.052 (2.031) data 0.000 (0.003) loss 2.2871 (1.0438) lr 9.5173e-05 eta 2:07:56
epoch [49/50] batch [240/2000] time 2.052 (2.030) data 0.000 (0.002) loss 0.7545 (1.0501) lr 9.5173e-05 eta 2:07:14
epoch [49/50] batch [260/2000] time 2.053 (2.030) data 0.000 (0.002) loss 1.5084 (1.0434) lr 9.5173e-05 eta 2:06:33
epoch [49/50] batch [280/2000] time 2.052 (2.030) data 0.000 (0.002) loss 0.9321 (1.0662) lr 9.5173e-05 eta 2:05:52
epoch [49/50] batch [300/2000] time 2.032 (2.030) data 0.000 (0.002) loss 1.9521 (1.0941) lr 9.5173e-05 eta 2:05:12
epoch [49/50] batch [320/2000] time 2.055 (2.030) data 0.000 (0.002) loss 1.3697 (1.1033) lr 9.5173e-05 eta 2:04:31
epoch [49/50] batch [340/2000] time 2.053 (2.030) data 0.000 (0.002) loss 1.5956 (1.0991) lr 9.5173e-05 eta 2:03:51
epoch [49/50] batch [360/2000] time 2.001 (2.030) data 0.000 (0.002) loss 1.0994 (1.0888) lr 9.5173e-05 eta 2:03:10
epoch [49/50] batch [380/2000] time 2.026 (2.030) data 0.000 (0.002) loss 0.2280 (1.0781) lr 9.5173e-05 eta 2:02:28
epoch [49/50] batch [400/2000] time 1.997 (2.030) data 0.000 (0.002) loss 1.0045 (1.0883) lr 9.5173e-05 eta 2:01:47
epoch [49/50] batch [420/2000] time 2.033 (2.030) data 0.000 (0.002) loss 0.8467 (1.0858) lr 9.5173e-05 eta 2:01:06
epoch [49/50] batch [440/2000] time 1.999 (2.030) data 0.000 (0.001) loss 0.2142 (1.0833) lr 9.5173e-05 eta 2:00:26
epoch [49/50] batch [460/2000] time 2.002 (2.030) data 0.000 (0.001) loss 0.9827 (1.0823) lr 9.5173e-05 eta 1:59:45
epoch [49/50] batch [480/2000] time 2.031 (2.030) data 0.000 (0.001) loss 0.9595 (1.0870) lr 9.5173e-05 eta 1:59:05
epoch [49/50] batch [500/2000] time 1.995 (2.030) data 0.000 (0.001) loss 2.2010 (1.0859) lr 9.5173e-05 eta 1:58:23
epoch [49/50] batch [520/2000] time 2.049 (2.030) data 0.000 (0.001) loss 1.9972 (1.0944) lr 9.5173e-05 eta 1:57:43
epoch [49/50] batch [540/2000] time 2.048 (2.030) data 0.000 (0.001) loss 1.5464 (1.0991) lr 9.5173e-05 eta 1:57:02
epoch [49/50] batch [560/2000] time 2.049 (2.029) data 0.000 (0.001) loss 0.1682 (1.1016) lr 9.5173e-05 eta 1:56:20
epoch [49/50] batch [580/2000] time 2.049 (2.029) data 0.000 (0.001) loss 1.8855 (1.1027) lr 9.5173e-05 eta 1:55:39
epoch [49/50] batch [600/2000] time 2.048 (2.029) data 0.001 (0.001) loss 0.1209 (1.1036) lr 9.5173e-05 eta 1:54:57
epoch [49/50] batch [620/2000] time 2.050 (2.029) data 0.000 (0.001) loss 2.3900 (1.1215) lr 9.5173e-05 eta 1:54:17
epoch [49/50] batch [640/2000] time 2.028 (2.029) data 0.000 (0.001) loss 1.7076 (1.1233) lr 9.5173e-05 eta 1:53:36
epoch [49/50] batch [660/2000] time 2.052 (2.029) data 0.000 (0.001) loss 1.6315 (1.1218) lr 9.5173e-05 eta 1:52:56
epoch [49/50] batch [680/2000] time 2.027 (2.029) data 0.000 (0.001) loss 0.0878 (1.1184) lr 9.5173e-05 eta 1:52:15
epoch [49/50] batch [700/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.3087 (1.1245) lr 9.5173e-05 eta 1:51:35
epoch [49/50] batch [720/2000] time 2.047 (2.029) data 0.000 (0.001) loss 0.2407 (1.1268) lr 9.5173e-05 eta 1:50:53
epoch [49/50] batch [740/2000] time 2.048 (2.029) data 0.000 (0.001) loss 0.1437 (1.1217) lr 9.5173e-05 eta 1:50:13
epoch [49/50] batch [760/2000] time 2.027 (2.028) data 0.000 (0.001) loss 1.3101 (1.1243) lr 9.5173e-05 eta 1:49:32
epoch [49/50] batch [780/2000] time 1.976 (2.028) data 0.000 (0.001) loss 0.4557 (1.1268) lr 9.5173e-05 eta 1:48:51
epoch [49/50] batch [800/2000] time 2.003 (2.029) data 0.000 (0.001) loss 0.3533 (1.1362) lr 9.5173e-05 eta 1:48:11
epoch [49/50] batch [820/2000] time 2.002 (2.029) data 0.000 (0.001) loss 0.2235 (1.1330) lr 9.5173e-05 eta 1:47:30
epoch [49/50] batch [840/2000] time 2.055 (2.029) data 0.000 (0.001) loss 3.3415 (1.1421) lr 9.5173e-05 eta 1:46:50
epoch [49/50] batch [860/2000] time 2.000 (2.029) data 0.000 (0.001) loss 1.7991 (1.1452) lr 9.5173e-05 eta 1:46:10
epoch [49/50] batch [880/2000] time 2.056 (2.029) data 0.000 (0.001) loss 2.0552 (1.1509) lr 9.5173e-05 eta 1:45:30
epoch [49/50] batch [900/2000] time 2.054 (2.029) data 0.000 (0.001) loss 1.0894 (1.1442) lr 9.5173e-05 eta 1:44:50
epoch [49/50] batch [920/2000] time 2.057 (2.029) data 0.000 (0.001) loss 0.0786 (1.1347) lr 9.5173e-05 eta 1:44:09
epoch [49/50] batch [940/2000] time 2.055 (2.029) data 0.000 (0.001) loss 0.8055 (1.1280) lr 9.5173e-05 eta 1:43:29
epoch [49/50] batch [960/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.1966 (1.1260) lr 9.5173e-05 eta 1:42:48
epoch [49/50] batch [980/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.8728 (1.1295) lr 9.5173e-05 eta 1:42:08
epoch [49/50] batch [1000/2000] time 2.057 (2.029) data 0.000 (0.001) loss 1.7072 (1.1357) lr 9.5173e-05 eta 1:41:28
epoch [49/50] batch [1020/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.1554 (1.1357) lr 9.5173e-05 eta 1:40:47
epoch [49/50] batch [1040/2000] time 2.031 (2.029) data 0.000 (0.001) loss 1.8217 (1.1342) lr 9.5173e-05 eta 1:40:06
epoch [49/50] batch [1060/2000] time 2.053 (2.029) data 0.000 (0.001) loss 1.7962 (1.1340) lr 9.5173e-05 eta 1:39:26
epoch [49/50] batch [1080/2000] time 2.032 (2.029) data 0.000 (0.001) loss 1.5812 (1.1390) lr 9.5173e-05 eta 1:38:45
epoch [49/50] batch [1100/2000] time 2.054 (2.029) data 0.000 (0.001) loss 0.3083 (1.1375) lr 9.5173e-05 eta 1:38:05
epoch [49/50] batch [1120/2000] time 1.999 (2.029) data 0.000 (0.001) loss 1.0934 (1.1400) lr 9.5173e-05 eta 1:37:24
epoch [49/50] batch [1140/2000] time 2.032 (2.029) data 0.001 (0.001) loss 0.7751 (1.1395) lr 9.5173e-05 eta 1:36:44
epoch [49/50] batch [1160/2000] time 2.035 (2.030) data 0.000 (0.001) loss 2.5937 (1.1409) lr 9.5173e-05 eta 1:36:03
epoch [49/50] batch [1180/2000] time 2.002 (2.030) data 0.000 (0.001) loss 0.3206 (1.1453) lr 9.5173e-05 eta 1:35:23
epoch [49/50] batch [1200/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.5445 (1.1475) lr 9.5173e-05 eta 1:34:43
epoch [49/50] batch [1220/2000] time 2.059 (2.030) data 0.000 (0.001) loss 1.6367 (1.1530) lr 9.5173e-05 eta 1:34:02
epoch [49/50] batch [1240/2000] time 2.032 (2.030) data 0.000 (0.001) loss 1.1950 (1.1584) lr 9.5173e-05 eta 1:33:22
epoch [49/50] batch [1260/2000] time 2.057 (2.030) data 0.000 (0.001) loss 0.4550 (1.1568) lr 9.5173e-05 eta 1:32:42
epoch [49/50] batch [1280/2000] time 2.006 (2.030) data 0.000 (0.001) loss 0.9651 (1.1540) lr 9.5173e-05 eta 1:32:01
epoch [49/50] batch [1300/2000] time 2.033 (2.030) data 0.000 (0.001) loss 1.3888 (1.1525) lr 9.5173e-05 eta 1:31:21
epoch [49/50] batch [1320/2000] time 1.997 (2.030) data 0.000 (0.001) loss 3.6414 (1.1556) lr 9.5173e-05 eta 1:30:40
epoch [49/50] batch [1340/2000] time 1.974 (2.030) data 0.000 (0.001) loss 0.2565 (1.1576) lr 9.5173e-05 eta 1:29:59
epoch [49/50] batch [1360/2000] time 2.053 (2.030) data 0.000 (0.001) loss 0.5095 (1.1552) lr 9.5173e-05 eta 1:29:19
epoch [49/50] batch [1380/2000] time 2.001 (2.030) data 0.000 (0.001) loss 4.0292 (1.1594) lr 9.5173e-05 eta 1:28:38
epoch [49/50] batch [1400/2000] time 2.034 (2.030) data 0.000 (0.001) loss 0.5854 (1.1613) lr 9.5173e-05 eta 1:27:58
epoch [49/50] batch [1420/2000] time 2.055 (2.030) data 0.000 (0.001) loss 2.7326 (1.1621) lr 9.5173e-05 eta 1:27:17
epoch [49/50] batch [1440/2000] time 2.058 (2.030) data 0.000 (0.001) loss 0.5097 (1.1596) lr 9.5173e-05 eta 1:26:37
epoch [49/50] batch [1460/2000] time 1.975 (2.030) data 0.000 (0.001) loss 3.0765 (1.1605) lr 9.5173e-05 eta 1:25:56
epoch [49/50] batch [1480/2000] time 2.052 (2.030) data 0.000 (0.001) loss 1.2416 (1.1566) lr 9.5173e-05 eta 1:25:16
epoch [49/50] batch [1500/2000] time 1.997 (2.030) data 0.000 (0.001) loss 0.3176 (1.1533) lr 9.5173e-05 eta 1:24:35
epoch [49/50] batch [1520/2000] time 1.998 (2.030) data 0.000 (0.001) loss 1.7723 (1.1607) lr 9.5173e-05 eta 1:23:55
epoch [49/50] batch [1540/2000] time 2.035 (2.030) data 0.000 (0.001) loss 1.9400 (1.1637) lr 9.5173e-05 eta 1:23:14
epoch [49/50] batch [1560/2000] time 2.056 (2.030) data 0.000 (0.001) loss 0.7990 (1.1614) lr 9.5173e-05 eta 1:22:34
epoch [49/50] batch [1580/2000] time 2.057 (2.030) data 0.000 (0.001) loss 0.8664 (1.1610) lr 9.5173e-05 eta 1:21:53
epoch [49/50] batch [1600/2000] time 2.032 (2.030) data 0.000 (0.001) loss 2.1272 (1.1637) lr 9.5173e-05 eta 1:21:13
epoch [49/50] batch [1620/2000] time 1.998 (2.030) data 0.000 (0.001) loss 3.1761 (1.1657) lr 9.5173e-05 eta 1:20:32
epoch [49/50] batch [1640/2000] time 2.054 (2.030) data 0.000 (0.001) loss 0.7965 (1.1654) lr 9.5173e-05 eta 1:19:51
epoch [49/50] batch [1660/2000] time 2.056 (2.031) data 0.000 (0.001) loss 0.5795 (1.1638) lr 9.5173e-05 eta 1:19:11
epoch [49/50] batch [1680/2000] time 2.032 (2.031) data 0.001 (0.001) loss 0.5875 (1.1641) lr 9.5173e-05 eta 1:18:30
epoch [49/50] batch [1700/2000] time 2.057 (2.031) data 0.000 (0.001) loss 0.7462 (1.1612) lr 9.5173e-05 eta 1:17:50
epoch [49/50] batch [1720/2000] time 2.033 (2.031) data 0.000 (0.001) loss 0.7107 (1.1621) lr 9.5173e-05 eta 1:17:09
epoch [49/50] batch [1740/2000] time 2.057 (2.031) data 0.000 (0.001) loss 2.2659 (1.1623) lr 9.5173e-05 eta 1:16:29
epoch [49/50] batch [1760/2000] time 2.059 (2.031) data 0.000 (0.001) loss 0.2105 (1.1632) lr 9.5173e-05 eta 1:15:48
epoch [49/50] batch [1780/2000] time 2.058 (2.031) data 0.000 (0.001) loss 0.2286 (1.1597) lr 9.5173e-05 eta 1:15:08
epoch [49/50] batch [1800/2000] time 2.002 (2.031) data 0.000 (0.001) loss 0.9653 (1.1553) lr 9.5173e-05 eta 1:14:27
epoch [49/50] batch [1820/2000] time 2.058 (2.031) data 0.000 (0.001) loss 0.8269 (1.1583) lr 9.5173e-05 eta 1:13:47
epoch [49/50] batch [1840/2000] time 2.034 (2.031) data 0.000 (0.000) loss 0.1348 (1.1569) lr 9.5173e-05 eta 1:13:07
epoch [49/50] batch [1860/2000] time 2.034 (2.031) data 0.000 (0.000) loss 2.7146 (1.1618) lr 9.5173e-05 eta 1:12:26
epoch [49/50] batch [1880/2000] time 2.001 (2.031) data 0.000 (0.000) loss 0.2034 (1.1625) lr 9.5173e-05 eta 1:11:45
epoch [49/50] batch [1900/2000] time 2.055 (2.031) data 0.000 (0.000) loss 2.2372 (1.1644) lr 9.5173e-05 eta 1:11:05
epoch [49/50] batch [1920/2000] time 1.998 (2.031) data 0.000 (0.000) loss 0.4537 (1.1655) lr 9.5173e-05 eta 1:10:24
epoch [49/50] batch [1940/2000] time 2.055 (2.031) data 0.000 (0.000) loss 0.6569 (1.1659) lr 9.5173e-05 eta 1:09:43
epoch [49/50] batch [1960/2000] time 2.031 (2.031) data 0.000 (0.000) loss 0.2915 (1.1660) lr 9.5173e-05 eta 1:09:03
epoch [49/50] batch [1980/2000] time 2.034 (2.031) data 0.000 (0.000) loss 0.4588 (1.1639) lr 9.5173e-05 eta 1:08:22
epoch [49/50] batch [2000/2000] time 2.057 (2.031) data 0.000 (0.000) loss 2.4129 (1.1624) lr 7.0224e-05 eta 1:07:41
epoch [50/50] batch [20/2000] time 2.049 (2.060) data 0.000 (0.027) loss 1.3173 (1.1559) lr 7.0224e-05 eta 1:07:59
epoch [50/50] batch [40/2000] time 1.998 (2.044) data 0.000 (0.014) loss 0.4282 (1.2710) lr 7.0224e-05 eta 1:06:46
epoch [50/50] batch [60/2000] time 2.024 (2.037) data 0.001 (0.009) loss 2.5134 (1.1030) lr 7.0224e-05 eta 1:05:51
epoch [50/50] batch [80/2000] time 2.047 (2.034) data 0.000 (0.007) loss 0.8115 (1.1363) lr 7.0224e-05 eta 1:05:05
epoch [50/50] batch [100/2000] time 2.025 (2.032) data 0.000 (0.006) loss 0.9344 (1.1731) lr 7.0224e-05 eta 1:04:20
epoch [50/50] batch [120/2000] time 2.025 (2.031) data 0.000 (0.005) loss 2.8024 (1.1546) lr 7.0224e-05 eta 1:03:37
epoch [50/50] batch [140/2000] time 2.026 (2.029) data 0.000 (0.004) loss 1.5011 (1.1468) lr 7.0224e-05 eta 1:02:53
epoch [50/50] batch [160/2000] time 2.055 (2.029) data 0.000 (0.004) loss 0.4259 (1.1398) lr 7.0224e-05 eta 1:02:14
epoch [50/50] batch [180/2000] time 2.000 (2.030) data 0.000 (0.003) loss 0.4164 (1.1504) lr 7.0224e-05 eta 1:01:34
epoch [50/50] batch [200/2000] time 2.046 (2.030) data 0.000 (0.003) loss 2.6321 (1.1632) lr 7.0224e-05 eta 1:00:54
epoch [50/50] batch [220/2000] time 2.054 (2.030) data 0.000 (0.003) loss 0.9597 (1.1782) lr 7.0224e-05 eta 1:00:13
epoch [50/50] batch [240/2000] time 2.054 (2.030) data 0.000 (0.002) loss 0.7363 (1.1811) lr 7.0224e-05 eta 0:59:32
epoch [50/50] batch [260/2000] time 1.995 (2.030) data 0.000 (0.002) loss 0.3424 (1.1777) lr 7.0224e-05 eta 0:58:51
epoch [50/50] batch [280/2000] time 2.049 (2.030) data 0.000 (0.002) loss 1.0023 (1.1790) lr 7.0224e-05 eta 0:58:11
epoch [50/50] batch [300/2000] time 2.026 (2.030) data 0.000 (0.002) loss 2.2457 (1.2043) lr 7.0224e-05 eta 0:57:30
epoch [50/50] batch [320/2000] time 2.051 (2.030) data 0.000 (0.002) loss 0.0542 (1.1924) lr 7.0224e-05 eta 0:56:50
epoch [50/50] batch [340/2000] time 2.048 (2.030) data 0.000 (0.002) loss 1.0209 (1.1708) lr 7.0224e-05 eta 0:56:09
epoch [50/50] batch [360/2000] time 2.023 (2.029) data 0.000 (0.002) loss 0.9849 (1.1690) lr 7.0224e-05 eta 0:55:28
epoch [50/50] batch [380/2000] time 1.993 (2.029) data 0.000 (0.002) loss 1.0298 (1.1822) lr 7.0224e-05 eta 0:54:47
epoch [50/50] batch [400/2000] time 2.049 (2.029) data 0.000 (0.002) loss 0.1786 (1.1980) lr 7.0224e-05 eta 0:54:06
epoch [50/50] batch [420/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.7008 (1.1890) lr 7.0224e-05 eta 0:53:26
epoch [50/50] batch [440/2000] time 2.028 (2.029) data 0.000 (0.001) loss 0.7858 (1.1820) lr 7.0224e-05 eta 0:52:44
epoch [50/50] batch [460/2000] time 2.054 (2.029) data 0.000 (0.001) loss 2.6145 (1.1754) lr 7.0224e-05 eta 0:52:04
epoch [50/50] batch [480/2000] time 2.000 (2.029) data 0.000 (0.001) loss 1.0552 (1.1790) lr 7.0224e-05 eta 0:51:23
epoch [50/50] batch [500/2000] time 2.030 (2.029) data 0.000 (0.001) loss 1.9117 (1.1836) lr 7.0224e-05 eta 0:50:43
epoch [50/50] batch [520/2000] time 2.044 (2.029) data 0.000 (0.001) loss 2.7240 (1.1748) lr 7.0224e-05 eta 0:50:02
epoch [50/50] batch [540/2000] time 1.991 (2.029) data 0.000 (0.001) loss 0.1280 (1.1672) lr 7.0224e-05 eta 0:49:21
epoch [50/50] batch [560/2000] time 2.046 (2.028) data 0.000 (0.001) loss 0.9428 (1.1835) lr 7.0224e-05 eta 0:48:40
epoch [50/50] batch [580/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.7891 (1.1766) lr 7.0224e-05 eta 0:48:00
epoch [50/50] batch [600/2000] time 2.001 (2.028) data 0.001 (0.001) loss 1.3911 (1.1704) lr 7.0224e-05 eta 0:47:19
epoch [50/50] batch [620/2000] time 1.977 (2.029) data 0.000 (0.001) loss 2.4164 (1.1723) lr 7.0224e-05 eta 0:46:39
epoch [50/50] batch [640/2000] time 2.003 (2.029) data 0.000 (0.001) loss 2.4937 (1.1835) lr 7.0224e-05 eta 0:45:58
epoch [50/50] batch [660/2000] time 2.057 (2.029) data 0.000 (0.001) loss 4.6988 (1.1963) lr 7.0224e-05 eta 0:45:18
epoch [50/50] batch [680/2000] time 1.998 (2.029) data 0.000 (0.001) loss 1.8704 (1.1915) lr 7.0224e-05 eta 0:44:38
epoch [50/50] batch [700/2000] time 2.001 (2.029) data 0.000 (0.001) loss 0.0372 (1.1841) lr 7.0224e-05 eta 0:43:57
epoch [50/50] batch [720/2000] time 1.996 (2.029) data 0.000 (0.001) loss 3.0054 (1.1800) lr 7.0224e-05 eta 0:43:17
epoch [50/50] batch [740/2000] time 2.053 (2.029) data 0.000 (0.001) loss 2.8816 (1.1778) lr 7.0224e-05 eta 0:42:36
epoch [50/50] batch [760/2000] time 2.051 (2.029) data 0.000 (0.001) loss 1.5388 (1.1818) lr 7.0224e-05 eta 0:41:55
epoch [50/50] batch [780/2000] time 2.051 (2.029) data 0.000 (0.001) loss 2.7399 (1.1801) lr 7.0224e-05 eta 0:41:15
epoch [50/50] batch [800/2000] time 2.048 (2.029) data 0.000 (0.001) loss 1.0413 (1.1854) lr 7.0224e-05 eta 0:40:34
epoch [50/50] batch [820/2000] time 2.047 (2.029) data 0.000 (0.001) loss 0.4361 (1.1840) lr 7.0224e-05 eta 0:39:54
epoch [50/50] batch [840/2000] time 2.046 (2.029) data 0.000 (0.001) loss 0.6530 (1.1846) lr 7.0224e-05 eta 0:39:13
epoch [50/50] batch [860/2000] time 2.045 (2.029) data 0.000 (0.001) loss 1.0637 (1.1831) lr 7.0224e-05 eta 0:38:33
epoch [50/50] batch [880/2000] time 2.047 (2.029) data 0.000 (0.001) loss 3.1363 (1.1815) lr 7.0224e-05 eta 0:37:52
epoch [50/50] batch [900/2000] time 2.050 (2.029) data 0.000 (0.001) loss 3.6862 (1.1795) lr 7.0224e-05 eta 0:37:11
epoch [50/50] batch [920/2000] time 1.997 (2.029) data 0.000 (0.001) loss 0.6851 (1.1837) lr 7.0224e-05 eta 0:36:30
epoch [50/50] batch [940/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.1885 (1.1817) lr 7.0224e-05 eta 0:35:50
epoch [50/50] batch [960/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.8904 (1.1835) lr 7.0224e-05 eta 0:35:09
epoch [50/50] batch [980/2000] time 2.033 (2.028) data 0.000 (0.001) loss 1.1198 (1.1789) lr 7.0224e-05 eta 0:34:28
epoch [50/50] batch [1000/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.0880 (1.1839) lr 7.0224e-05 eta 0:33:48
epoch [50/50] batch [1020/2000] time 1.997 (2.028) data 0.000 (0.001) loss 1.2362 (1.1810) lr 7.0224e-05 eta 0:33:07
epoch [50/50] batch [1040/2000] time 2.030 (2.028) data 0.000 (0.001) loss 0.0669 (1.1768) lr 7.0224e-05 eta 0:32:27
epoch [50/50] batch [1060/2000] time 2.028 (2.028) data 0.000 (0.001) loss 0.6406 (1.1793) lr 7.0224e-05 eta 0:31:46
epoch [50/50] batch [1080/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.3022 (1.1770) lr 7.0224e-05 eta 0:31:06
epoch [50/50] batch [1100/2000] time 1.995 (2.028) data 0.000 (0.001) loss 0.2599 (1.1732) lr 7.0224e-05 eta 0:30:25
epoch [50/50] batch [1120/2000] time 1.996 (2.028) data 0.000 (0.001) loss 1.6274 (1.1713) lr 7.0224e-05 eta 0:29:44
epoch [50/50] batch [1140/2000] time 1.997 (2.028) data 0.001 (0.001) loss 0.8725 (1.1720) lr 7.0224e-05 eta 0:29:04
epoch [50/50] batch [1160/2000] time 2.053 (2.028) data 0.000 (0.001) loss 2.1734 (1.1769) lr 7.0224e-05 eta 0:28:23
epoch [50/50] batch [1180/2000] time 2.025 (2.028) data 0.000 (0.001) loss 0.4174 (1.1730) lr 7.0224e-05 eta 0:27:43
epoch [50/50] batch [1200/2000] time 2.001 (2.028) data 0.000 (0.001) loss 1.8786 (1.1754) lr 7.0224e-05 eta 0:27:02
epoch [50/50] batch [1220/2000] time 2.050 (2.028) data 0.000 (0.001) loss 1.4336 (1.1797) lr 7.0224e-05 eta 0:26:22
epoch [50/50] batch [1240/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.4617 (1.1792) lr 7.0224e-05 eta 0:25:41
epoch [50/50] batch [1260/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.3840 (1.1770) lr 7.0224e-05 eta 0:25:00
epoch [50/50] batch [1280/2000] time 1.998 (2.028) data 0.000 (0.001) loss 0.9947 (1.1739) lr 7.0224e-05 eta 0:24:20
epoch [50/50] batch [1300/2000] time 2.046 (2.028) data 0.000 (0.001) loss 0.7289 (1.1718) lr 7.0224e-05 eta 0:23:39
epoch [50/50] batch [1320/2000] time 2.050 (2.028) data 0.000 (0.001) loss 4.6505 (1.1776) lr 7.0224e-05 eta 0:22:59
epoch [50/50] batch [1340/2000] time 1.994 (2.028) data 0.000 (0.001) loss 0.4391 (1.1731) lr 7.0224e-05 eta 0:22:18
epoch [50/50] batch [1360/2000] time 2.050 (2.028) data 0.000 (0.001) loss 0.1926 (1.1760) lr 7.0224e-05 eta 0:21:38
epoch [50/50] batch [1380/2000] time 2.049 (2.028) data 0.000 (0.001) loss 1.8163 (1.1750) lr 7.0224e-05 eta 0:20:57
epoch [50/50] batch [1400/2000] time 1.996 (2.028) data 0.000 (0.001) loss 0.3342 (1.1763) lr 7.0224e-05 eta 0:20:16
epoch [50/50] batch [1420/2000] time 2.026 (2.028) data 0.000 (0.001) loss 2.0286 (1.1752) lr 7.0224e-05 eta 0:19:36
epoch [50/50] batch [1440/2000] time 2.049 (2.028) data 0.000 (0.001) loss 0.7657 (1.1708) lr 7.0224e-05 eta 0:18:55
epoch [50/50] batch [1460/2000] time 1.974 (2.028) data 0.000 (0.001) loss 0.6200 (1.1682) lr 7.0224e-05 eta 0:18:15
epoch [50/50] batch [1480/2000] time 2.048 (2.028) data 0.000 (0.001) loss 2.1796 (1.1690) lr 7.0224e-05 eta 0:17:34
epoch [50/50] batch [1500/2000] time 1.972 (2.028) data 0.000 (0.001) loss 0.6934 (1.1696) lr 7.0224e-05 eta 0:16:54
epoch [50/50] batch [1520/2000] time 2.030 (2.028) data 0.000 (0.001) loss 3.0052 (1.1692) lr 7.0224e-05 eta 0:16:13
epoch [50/50] batch [1540/2000] time 1.970 (2.028) data 0.000 (0.001) loss 1.3427 (1.1687) lr 7.0224e-05 eta 0:15:32
epoch [50/50] batch [1560/2000] time 1.995 (2.028) data 0.000 (0.001) loss 1.5913 (1.1671) lr 7.0224e-05 eta 0:14:52
epoch [50/50] batch [1580/2000] time 1.971 (2.028) data 0.000 (0.001) loss 1.6318 (1.1668) lr 7.0224e-05 eta 0:14:11
epoch [50/50] batch [1600/2000] time 1.971 (2.028) data 0.000 (0.001) loss 0.5862 (1.1657) lr 7.0224e-05 eta 0:13:31
epoch [50/50] batch [1620/2000] time 2.051 (2.028) data 0.000 (0.001) loss 1.4975 (1.1621) lr 7.0224e-05 eta 0:12:50
epoch [50/50] batch [1640/2000] time 1.994 (2.028) data 0.000 (0.001) loss 0.4856 (1.1563) lr 7.0224e-05 eta 0:12:10
epoch [50/50] batch [1660/2000] time 1.996 (2.028) data 0.000 (0.001) loss 3.6638 (1.1558) lr 7.0224e-05 eta 0:11:29
epoch [50/50] batch [1680/2000] time 1.997 (2.028) data 0.001 (0.001) loss 3.2701 (1.1553) lr 7.0224e-05 eta 0:10:48
epoch [50/50] batch [1700/2000] time 2.051 (2.028) data 0.000 (0.001) loss 0.9710 (1.1577) lr 7.0224e-05 eta 0:10:08
epoch [50/50] batch [1720/2000] time 2.047 (2.028) data 0.000 (0.001) loss 0.5282 (1.1626) lr 7.0224e-05 eta 0:09:27
epoch [50/50] batch [1740/2000] time 1.997 (2.028) data 0.000 (0.001) loss 0.1917 (1.1621) lr 7.0224e-05 eta 0:08:47
epoch [50/50] batch [1760/2000] time 2.052 (2.028) data 0.000 (0.001) loss 0.6700 (1.1601) lr 7.0224e-05 eta 0:08:06
epoch [50/50] batch [1780/2000] time 2.067 (2.029) data 0.000 (0.001) loss 0.7022 (1.1608) lr 7.0224e-05 eta 0:07:26
epoch [50/50] batch [1800/2000] time 2.124 (2.030) data 0.000 (0.001) loss 0.0227 (1.1592) lr 7.0224e-05 eta 0:06:45
epoch [50/50] batch [1820/2000] time 1.999 (2.030) data 0.000 (0.000) loss 1.1834 (1.1632) lr 7.0224e-05 eta 0:06:05
epoch [50/50] batch [1840/2000] time 2.049 (2.030) data 0.000 (0.000) loss 0.6793 (1.1653) lr 7.0224e-05 eta 0:05:24
epoch [50/50] batch [1860/2000] time 2.049 (2.030) data 0.000 (0.000) loss 1.2869 (1.1644) lr 7.0224e-05 eta 0:04:44
epoch [50/50] batch [1880/2000] time 2.048 (2.030) data 0.000 (0.000) loss 0.2871 (1.1618) lr 7.0224e-05 eta 0:04:03
epoch [50/50] batch [1900/2000] time 2.052 (2.030) data 0.000 (0.000) loss 0.2882 (1.1630) lr 7.0224e-05 eta 0:03:22
epoch [50/50] batch [1920/2000] time 1.998 (2.030) data 0.000 (0.000) loss 0.3840 (1.1612) lr 7.0224e-05 eta 0:02:42
epoch [50/50] batch [1940/2000] time 2.051 (2.030) data 0.000 (0.000) loss 1.7835 (1.1639) lr 7.0224e-05 eta 0:02:01
epoch [50/50] batch [1960/2000] time 1.997 (2.030) data 0.000 (0.000) loss 1.1732 (1.1616) lr 7.0224e-05 eta 0:01:21
epoch [50/50] batch [1980/2000] time 1.975 (2.030) data 0.000 (0.000) loss 0.4903 (1.1634) lr 7.0224e-05 eta 0:00:40
epoch [50/50] batch [2000/2000] time 2.049 (2.030) data 0.000 (0.000) loss 1.0529 (1.1613) lr 4.8943e-05 eta 0:00:00
Checkpoint saved to output/base2new/train_base/imagenet/vit_b16_ep50_c4_BZ4_ProDA/seed2/prompt_learner/model.pth.tar-50
Finish training
Deploy the last-epoch model
Evaluate on the *test* set
=> result
* total: 25,000
* correct: 20,070
* accuracy: 80.28%
* error: 19.72%
* macro_f1: 79.96%
Elapsed: 2 days, 8:34:34
