***************
** Arguments **
***************
backbone: 
config_file: configs/trainers/ProDA/vit_b16_ep50_c4_BZ4_ProDA.yaml
dataset_config_file: configs/datasets/sun397.yaml
eval_only: False
head: 
load_epoch: None
model_dir: 
no_train: False
opts: ['DATASET.NUM_SHOTS', '16', 'DATASET.SUBSAMPLE_CLASSES', 'base']
output_dir: output/base2new/train_base/sun397/vit_b16_ep50_c4_BZ4_ProDA/seed2
resume: 
root: /mnt/hdd/DATA
seed: 2
source_domains: None
target_domains: None
trainer: ProDA
transforms: None
************
** Config **
************
DATALOADER:
  K_TRANSFORMS: 1
  NUM_WORKERS: 8
  RETURN_IMG0: False
  TEST:
    BATCH_SIZE: 100
    SAMPLER: SequentialSampler
  TRAIN_U:
    BATCH_SIZE: 32
    N_DOMAIN: 0
    N_INS: 16
    SAME_AS_X: True
    SAMPLER: RandomSampler
  TRAIN_X:
    BATCH_SIZE: 4
    N_DOMAIN: 0
    N_INS: 16
    SAMPLER: RandomSampler
DATASET:
  ALL_AS_UNLABELED: False
  CIFAR_C_LEVEL: 1
  CIFAR_C_TYPE: 
  NAME: SUN397
  NUM_LABELED: -1
  NUM_SHOTS: 16
  ROOT: /mnt/hdd/DATA
  SOURCE_DOMAINS: ()
  STL10_FOLD: -1
  SUBSAMPLE_CLASSES: base
  TARGET_DOMAINS: ()
  VAL_PERCENT: 0.1
INPUT:
  COLORJITTER_B: 0.4
  COLORJITTER_C: 0.4
  COLORJITTER_H: 0.1
  COLORJITTER_S: 0.4
  CROP_PADDING: 4
  CUTOUT_LEN: 16
  CUTOUT_N: 1
  GB_K: 21
  GB_P: 0.5
  GN_MEAN: 0.0
  GN_STD: 0.15
  INTERPOLATION: bicubic
  NO_TRANSFORM: False
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  RANDAUGMENT_M: 10
  RANDAUGMENT_N: 2
  RGS_P: 0.2
  RRCROP_SCALE: (0.08, 1.0)
  SIZE: (224, 224)
  TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize')
MODEL:
  BACKBONE:
    NAME: ViT-B/16
    PRETRAINED: True
  HEAD:
    ACTIVATION: relu
    BN: True
    DROPOUT: 0.0
    HIDDEN_LAYERS: ()
    NAME: 
  INIT_WEIGHTS: 
OPTIM:
  ADAM_BETA1: 0.9
  ADAM_BETA2: 0.999
  BASE_LR_MULT: 0.1
  GAMMA: 0.1
  LR: 0.002
  LR_SCHEDULER: cosine
  MAX_EPOCH: 50
  MOMENTUM: 0.9
  NAME: sgd
  NEW_LAYERS: ()
  RMSPROP_ALPHA: 0.99
  SGD_DAMPNING: 0
  SGD_NESTEROV: False
  STAGED_LR: False
  STEPSIZE: (-1,)
  WARMUP_CONS_LR: 1e-05
  WARMUP_EPOCH: 5
  WARMUP_MIN_LR: 1e-05
  WARMUP_RECOUNT: True
  WARMUP_TYPE: constant
  WEIGHT_DECAY: 0.0005
OUTPUT_DIR: output/base2new/train_base/sun397/vit_b16_ep50_c4_BZ4_ProDA/seed2
RESUME: 
SEED: 2
TEST:
  COMPUTE_CMAT: False
  EVALUATOR: Classification
  FINAL_MODEL: last_step
  NO_TEST: False
  PER_CLASS_RESULT: False
  SPLIT: test
TRAIN:
  CHECKPOINT_FREQ: 0
  COUNT_ITER: train_x
  PRINT_FREQ: 20
TRAINER:
  CDAC:
    CLASS_LR_MULTI: 10
    P_THRESH: 0.95
    RAMPUP_COEF: 30
    RAMPUP_ITRS: 1000
    STRONG_TRANSFORMS: ()
    TOPK_MATCH: 5
  COCOOP:
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  COOP:
    CLASS_TOKEN_POSITION: end
    CSC: False
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  CROSSGRAD:
    ALPHA_D: 0.5
    ALPHA_F: 0.5
    EPS_D: 1.0
    EPS_F: 1.0
  DAEL:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DAELDG:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DDAIG:
    ALPHA: 0.5
    CLAMP: False
    CLAMP_MAX: 1.0
    CLAMP_MIN: -1.0
    G_ARCH: 
    LMDA: 0.3
    WARMUP: 0
  DOMAINMIX:
    ALPHA: 1.0
    BETA: 1.0
    TYPE: crossdomain
  ENTMIN:
    LMDA: 0.001
  FIXMATCH:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 1.0
  IVLP:
    CTX_INIT: a photo of a
    N_CTX_TEXT: 2
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_TEXT: 9
    PROMPT_DEPTH_VISION: 9
  M3SDA:
    LMDA: 0.5
    N_STEP_F: 4
  MAPLE:
    CTX_INIT: a photo of a
    N_CTX: 4
    PREC: fp16
    PROMPT_DEPTH: 9
  MCD:
    N_STEP_F: 4
  MEANTEACHER:
    EMA_ALPHA: 0.999
    RAMPUP: 5
    WEIGHT_U: 1.0
  MIXMATCH:
    MIXUP_BETA: 0.75
    RAMPUP: 20000
    TEMP: 2.0
    WEIGHT_U: 100.0
  MME:
    LMDA: 0.1
  NAME: ProDA
  ProDA:
    N_CTX: 4
    N_PROMPT: 32
    PREC: fp16
  SE:
    CONF_THRE: 0.95
    EMA_ALPHA: 0.999
    RAMPUP: 300
  VPT:
    CTX_INIT: a photo of a
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_VISION: 1
USE_CUDA: True
VERBOSE: True
VERSION: 1
Collecting env info ...
** System info **
PyTorch version: 2.2.1+cu121
Is debug build: False
CUDA used to build PyTorch: 12.1
ROCM used to build PyTorch: N/A

OS: Debian GNU/Linux 12 (bookworm) (x86_64)
GCC version: (Debian 12.2.0-14) 12.2.0
Clang version: Could not collect
CMake version: Could not collect
Libc version: glibc-2.36

Python version: 3.11.2 (main, Mar 13 2023, 12:18:29) [GCC 12.2.0] (64-bit runtime)
Python platform: Linux-6.5.13-3-pve-x86_64-with-glibc2.36
Is CUDA available: True
CUDA runtime version: 11.8.89
CUDA_MODULE_LOADING set to: LAZY
GPU models and configuration: 
GPU 0: NVIDIA A800 80GB PCIe
GPU 1: NVIDIA A800 80GB PCIe

Nvidia driver version: 525.147.05
cuDNN version: Could not collect
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True

CPU:
Architecture:                       x86_64
CPU op-mode(s):                     32-bit, 64-bit
Address sizes:                      46 bits physical, 57 bits virtual
Byte Order:                         Little Endian
CPU(s):                             64
On-line CPU(s) list:                18,20,22,23,25-27,29,31,32,34,37,46-49
Off-line CPU(s) list:               0-17,19,21,24,28,30,33,35,36,38-45,50-63
Vendor ID:                          GenuineIntel
Model name:                         Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz
CPU family:                         6
Model:                              106
Thread(s) per core:                 2
Core(s) per socket:                 16
Socket(s):                          2
Stepping:                           6
CPU(s) scaling MHz:                 98%
CPU max MHz:                        3500.0000
CPU min MHz:                        800.0000
BogoMIPS:                           5800.00
Flags:                              fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect wbnoinvd dtherm ida arat pln pts vnmi avx512vbmi umip pku ospke avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid fsrm md_clear pconfig flush_l1d arch_capabilities
Virtualization:                     VT-x
L1d cache:                          1.5 MiB (32 instances)
L1i cache:                          1 MiB (32 instances)
L2 cache:                           40 MiB (32 instances)
L3 cache:                           48 MiB (2 instances)
NUMA node(s):                       2
NUMA node0 CPU(s):                  0-15,32-47
NUMA node1 CPU(s):                  16-31,48-63
Vulnerability Gather data sampling: Vulnerable: No microcode
Vulnerability Itlb multihit:        Not affected
Vulnerability L1tf:                 Not affected
Vulnerability Mds:                  Not affected
Vulnerability Meltdown:             Not affected
Vulnerability Mmio stale data:      Mitigation; Clear CPU buffers; SMT vulnerable
Vulnerability Retbleed:             Not affected
Vulnerability Spec rstack overflow: Not affected
Vulnerability Spec store bypass:    Mitigation; Speculative Store Bypass disabled via prctl
Vulnerability Spectre v1:           Mitigation; usercopy/swapgs barriers and __user pointer sanitization
Vulnerability Spectre v2:           Mitigation; Enhanced / Automatic IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
Vulnerability Srbds:                Not affected
Vulnerability Tsx async abort:      Not affected

Versions of relevant libraries:
[pip3] flake8==3.7.9
[pip3] flake8==3.7.9
[pip3] numpy==1.26.4
[pip3] torch==2.2.1
[pip3] torchaudio==2.2.1
[pip3] torchvision==0.17.1
[pip3] triton==2.2.0
[conda] Could not collect
        Pillow (10.2.0)

Loading trainer: ProDA
Loading dataset: SUN397
Reading split from /mnt/hdd/DATA/sun397/split_zhou_SUN397.json
Loading preprocessed few-shot data from /mnt/hdd/DATA/sun397/split_fewshot/shot_16_shuffled-seed_2.pkl
SUBSAMPLE BASE CLASSES!
Building transform_train
+ random resized crop (size=(224, 224), scale=(0.08, 1.0))
+ random flip
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
Building transform_test
+ resize the smaller edge to 224
+ 224x224 center crop
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
---------  ------
Dataset    SUN397
# classes  199
# train_x  3,184
# val      796
# test     9,950
---------  ------
Loading CLIP (backbone: ViT-B/16)
Building custom CLIP
Turning off gradients in both the image and the text encoder
Parameters to be updated: {'prompt_learner.ctx'}
Loading evaluator: Classification
No checkpoint found, train from scratch
Initialize tensorboard (log_dir=output/base2new/train_base/sun397/vit_b16_ep50_c4_BZ4_ProDA/seed2/tensorboard)
epoch [1/50] batch [20/796] time 0.831 (0.984) data 0.000 (0.045) loss 2.2451 (2.1976) lr 1.0000e-05 eta 10:52:25
epoch [1/50] batch [40/796] time 0.841 (0.908) data 0.000 (0.023) loss 1.2672 (2.1821) lr 1.0000e-05 eta 10:01:55
epoch [1/50] batch [60/796] time 0.820 (0.884) data 0.000 (0.015) loss 2.3168 (2.1426) lr 1.0000e-05 eta 9:45:30
epoch [1/50] batch [80/796] time 0.843 (0.871) data 0.000 (0.012) loss 2.6068 (2.1006) lr 1.0000e-05 eta 9:36:41
epoch [1/50] batch [100/796] time 0.843 (0.864) data 0.000 (0.009) loss 3.3220 (2.1036) lr 1.0000e-05 eta 9:31:22
epoch [1/50] batch [120/796] time 0.834 (0.858) data 0.000 (0.008) loss 2.6441 (2.0120) lr 1.0000e-05 eta 9:27:32
epoch [1/50] batch [140/796] time 0.842 (0.855) data 0.000 (0.007) loss 2.0205 (2.0041) lr 1.0000e-05 eta 9:24:49
epoch [1/50] batch [160/796] time 0.842 (0.852) data 0.000 (0.006) loss 1.2955 (1.9407) lr 1.0000e-05 eta 9:22:45
epoch [1/50] batch [180/796] time 0.821 (0.849) data 0.000 (0.005) loss 2.7560 (1.9584) lr 1.0000e-05 eta 9:20:56
epoch [1/50] batch [200/796] time 0.843 (0.848) data 0.000 (0.005) loss 2.3313 (1.9326) lr 1.0000e-05 eta 9:19:39
epoch [1/50] batch [220/796] time 0.810 (0.846) data 0.000 (0.004) loss 3.6052 (1.9445) lr 1.0000e-05 eta 9:18:20
epoch [1/50] batch [240/796] time 0.833 (0.845) data 0.000 (0.004) loss 1.0840 (1.9168) lr 1.0000e-05 eta 9:17:19
epoch [1/50] batch [260/796] time 0.821 (0.844) data 0.000 (0.004) loss 2.1643 (1.8808) lr 1.0000e-05 eta 9:16:14
epoch [1/50] batch [280/796] time 0.832 (0.843) data 0.000 (0.003) loss 1.2717 (1.8867) lr 1.0000e-05 eta 9:15:28
epoch [1/50] batch [300/796] time 0.811 (0.843) data 0.000 (0.003) loss 1.8349 (1.8746) lr 1.0000e-05 eta 9:14:45
epoch [1/50] batch [320/796] time 0.821 (0.842) data 0.000 (0.003) loss 2.9183 (1.8676) lr 1.0000e-05 eta 9:14:12
epoch [1/50] batch [340/796] time 0.842 (0.842) data 0.000 (0.003) loss 1.3489 (1.8653) lr 1.0000e-05 eta 9:13:33
epoch [1/50] batch [360/796] time 0.832 (0.841) data 0.000 (0.003) loss 1.6259 (1.8694) lr 1.0000e-05 eta 9:12:57
epoch [1/50] batch [380/796] time 0.832 (0.841) data 0.000 (0.003) loss 1.5573 (1.8644) lr 1.0000e-05 eta 9:12:23
epoch [1/50] batch [400/796] time 0.832 (0.840) data 0.000 (0.002) loss 0.6485 (1.8522) lr 1.0000e-05 eta 9:11:52
epoch [1/50] batch [420/796] time 0.843 (0.840) data 0.000 (0.002) loss 1.8912 (1.8472) lr 1.0000e-05 eta 9:11:19
epoch [1/50] batch [440/796] time 0.823 (0.840) data 0.000 (0.002) loss 1.2257 (1.8319) lr 1.0000e-05 eta 9:10:52
epoch [1/50] batch [460/796] time 0.833 (0.839) data 0.000 (0.002) loss 0.4099 (1.8163) lr 1.0000e-05 eta 9:10:23
epoch [1/50] batch [480/796] time 0.836 (0.839) data 0.000 (0.002) loss 1.9657 (1.8084) lr 1.0000e-05 eta 9:09:57
epoch [1/50] batch [500/796] time 0.823 (0.839) data 0.000 (0.002) loss 1.3619 (1.8035) lr 1.0000e-05 eta 9:09:28
epoch [1/50] batch [520/796] time 0.841 (0.839) data 0.000 (0.002) loss 0.8231 (1.8038) lr 1.0000e-05 eta 9:09:03
epoch [1/50] batch [540/796] time 0.821 (0.839) data 0.000 (0.002) loss 1.2271 (1.7971) lr 1.0000e-05 eta 9:08:40
epoch [1/50] batch [560/796] time 0.832 (0.838) data 0.000 (0.002) loss 2.0378 (1.7883) lr 1.0000e-05 eta 9:08:17
epoch [1/50] batch [580/796] time 0.843 (0.838) data 0.000 (0.002) loss 2.0851 (1.7793) lr 1.0000e-05 eta 9:07:53
epoch [1/50] batch [600/796] time 0.837 (0.838) data 0.001 (0.002) loss 3.6134 (1.7852) lr 1.0000e-05 eta 9:07:30
epoch [1/50] batch [620/796] time 0.843 (0.838) data 0.000 (0.002) loss 0.6877 (1.7875) lr 1.0000e-05 eta 9:07:06
epoch [1/50] batch [640/796] time 0.844 (0.838) data 0.000 (0.002) loss 0.6893 (1.7882) lr 1.0000e-05 eta 9:06:44
epoch [1/50] batch [660/796] time 0.842 (0.838) data 0.000 (0.002) loss 2.5042 (1.7837) lr 1.0000e-05 eta 9:06:21
epoch [1/50] batch [680/796] time 0.846 (0.838) data 0.000 (0.002) loss 1.4717 (1.7806) lr 1.0000e-05 eta 9:06:04
epoch [1/50] batch [700/796] time 0.841 (0.837) data 0.000 (0.001) loss 1.0853 (1.7766) lr 1.0000e-05 eta 9:05:43
epoch [1/50] batch [720/796] time 0.821 (0.837) data 0.000 (0.001) loss 1.7465 (1.7614) lr 1.0000e-05 eta 9:05:22
epoch [1/50] batch [740/796] time 0.821 (0.837) data 0.000 (0.001) loss 0.2799 (1.7526) lr 1.0000e-05 eta 9:05:01
epoch [1/50] batch [760/796] time 0.821 (0.837) data 0.000 (0.001) loss 0.7218 (1.7435) lr 1.0000e-05 eta 9:04:41
epoch [1/50] batch [780/796] time 0.821 (0.837) data 0.000 (0.001) loss 2.0697 (1.7369) lr 1.0000e-05 eta 9:04:19
epoch [2/50] batch [20/796] time 0.813 (0.861) data 0.000 (0.025) loss 1.3896 (1.6604) lr 1.0000e-05 eta 9:19:05
epoch [2/50] batch [40/796] time 0.841 (0.846) data 0.000 (0.013) loss 2.2717 (1.5653) lr 1.0000e-05 eta 9:09:38
epoch [2/50] batch [60/796] time 0.842 (0.842) data 0.001 (0.009) loss 1.5000 (1.5483) lr 1.0000e-05 eta 9:06:46
epoch [2/50] batch [80/796] time 0.821 (0.840) data 0.000 (0.006) loss 0.3991 (1.5459) lr 1.0000e-05 eta 9:04:55
epoch [2/50] batch [100/796] time 0.822 (0.839) data 0.000 (0.005) loss 1.5330 (1.5294) lr 1.0000e-05 eta 9:03:57
epoch [2/50] batch [120/796] time 0.833 (0.838) data 0.000 (0.004) loss 1.4718 (1.5275) lr 1.0000e-05 eta 9:02:53
epoch [2/50] batch [140/796] time 0.833 (0.837) data 0.000 (0.004) loss 1.0978 (1.5360) lr 1.0000e-05 eta 9:02:00
epoch [2/50] batch [160/796] time 0.821 (0.837) data 0.000 (0.003) loss 1.9443 (1.5372) lr 1.0000e-05 eta 9:01:38
epoch [2/50] batch [180/796] time 0.833 (0.836) data 0.000 (0.003) loss 2.6162 (1.5200) lr 1.0000e-05 eta 9:01:04
epoch [2/50] batch [200/796] time 0.811 (0.836) data 0.000 (0.003) loss 1.2890 (1.5581) lr 1.0000e-05 eta 9:00:34
epoch [2/50] batch [220/796] time 0.843 (0.836) data 0.000 (0.002) loss 2.7964 (1.5314) lr 1.0000e-05 eta 9:00:08
epoch [2/50] batch [240/796] time 0.820 (0.835) data 0.000 (0.002) loss 1.4641 (1.5066) lr 1.0000e-05 eta 8:59:39
epoch [2/50] batch [260/796] time 0.821 (0.835) data 0.000 (0.002) loss 3.0286 (1.5202) lr 1.0000e-05 eta 8:59:15
epoch [2/50] batch [280/796] time 0.833 (0.835) data 0.000 (0.002) loss 1.8492 (1.5197) lr 1.0000e-05 eta 8:58:54
epoch [2/50] batch [300/796] time 0.821 (0.835) data 0.000 (0.002) loss 0.9104 (1.4959) lr 1.0000e-05 eta 8:58:30
epoch [2/50] batch [320/796] time 0.844 (0.835) data 0.002 (0.002) loss 1.3385 (1.4948) lr 1.0000e-05 eta 8:58:10
epoch [2/50] batch [340/796] time 0.843 (0.835) data 0.000 (0.002) loss 1.2726 (1.4868) lr 1.0000e-05 eta 8:57:50
epoch [2/50] batch [360/796] time 0.841 (0.835) data 0.000 (0.002) loss 0.4237 (1.4871) lr 1.0000e-05 eta 8:57:31
epoch [2/50] batch [380/796] time 0.832 (0.834) data 0.000 (0.002) loss 1.8312 (1.4809) lr 1.0000e-05 eta 8:57:04
epoch [2/50] batch [400/796] time 0.835 (0.834) data 0.000 (0.001) loss 1.0240 (1.4711) lr 1.0000e-05 eta 8:56:44
epoch [2/50] batch [420/796] time 0.842 (0.834) data 0.000 (0.001) loss 3.2702 (1.4832) lr 1.0000e-05 eta 8:56:21
epoch [2/50] batch [440/796] time 0.820 (0.834) data 0.000 (0.001) loss 1.2985 (1.4850) lr 1.0000e-05 eta 8:56:01
epoch [2/50] batch [460/796] time 0.841 (0.834) data 0.000 (0.001) loss 1.3264 (1.4876) lr 1.0000e-05 eta 8:55:42
epoch [2/50] batch [480/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.5750 (1.4822) lr 1.0000e-05 eta 8:55:29
epoch [2/50] batch [500/796] time 0.843 (0.834) data 0.000 (0.001) loss 1.0484 (1.4723) lr 1.0000e-05 eta 8:55:08
epoch [2/50] batch [520/796] time 0.834 (0.834) data 0.000 (0.001) loss 0.3013 (1.4730) lr 1.0000e-05 eta 8:54:49
epoch [2/50] batch [540/796] time 0.820 (0.834) data 0.000 (0.001) loss 1.8068 (1.4704) lr 1.0000e-05 eta 8:54:35
epoch [2/50] batch [560/796] time 0.820 (0.834) data 0.000 (0.001) loss 2.3279 (1.4689) lr 1.0000e-05 eta 8:54:16
epoch [2/50] batch [580/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.0085 (1.4672) lr 1.0000e-05 eta 8:54:02
epoch [2/50] batch [600/796] time 0.831 (0.834) data 0.000 (0.001) loss 2.0188 (1.4689) lr 1.0000e-05 eta 8:53:41
epoch [2/50] batch [620/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.9780 (1.4851) lr 1.0000e-05 eta 8:53:24
epoch [2/50] batch [640/796] time 0.832 (0.834) data 0.000 (0.001) loss 1.2140 (1.4787) lr 1.0000e-05 eta 8:53:07
epoch [2/50] batch [660/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.2704 (1.4785) lr 1.0000e-05 eta 8:52:51
epoch [2/50] batch [680/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.8487 (1.4686) lr 1.0000e-05 eta 8:52:34
epoch [2/50] batch [700/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.0752 (1.4700) lr 1.0000e-05 eta 8:52:16
epoch [2/50] batch [720/796] time 0.841 (0.834) data 0.000 (0.001) loss 1.5111 (1.4686) lr 1.0000e-05 eta 8:51:55
epoch [2/50] batch [740/796] time 0.841 (0.834) data 0.000 (0.001) loss 1.4142 (1.4715) lr 1.0000e-05 eta 8:51:38
epoch [2/50] batch [760/796] time 0.835 (0.834) data 0.000 (0.001) loss 1.9980 (1.4744) lr 1.0000e-05 eta 8:51:21
epoch [2/50] batch [780/796] time 0.844 (0.834) data 0.000 (0.001) loss 1.1590 (1.4727) lr 1.0000e-05 eta 8:51:04
epoch [3/50] batch [20/796] time 0.834 (0.858) data 0.000 (0.026) loss 1.6345 (1.6414) lr 1.0000e-05 eta 9:06:24
epoch [3/50] batch [40/796] time 0.842 (0.848) data 0.000 (0.013) loss 0.4524 (1.4141) lr 1.0000e-05 eta 8:59:11
epoch [3/50] batch [60/796] time 0.820 (0.843) data 0.000 (0.009) loss 0.5553 (1.3798) lr 1.0000e-05 eta 8:56:10
epoch [3/50] batch [80/796] time 0.843 (0.841) data 0.000 (0.007) loss 1.5351 (1.4050) lr 1.0000e-05 eta 8:54:20
epoch [3/50] batch [100/796] time 0.834 (0.840) data 0.000 (0.005) loss 1.8991 (1.3895) lr 1.0000e-05 eta 8:53:17
epoch [3/50] batch [120/796] time 0.843 (0.838) data 0.000 (0.004) loss 0.8659 (1.4022) lr 1.0000e-05 eta 8:51:49
epoch [3/50] batch [140/796] time 0.822 (0.837) data 0.000 (0.004) loss 1.1838 (1.4229) lr 1.0000e-05 eta 8:50:48
epoch [3/50] batch [160/796] time 0.843 (0.836) data 0.000 (0.003) loss 1.1760 (1.4319) lr 1.0000e-05 eta 8:50:14
epoch [3/50] batch [180/796] time 0.842 (0.836) data 0.000 (0.003) loss 0.8370 (1.4254) lr 1.0000e-05 eta 8:49:59
epoch [3/50] batch [200/796] time 0.823 (0.836) data 0.000 (0.003) loss 3.1567 (1.4460) lr 1.0000e-05 eta 8:49:31
epoch [3/50] batch [220/796] time 0.833 (0.836) data 0.000 (0.002) loss 2.1037 (1.4304) lr 1.0000e-05 eta 8:49:07
epoch [3/50] batch [240/796] time 0.842 (0.836) data 0.000 (0.002) loss 0.8321 (1.3859) lr 1.0000e-05 eta 8:48:45
epoch [3/50] batch [260/796] time 0.820 (0.835) data 0.000 (0.002) loss 1.2523 (1.3867) lr 1.0000e-05 eta 8:48:18
epoch [3/50] batch [280/796] time 0.833 (0.835) data 0.000 (0.002) loss 1.2344 (1.4050) lr 1.0000e-05 eta 8:47:51
epoch [3/50] batch [300/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.9998 (1.3818) lr 1.0000e-05 eta 8:47:36
epoch [3/50] batch [320/796] time 0.842 (0.835) data 0.000 (0.002) loss 2.2835 (1.3909) lr 1.0000e-05 eta 8:47:11
epoch [3/50] batch [340/796] time 0.819 (0.835) data 0.000 (0.002) loss 0.6757 (1.3953) lr 1.0000e-05 eta 8:46:48
epoch [3/50] batch [360/796] time 0.811 (0.835) data 0.000 (0.002) loss 0.2226 (1.3825) lr 1.0000e-05 eta 8:46:25
epoch [3/50] batch [380/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.9462 (1.3914) lr 1.0000e-05 eta 8:46:11
epoch [3/50] batch [400/796] time 0.833 (0.835) data 0.000 (0.001) loss 1.9234 (1.4089) lr 1.0000e-05 eta 8:45:55
epoch [3/50] batch [420/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.7311 (1.4085) lr 1.0000e-05 eta 8:45:34
epoch [3/50] batch [440/796] time 0.833 (0.834) data 0.000 (0.001) loss 2.1566 (1.4115) lr 1.0000e-05 eta 8:45:17
epoch [3/50] batch [460/796] time 0.843 (0.834) data 0.000 (0.001) loss 1.6628 (1.4239) lr 1.0000e-05 eta 8:44:59
epoch [3/50] batch [480/796] time 0.836 (0.834) data 0.000 (0.001) loss 0.0552 (1.4230) lr 1.0000e-05 eta 8:44:37
epoch [3/50] batch [500/796] time 0.843 (0.834) data 0.000 (0.001) loss 1.3037 (1.4185) lr 1.0000e-05 eta 8:44:19
epoch [3/50] batch [520/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.3034 (1.4110) lr 1.0000e-05 eta 8:44:03
epoch [3/50] batch [540/796] time 0.841 (0.834) data 0.000 (0.001) loss 1.4787 (1.4042) lr 1.0000e-05 eta 8:43:48
epoch [3/50] batch [560/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.5055 (1.3987) lr 1.0000e-05 eta 8:43:31
epoch [3/50] batch [580/796] time 0.843 (0.834) data 0.000 (0.001) loss 3.2427 (1.4053) lr 1.0000e-05 eta 8:43:12
epoch [3/50] batch [600/796] time 0.841 (0.834) data 0.000 (0.001) loss 1.0010 (1.4042) lr 1.0000e-05 eta 8:42:56
epoch [3/50] batch [620/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.6807 (1.3991) lr 1.0000e-05 eta 8:42:37
epoch [3/50] batch [640/796] time 0.842 (0.834) data 0.000 (0.001) loss 2.2414 (1.4019) lr 1.0000e-05 eta 8:42:19
epoch [3/50] batch [660/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.7991 (1.3981) lr 1.0000e-05 eta 8:42:02
epoch [3/50] batch [680/796] time 0.846 (0.834) data 0.000 (0.001) loss 2.3842 (1.4037) lr 1.0000e-05 eta 8:41:45
epoch [3/50] batch [700/796] time 0.822 (0.834) data 0.000 (0.001) loss 0.4816 (1.3990) lr 1.0000e-05 eta 8:41:28
epoch [3/50] batch [720/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.4228 (1.3985) lr 1.0000e-05 eta 8:41:10
epoch [3/50] batch [740/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.7788 (1.4012) lr 1.0000e-05 eta 8:40:53
epoch [3/50] batch [760/796] time 0.844 (0.834) data 0.000 (0.001) loss 1.8777 (1.4016) lr 1.0000e-05 eta 8:40:36
epoch [3/50] batch [780/796] time 0.844 (0.834) data 0.000 (0.001) loss 2.0949 (1.4023) lr 1.0000e-05 eta 8:40:19
epoch [4/50] batch [20/796] time 0.842 (0.860) data 0.000 (0.027) loss 1.7853 (1.3664) lr 1.0000e-05 eta 8:55:48
epoch [4/50] batch [40/796] time 0.846 (0.848) data 0.000 (0.014) loss 1.9000 (1.3298) lr 1.0000e-05 eta 8:48:07
epoch [4/50] batch [60/796] time 0.846 (0.843) data 0.000 (0.009) loss 2.2714 (1.3243) lr 1.0000e-05 eta 8:45:06
epoch [4/50] batch [80/796] time 0.841 (0.841) data 0.000 (0.007) loss 0.5126 (1.3253) lr 1.0000e-05 eta 8:43:10
epoch [4/50] batch [100/796] time 0.832 (0.839) data 0.000 (0.006) loss 1.4494 (1.3431) lr 1.0000e-05 eta 8:41:36
epoch [4/50] batch [120/796] time 0.811 (0.838) data 0.000 (0.005) loss 0.9603 (1.3665) lr 1.0000e-05 eta 8:40:38
epoch [4/50] batch [140/796] time 0.833 (0.837) data 0.000 (0.004) loss 0.8016 (1.3426) lr 1.0000e-05 eta 8:40:02
epoch [4/50] batch [160/796] time 0.843 (0.837) data 0.000 (0.004) loss 1.3598 (1.3636) lr 1.0000e-05 eta 8:39:32
epoch [4/50] batch [180/796] time 0.843 (0.837) data 0.000 (0.003) loss 1.4620 (1.3391) lr 1.0000e-05 eta 8:39:06
epoch [4/50] batch [200/796] time 0.842 (0.836) data 0.000 (0.003) loss 0.3009 (1.3467) lr 1.0000e-05 eta 8:38:47
epoch [4/50] batch [220/796] time 0.842 (0.836) data 0.000 (0.003) loss 3.0205 (1.3765) lr 1.0000e-05 eta 8:38:06
epoch [4/50] batch [240/796] time 0.821 (0.836) data 0.000 (0.002) loss 0.3572 (1.3718) lr 1.0000e-05 eta 8:37:39
epoch [4/50] batch [260/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.5520 (1.3573) lr 1.0000e-05 eta 8:37:12
epoch [4/50] batch [280/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.2526 (1.3617) lr 1.0000e-05 eta 8:36:48
epoch [4/50] batch [300/796] time 0.832 (0.835) data 0.000 (0.002) loss 1.1053 (1.3596) lr 1.0000e-05 eta 8:36:19
epoch [4/50] batch [320/796] time 0.831 (0.835) data 0.000 (0.002) loss 2.7700 (1.3719) lr 1.0000e-05 eta 8:35:54
epoch [4/50] batch [340/796] time 0.820 (0.834) data 0.000 (0.002) loss 0.5333 (1.3649) lr 1.0000e-05 eta 8:35:33
epoch [4/50] batch [360/796] time 0.832 (0.834) data 0.000 (0.002) loss 0.8351 (1.3585) lr 1.0000e-05 eta 8:35:10
epoch [4/50] batch [380/796] time 0.820 (0.834) data 0.000 (0.002) loss 1.4933 (1.3606) lr 1.0000e-05 eta 8:34:46
epoch [4/50] batch [400/796] time 0.843 (0.834) data 0.000 (0.002) loss 1.5843 (1.3550) lr 1.0000e-05 eta 8:34:25
epoch [4/50] batch [420/796] time 0.832 (0.834) data 0.000 (0.001) loss 2.3874 (1.3662) lr 1.0000e-05 eta 8:34:08
epoch [4/50] batch [440/796] time 0.841 (0.834) data 0.000 (0.001) loss 3.2402 (1.3566) lr 1.0000e-05 eta 8:33:50
epoch [4/50] batch [460/796] time 0.821 (0.834) data 0.000 (0.001) loss 1.9805 (1.3577) lr 1.0000e-05 eta 8:33:34
epoch [4/50] batch [480/796] time 0.843 (0.834) data 0.000 (0.001) loss 1.2135 (1.3512) lr 1.0000e-05 eta 8:33:14
epoch [4/50] batch [500/796] time 0.834 (0.834) data 0.000 (0.001) loss 1.9011 (1.3449) lr 1.0000e-05 eta 8:32:59
epoch [4/50] batch [520/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.5513 (1.3354) lr 1.0000e-05 eta 8:32:40
epoch [4/50] batch [540/796] time 0.832 (0.834) data 0.000 (0.001) loss 1.7206 (1.3306) lr 1.0000e-05 eta 8:32:27
epoch [4/50] batch [560/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.8894 (1.3222) lr 1.0000e-05 eta 8:32:07
epoch [4/50] batch [580/796] time 0.842 (0.834) data 0.000 (0.001) loss 2.7445 (1.3270) lr 1.0000e-05 eta 8:31:47
epoch [4/50] batch [600/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.0401 (1.3153) lr 1.0000e-05 eta 8:31:30
epoch [4/50] batch [620/796] time 0.834 (0.834) data 0.000 (0.001) loss 0.9372 (1.3127) lr 1.0000e-05 eta 8:31:14
epoch [4/50] batch [640/796] time 0.822 (0.834) data 0.000 (0.001) loss 1.1006 (1.3093) lr 1.0000e-05 eta 8:30:54
epoch [4/50] batch [660/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.9544 (1.3007) lr 1.0000e-05 eta 8:30:37
epoch [4/50] batch [680/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.5032 (1.3007) lr 1.0000e-05 eta 8:30:17
epoch [4/50] batch [700/796] time 0.833 (0.834) data 0.000 (0.001) loss 2.2261 (1.2975) lr 1.0000e-05 eta 8:30:01
epoch [4/50] batch [720/796] time 0.820 (0.834) data 0.000 (0.001) loss 1.6872 (1.3007) lr 1.0000e-05 eta 8:29:44
epoch [4/50] batch [740/796] time 0.842 (0.833) data 0.000 (0.001) loss 0.1316 (1.2995) lr 1.0000e-05 eta 8:29:22
epoch [4/50] batch [760/796] time 0.831 (0.833) data 0.000 (0.001) loss 0.5672 (1.3019) lr 1.0000e-05 eta 8:29:04
epoch [4/50] batch [780/796] time 0.834 (0.833) data 0.000 (0.001) loss 2.0036 (1.3034) lr 1.0000e-05 eta 8:28:47
epoch [5/50] batch [20/796] time 0.811 (0.860) data 0.000 (0.028) loss 1.4332 (1.4500) lr 1.0000e-05 eta 8:44:21
epoch [5/50] batch [40/796] time 0.821 (0.847) data 0.000 (0.014) loss 1.1111 (1.4076) lr 1.0000e-05 eta 8:36:10
epoch [5/50] batch [60/796] time 0.820 (0.841) data 0.000 (0.010) loss 0.6043 (1.4328) lr 1.0000e-05 eta 8:32:40
epoch [5/50] batch [80/796] time 0.841 (0.840) data 0.000 (0.007) loss 1.3332 (1.4205) lr 1.0000e-05 eta 8:31:41
epoch [5/50] batch [100/796] time 0.832 (0.839) data 0.000 (0.006) loss 2.2195 (1.4277) lr 1.0000e-05 eta 8:30:36
epoch [5/50] batch [120/796] time 0.834 (0.838) data 0.000 (0.005) loss 0.8896 (1.3868) lr 1.0000e-05 eta 8:29:39
epoch [5/50] batch [140/796] time 0.820 (0.837) data 0.000 (0.004) loss 2.0899 (1.3759) lr 1.0000e-05 eta 8:28:58
epoch [5/50] batch [160/796] time 0.821 (0.837) data 0.000 (0.004) loss 1.3468 (1.3558) lr 1.0000e-05 eta 8:28:31
epoch [5/50] batch [180/796] time 0.821 (0.836) data 0.000 (0.003) loss 1.1708 (1.3241) lr 1.0000e-05 eta 8:27:50
epoch [5/50] batch [200/796] time 0.833 (0.836) data 0.000 (0.003) loss 0.4502 (1.2937) lr 1.0000e-05 eta 8:27:25
epoch [5/50] batch [220/796] time 0.841 (0.836) data 0.000 (0.003) loss 2.6491 (1.3034) lr 1.0000e-05 eta 8:26:51
epoch [5/50] batch [240/796] time 0.846 (0.836) data 0.000 (0.003) loss 1.1055 (1.2886) lr 1.0000e-05 eta 8:26:38
epoch [5/50] batch [260/796] time 0.819 (0.835) data 0.000 (0.002) loss 1.5089 (1.2916) lr 1.0000e-05 eta 8:26:13
epoch [5/50] batch [280/796] time 0.842 (0.835) data 0.000 (0.002) loss 2.8783 (1.2893) lr 1.0000e-05 eta 8:25:52
epoch [5/50] batch [300/796] time 0.833 (0.835) data 0.000 (0.002) loss 1.4030 (1.2884) lr 1.0000e-05 eta 8:25:31
epoch [5/50] batch [320/796] time 0.843 (0.835) data 0.000 (0.002) loss 2.0444 (1.3032) lr 1.0000e-05 eta 8:25:03
epoch [5/50] batch [340/796] time 0.843 (0.835) data 0.000 (0.002) loss 1.0228 (1.2813) lr 1.0000e-05 eta 8:24:38
epoch [5/50] batch [360/796] time 0.841 (0.835) data 0.000 (0.002) loss 0.9596 (1.2757) lr 1.0000e-05 eta 8:24:20
epoch [5/50] batch [380/796] time 0.843 (0.834) data 0.000 (0.002) loss 0.9184 (1.2779) lr 1.0000e-05 eta 8:23:58
epoch [5/50] batch [400/796] time 0.833 (0.834) data 0.000 (0.002) loss 0.3681 (1.2864) lr 1.0000e-05 eta 8:23:34
epoch [5/50] batch [420/796] time 0.819 (0.834) data 0.000 (0.002) loss 1.3307 (1.2859) lr 1.0000e-05 eta 8:23:11
epoch [5/50] batch [440/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.1084 (1.2945) lr 1.0000e-05 eta 8:22:50
epoch [5/50] batch [460/796] time 0.811 (0.834) data 0.000 (0.001) loss 2.0293 (1.3019) lr 1.0000e-05 eta 8:22:32
epoch [5/50] batch [480/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.2967 (1.3002) lr 1.0000e-05 eta 8:22:17
epoch [5/50] batch [500/796] time 0.821 (0.834) data 0.000 (0.001) loss 3.4348 (1.2963) lr 1.0000e-05 eta 8:22:04
epoch [5/50] batch [520/796] time 0.843 (0.834) data 0.000 (0.001) loss 2.7116 (1.2995) lr 1.0000e-05 eta 8:21:49
epoch [5/50] batch [540/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.9862 (1.2903) lr 1.0000e-05 eta 8:21:34
epoch [5/50] batch [560/796] time 0.811 (0.834) data 0.000 (0.001) loss 2.5867 (1.2908) lr 1.0000e-05 eta 8:21:16
epoch [5/50] batch [580/796] time 0.831 (0.834) data 0.000 (0.001) loss 0.6260 (1.2909) lr 1.0000e-05 eta 8:20:56
epoch [5/50] batch [600/796] time 0.832 (0.834) data 0.000 (0.001) loss 1.5374 (1.2840) lr 1.0000e-05 eta 8:20:40
epoch [5/50] batch [620/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.3671 (1.2870) lr 1.0000e-05 eta 8:20:20
epoch [5/50] batch [640/796] time 0.833 (0.834) data 0.000 (0.001) loss 2.3348 (1.2912) lr 1.0000e-05 eta 8:20:03
epoch [5/50] batch [660/796] time 0.810 (0.834) data 0.000 (0.001) loss 1.2175 (1.2899) lr 1.0000e-05 eta 8:19:47
epoch [5/50] batch [680/796] time 0.830 (0.834) data 0.000 (0.001) loss 0.9618 (1.2906) lr 1.0000e-05 eta 8:19:30
epoch [5/50] batch [700/796] time 0.832 (0.834) data 0.000 (0.001) loss 1.6161 (1.2816) lr 1.0000e-05 eta 8:19:12
epoch [5/50] batch [720/796] time 0.821 (0.834) data 0.000 (0.001) loss 1.2638 (1.2848) lr 1.0000e-05 eta 8:18:56
epoch [5/50] batch [740/796] time 0.841 (0.834) data 0.000 (0.001) loss 1.6868 (1.2834) lr 1.0000e-05 eta 8:18:37
epoch [5/50] batch [760/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.3575 (1.2799) lr 1.0000e-05 eta 8:18:18
epoch [5/50] batch [780/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.8059 (1.2800) lr 1.0000e-05 eta 8:17:59
epoch [6/50] batch [20/796] time 0.811 (0.857) data 0.000 (0.026) loss 2.3991 (1.6189) lr 2.0000e-03 eta 8:31:27
epoch [6/50] batch [40/796] time 0.820 (0.846) data 0.000 (0.013) loss 0.8307 (1.5459) lr 2.0000e-03 eta 8:24:16
epoch [6/50] batch [60/796] time 0.833 (0.842) data 0.000 (0.009) loss 2.1966 (1.5353) lr 2.0000e-03 eta 8:22:02
epoch [6/50] batch [80/796] time 0.836 (0.840) data 0.000 (0.007) loss 2.0064 (1.4916) lr 2.0000e-03 eta 8:20:23
epoch [6/50] batch [100/796] time 0.820 (0.838) data 0.000 (0.005) loss 0.7601 (1.4477) lr 2.0000e-03 eta 8:19:03
epoch [6/50] batch [120/796] time 0.820 (0.838) data 0.000 (0.004) loss 1.3020 (1.4238) lr 2.0000e-03 eta 8:18:20
epoch [6/50] batch [140/796] time 0.842 (0.837) data 0.000 (0.004) loss 2.3727 (1.4435) lr 2.0000e-03 eta 8:17:48
epoch [6/50] batch [160/796] time 0.842 (0.837) data 0.000 (0.003) loss 0.5681 (1.4508) lr 2.0000e-03 eta 8:17:33
epoch [6/50] batch [180/796] time 0.842 (0.837) data 0.000 (0.003) loss 0.9675 (1.4213) lr 2.0000e-03 eta 8:17:00
epoch [6/50] batch [200/796] time 0.837 (0.836) data 0.000 (0.003) loss 1.2670 (1.4123) lr 2.0000e-03 eta 8:16:25
epoch [6/50] batch [220/796] time 0.820 (0.836) data 0.000 (0.002) loss 0.7435 (1.3985) lr 2.0000e-03 eta 8:16:05
epoch [6/50] batch [240/796] time 0.821 (0.836) data 0.000 (0.002) loss 0.9284 (1.3943) lr 2.0000e-03 eta 8:15:35
epoch [6/50] batch [260/796] time 0.821 (0.835) data 0.000 (0.002) loss 0.7997 (1.3481) lr 2.0000e-03 eta 8:15:10
epoch [6/50] batch [280/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.8410 (1.3269) lr 2.0000e-03 eta 8:14:45
epoch [6/50] batch [300/796] time 0.832 (0.835) data 0.000 (0.002) loss 1.3858 (1.3127) lr 2.0000e-03 eta 8:14:25
epoch [6/50] batch [320/796] time 0.812 (0.835) data 0.000 (0.002) loss 1.4500 (1.3034) lr 2.0000e-03 eta 8:14:01
epoch [6/50] batch [340/796] time 0.841 (0.835) data 0.000 (0.002) loss 1.3095 (1.2928) lr 2.0000e-03 eta 8:13:43
epoch [6/50] batch [360/796] time 0.832 (0.835) data 0.000 (0.002) loss 3.5125 (1.2830) lr 2.0000e-03 eta 8:13:19
epoch [6/50] batch [380/796] time 0.844 (0.835) data 0.000 (0.002) loss 0.0990 (1.2660) lr 2.0000e-03 eta 8:13:05
epoch [6/50] batch [400/796] time 0.842 (0.835) data 0.000 (0.001) loss 2.1093 (1.2472) lr 2.0000e-03 eta 8:12:49
epoch [6/50] batch [420/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.7585 (1.2491) lr 2.0000e-03 eta 8:12:29
epoch [6/50] batch [440/796] time 0.821 (0.835) data 0.000 (0.001) loss 1.0932 (1.2382) lr 2.0000e-03 eta 8:12:08
epoch [6/50] batch [460/796] time 0.832 (0.835) data 0.000 (0.001) loss 0.9026 (1.2338) lr 2.0000e-03 eta 8:11:52
epoch [6/50] batch [480/796] time 0.832 (0.835) data 0.000 (0.001) loss 1.9766 (1.2284) lr 2.0000e-03 eta 8:11:35
epoch [6/50] batch [500/796] time 0.842 (0.835) data 0.000 (0.001) loss 1.0308 (1.2240) lr 2.0000e-03 eta 8:11:20
epoch [6/50] batch [520/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.8630 (1.2152) lr 2.0000e-03 eta 8:10:57
epoch [6/50] batch [540/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.6549 (1.2119) lr 2.0000e-03 eta 8:10:38
epoch [6/50] batch [560/796] time 0.820 (0.834) data 0.000 (0.001) loss 1.3207 (1.2075) lr 2.0000e-03 eta 8:10:19
epoch [6/50] batch [580/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.4382 (1.2010) lr 2.0000e-03 eta 8:09:59
epoch [6/50] batch [600/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.9035 (1.1883) lr 2.0000e-03 eta 8:09:43
epoch [6/50] batch [620/796] time 0.841 (0.834) data 0.000 (0.001) loss 1.0921 (1.1854) lr 2.0000e-03 eta 8:09:27
epoch [6/50] batch [640/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.9615 (1.1854) lr 2.0000e-03 eta 8:09:13
epoch [6/50] batch [660/796] time 0.833 (0.834) data 0.000 (0.001) loss 1.4933 (1.1738) lr 2.0000e-03 eta 8:08:58
epoch [6/50] batch [680/796] time 0.832 (0.834) data 0.000 (0.001) loss 1.6010 (1.1621) lr 2.0000e-03 eta 8:08:40
epoch [6/50] batch [700/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.1395 (1.1629) lr 2.0000e-03 eta 8:08:24
epoch [6/50] batch [720/796] time 0.843 (0.834) data 0.000 (0.001) loss 1.5390 (1.1589) lr 2.0000e-03 eta 8:08:02
epoch [6/50] batch [740/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.3299 (1.1536) lr 2.0000e-03 eta 8:07:46
epoch [6/50] batch [760/796] time 0.848 (0.834) data 0.006 (0.001) loss 1.6981 (1.1570) lr 2.0000e-03 eta 8:07:29
epoch [6/50] batch [780/796] time 0.813 (0.834) data 0.000 (0.001) loss 1.0375 (1.1576) lr 2.0000e-03 eta 8:07:14
epoch [7/50] batch [20/796] time 0.842 (0.862) data 0.000 (0.028) loss 0.3856 (1.1206) lr 1.9980e-03 eta 8:22:59
epoch [7/50] batch [40/796] time 0.821 (0.847) data 0.000 (0.014) loss 0.9625 (1.0342) lr 1.9980e-03 eta 8:13:48
epoch [7/50] batch [60/796] time 0.812 (0.843) data 0.002 (0.009) loss 2.3516 (1.0660) lr 1.9980e-03 eta 8:11:11
epoch [7/50] batch [80/796] time 0.843 (0.841) data 0.000 (0.007) loss 1.2055 (1.0633) lr 1.9980e-03 eta 8:09:40
epoch [7/50] batch [100/796] time 0.842 (0.840) data 0.000 (0.006) loss 0.4151 (1.0119) lr 1.9980e-03 eta 8:08:44
epoch [7/50] batch [120/796] time 0.821 (0.838) data 0.000 (0.005) loss 1.1933 (1.0169) lr 1.9980e-03 eta 8:07:44
epoch [7/50] batch [140/796] time 0.820 (0.838) data 0.000 (0.004) loss 0.3238 (1.0080) lr 1.9980e-03 eta 8:07:04
epoch [7/50] batch [160/796] time 0.833 (0.837) data 0.000 (0.004) loss 0.4258 (0.9809) lr 1.9980e-03 eta 8:06:35
epoch [7/50] batch [180/796] time 0.820 (0.837) data 0.000 (0.003) loss 1.2199 (0.9737) lr 1.9980e-03 eta 8:06:06
epoch [7/50] batch [200/796] time 0.843 (0.837) data 0.000 (0.003) loss 0.4758 (0.9938) lr 1.9980e-03 eta 8:05:37
epoch [7/50] batch [220/796] time 0.843 (0.836) data 0.000 (0.003) loss 1.2081 (0.9949) lr 1.9980e-03 eta 8:05:11
epoch [7/50] batch [240/796] time 0.843 (0.836) data 0.000 (0.002) loss 0.5561 (0.9799) lr 1.9980e-03 eta 8:04:52
epoch [7/50] batch [260/796] time 0.822 (0.836) data 0.000 (0.002) loss 0.7474 (1.0161) lr 1.9980e-03 eta 8:04:29
epoch [7/50] batch [280/796] time 0.843 (0.836) data 0.000 (0.002) loss 0.4804 (1.0006) lr 1.9980e-03 eta 8:04:09
epoch [7/50] batch [300/796] time 0.842 (0.836) data 0.000 (0.002) loss 0.4503 (1.0078) lr 1.9980e-03 eta 8:03:46
epoch [7/50] batch [320/796] time 0.843 (0.836) data 0.000 (0.002) loss 1.2577 (1.0087) lr 1.9980e-03 eta 8:03:24
epoch [7/50] batch [340/796] time 0.821 (0.836) data 0.000 (0.002) loss 0.3272 (0.9930) lr 1.9980e-03 eta 8:03:05
epoch [7/50] batch [360/796] time 0.832 (0.836) data 0.000 (0.002) loss 0.4298 (0.9851) lr 1.9980e-03 eta 8:02:44
epoch [7/50] batch [380/796] time 0.833 (0.835) data 0.000 (0.002) loss 1.1070 (0.9885) lr 1.9980e-03 eta 8:02:19
epoch [7/50] batch [400/796] time 0.832 (0.835) data 0.000 (0.002) loss 1.2520 (0.9843) lr 1.9980e-03 eta 8:02:04
epoch [7/50] batch [420/796] time 0.810 (0.835) data 0.000 (0.002) loss 0.7602 (0.9749) lr 1.9980e-03 eta 8:01:40
epoch [7/50] batch [440/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.4184 (0.9692) lr 1.9980e-03 eta 8:01:20
epoch [7/50] batch [460/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.8367 (0.9581) lr 1.9980e-03 eta 8:01:02
epoch [7/50] batch [480/796] time 0.838 (0.835) data 0.000 (0.001) loss 0.9007 (0.9690) lr 1.9980e-03 eta 8:00:43
epoch [7/50] batch [500/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.3878 (0.9820) lr 1.9980e-03 eta 8:00:25
epoch [7/50] batch [520/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.6583 (0.9879) lr 1.9980e-03 eta 8:00:06
epoch [7/50] batch [540/796] time 0.826 (0.835) data 0.000 (0.001) loss 1.0942 (0.9937) lr 1.9980e-03 eta 7:59:47
epoch [7/50] batch [560/796] time 0.832 (0.835) data 0.000 (0.001) loss 1.6971 (0.9995) lr 1.9980e-03 eta 7:59:28
epoch [7/50] batch [580/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.2682 (1.0066) lr 1.9980e-03 eta 7:59:07
epoch [7/50] batch [600/796] time 0.835 (0.835) data 0.000 (0.001) loss 2.1773 (1.0015) lr 1.9980e-03 eta 7:58:50
epoch [7/50] batch [620/796] time 0.833 (0.835) data 0.000 (0.001) loss 1.8222 (1.0021) lr 1.9980e-03 eta 7:58:30
epoch [7/50] batch [640/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.9068 (0.9963) lr 1.9980e-03 eta 7:58:15
epoch [7/50] batch [660/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.4666 (0.9932) lr 1.9980e-03 eta 7:58:00
epoch [7/50] batch [680/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.1037 (0.9951) lr 1.9980e-03 eta 7:57:42
epoch [7/50] batch [700/796] time 0.836 (0.835) data 0.000 (0.001) loss 0.8302 (0.9952) lr 1.9980e-03 eta 7:57:25
epoch [7/50] batch [720/796] time 0.839 (0.835) data 0.000 (0.001) loss 0.9111 (0.9909) lr 1.9980e-03 eta 7:57:08
epoch [7/50] batch [740/796] time 0.834 (0.835) data 0.000 (0.001) loss 0.2015 (0.9862) lr 1.9980e-03 eta 7:56:51
epoch [7/50] batch [760/796] time 0.819 (0.834) data 0.000 (0.001) loss 1.6261 (0.9892) lr 1.9980e-03 eta 7:56:30
epoch [7/50] batch [780/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.7536 (0.9994) lr 1.9980e-03 eta 7:56:13
epoch [8/50] batch [20/796] time 0.821 (0.858) data 0.000 (0.024) loss 1.0605 (0.9737) lr 1.9921e-03 eta 8:09:04
epoch [8/50] batch [40/796] time 0.832 (0.845) data 0.000 (0.012) loss 0.2595 (0.9276) lr 1.9921e-03 eta 8:01:29
epoch [8/50] batch [60/796] time 0.843 (0.841) data 0.000 (0.008) loss 1.4355 (0.9333) lr 1.9921e-03 eta 7:58:53
epoch [8/50] batch [80/796] time 0.842 (0.839) data 0.000 (0.006) loss 0.2977 (0.9268) lr 1.9921e-03 eta 7:57:29
epoch [8/50] batch [100/796] time 0.842 (0.838) data 0.000 (0.005) loss 0.5942 (0.9268) lr 1.9921e-03 eta 7:56:26
epoch [8/50] batch [120/796] time 0.820 (0.837) data 0.000 (0.004) loss 0.4830 (0.9126) lr 1.9921e-03 eta 7:55:36
epoch [8/50] batch [140/796] time 0.834 (0.836) data 0.000 (0.004) loss 0.3268 (0.9213) lr 1.9921e-03 eta 7:55:08
epoch [8/50] batch [160/796] time 0.842 (0.836) data 0.000 (0.003) loss 0.6853 (0.9726) lr 1.9921e-03 eta 7:54:45
epoch [8/50] batch [180/796] time 0.842 (0.836) data 0.000 (0.003) loss 0.4814 (0.9872) lr 1.9921e-03 eta 7:54:26
epoch [8/50] batch [200/796] time 0.843 (0.835) data 0.000 (0.003) loss 0.4319 (1.0018) lr 1.9921e-03 eta 7:53:49
epoch [8/50] batch [220/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.8922 (0.9914) lr 1.9921e-03 eta 7:53:26
epoch [8/50] batch [240/796] time 0.841 (0.835) data 0.000 (0.002) loss 1.4357 (0.9909) lr 1.9921e-03 eta 7:53:04
epoch [8/50] batch [260/796] time 0.819 (0.835) data 0.000 (0.002) loss 3.0100 (0.9939) lr 1.9921e-03 eta 7:52:45
epoch [8/50] batch [280/796] time 0.842 (0.835) data 0.000 (0.002) loss 2.0263 (1.0081) lr 1.9921e-03 eta 7:52:24
epoch [8/50] batch [300/796] time 0.847 (0.835) data 0.000 (0.002) loss 1.4075 (0.9967) lr 1.9921e-03 eta 7:52:07
epoch [8/50] batch [320/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.0432 (0.9939) lr 1.9921e-03 eta 7:51:57
epoch [8/50] batch [340/796] time 0.834 (0.835) data 0.000 (0.002) loss 0.1643 (0.9920) lr 1.9921e-03 eta 7:51:39
epoch [8/50] batch [360/796] time 0.810 (0.835) data 0.000 (0.002) loss 0.4724 (0.9954) lr 1.9921e-03 eta 7:51:16
epoch [8/50] batch [380/796] time 0.817 (0.835) data 0.000 (0.001) loss 0.2571 (0.9947) lr 1.9921e-03 eta 7:50:59
epoch [8/50] batch [400/796] time 0.842 (0.835) data 0.000 (0.001) loss 1.8670 (0.9885) lr 1.9921e-03 eta 7:50:36
epoch [8/50] batch [420/796] time 0.810 (0.835) data 0.000 (0.001) loss 0.4427 (0.9896) lr 1.9921e-03 eta 7:50:17
epoch [8/50] batch [440/796] time 0.841 (0.835) data 0.000 (0.001) loss 1.6551 (1.0027) lr 1.9921e-03 eta 7:50:01
epoch [8/50] batch [460/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.5667 (0.9988) lr 1.9921e-03 eta 7:49:41
epoch [8/50] batch [480/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.4763 (0.9981) lr 1.9921e-03 eta 7:49:23
epoch [8/50] batch [500/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.4802 (0.9849) lr 1.9921e-03 eta 7:49:06
epoch [8/50] batch [520/796] time 0.822 (0.834) data 0.000 (0.001) loss 0.3307 (0.9730) lr 1.9921e-03 eta 7:48:48
epoch [8/50] batch [540/796] time 0.842 (0.835) data 0.000 (0.001) loss 1.8380 (0.9743) lr 1.9921e-03 eta 7:48:36
epoch [8/50] batch [560/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.9716 (0.9754) lr 1.9921e-03 eta 7:48:20
epoch [8/50] batch [580/796] time 0.842 (0.835) data 0.000 (0.001) loss 1.9929 (0.9711) lr 1.9921e-03 eta 7:48:02
epoch [8/50] batch [600/796] time 0.841 (0.835) data 0.000 (0.001) loss 2.9695 (0.9828) lr 1.9921e-03 eta 7:47:46
epoch [8/50] batch [620/796] time 0.841 (0.835) data 0.000 (0.001) loss 2.0576 (0.9861) lr 1.9921e-03 eta 7:47:27
epoch [8/50] batch [640/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.2115 (0.9854) lr 1.9921e-03 eta 7:47:11
epoch [8/50] batch [660/796] time 0.844 (0.834) data 0.000 (0.001) loss 0.1405 (0.9891) lr 1.9921e-03 eta 7:46:52
epoch [8/50] batch [680/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.3637 (0.9968) lr 1.9921e-03 eta 7:46:34
epoch [8/50] batch [700/796] time 0.826 (0.834) data 0.000 (0.001) loss 0.2030 (0.9895) lr 1.9921e-03 eta 7:46:18
epoch [8/50] batch [720/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.9437 (0.9898) lr 1.9921e-03 eta 7:46:01
epoch [8/50] batch [740/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.8520 (0.9811) lr 1.9921e-03 eta 7:45:42
epoch [8/50] batch [760/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.2355 (0.9835) lr 1.9921e-03 eta 7:45:24
epoch [8/50] batch [780/796] time 0.822 (0.834) data 0.000 (0.001) loss 0.1193 (0.9735) lr 1.9921e-03 eta 7:45:06
epoch [9/50] batch [20/796] time 0.843 (0.857) data 0.000 (0.025) loss 0.3517 (0.9905) lr 1.9823e-03 eta 7:56:58
epoch [9/50] batch [40/796] time 0.820 (0.844) data 0.000 (0.012) loss 1.9153 (0.9823) lr 1.9823e-03 eta 7:49:56
epoch [9/50] batch [60/796] time 0.832 (0.841) data 0.001 (0.008) loss 0.8540 (0.9893) lr 1.9823e-03 eta 7:47:55
epoch [9/50] batch [80/796] time 0.846 (0.840) data 0.000 (0.006) loss 0.0524 (0.9478) lr 1.9823e-03 eta 7:46:46
epoch [9/50] batch [100/796] time 0.843 (0.839) data 0.000 (0.005) loss 0.4541 (0.9081) lr 1.9823e-03 eta 7:45:55
epoch [9/50] batch [120/796] time 0.832 (0.837) data 0.000 (0.004) loss 0.9774 (0.9258) lr 1.9823e-03 eta 7:44:47
epoch [9/50] batch [140/796] time 0.843 (0.836) data 0.000 (0.004) loss 0.4847 (0.9254) lr 1.9823e-03 eta 7:44:01
epoch [9/50] batch [160/796] time 0.842 (0.836) data 0.000 (0.003) loss 0.6519 (0.9310) lr 1.9823e-03 eta 7:43:35
epoch [9/50] batch [180/796] time 0.822 (0.835) data 0.000 (0.003) loss 0.5790 (0.9092) lr 1.9823e-03 eta 7:42:58
epoch [9/50] batch [200/796] time 0.832 (0.835) data 0.000 (0.003) loss 0.7759 (0.9341) lr 1.9823e-03 eta 7:42:22
epoch [9/50] batch [220/796] time 0.821 (0.834) data 0.000 (0.002) loss 0.6436 (0.9325) lr 1.9823e-03 eta 7:41:51
epoch [9/50] batch [240/796] time 0.843 (0.834) data 0.000 (0.002) loss 1.0333 (0.9424) lr 1.9823e-03 eta 7:41:33
epoch [9/50] batch [260/796] time 0.832 (0.834) data 0.000 (0.002) loss 0.7712 (0.9468) lr 1.9823e-03 eta 7:41:20
epoch [9/50] batch [280/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.9511 (0.9547) lr 1.9823e-03 eta 7:40:56
epoch [9/50] batch [300/796] time 0.843 (0.834) data 0.000 (0.002) loss 2.0172 (0.9616) lr 1.9823e-03 eta 7:40:41
epoch [9/50] batch [320/796] time 0.841 (0.834) data 0.000 (0.002) loss 1.4828 (0.9605) lr 1.9823e-03 eta 7:40:21
epoch [9/50] batch [340/796] time 0.821 (0.834) data 0.000 (0.002) loss 0.8617 (0.9473) lr 1.9823e-03 eta 7:40:01
epoch [9/50] batch [360/796] time 0.832 (0.834) data 0.000 (0.002) loss 0.9809 (0.9540) lr 1.9823e-03 eta 7:39:49
epoch [9/50] batch [380/796] time 0.837 (0.834) data 0.000 (0.001) loss 0.5662 (0.9502) lr 1.9823e-03 eta 7:39:29
epoch [9/50] batch [400/796] time 0.848 (0.834) data 0.000 (0.001) loss 1.2713 (0.9442) lr 1.9823e-03 eta 7:39:09
epoch [9/50] batch [420/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.2413 (0.9429) lr 1.9823e-03 eta 7:38:52
epoch [9/50] batch [440/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.2153 (0.9320) lr 1.9823e-03 eta 7:38:33
epoch [9/50] batch [460/796] time 0.834 (0.834) data 0.000 (0.001) loss 0.1849 (0.9255) lr 1.9823e-03 eta 7:38:16
epoch [9/50] batch [480/796] time 0.817 (0.834) data 0.000 (0.001) loss 0.4687 (0.9156) lr 1.9823e-03 eta 7:38:01
epoch [9/50] batch [500/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.4747 (0.9314) lr 1.9823e-03 eta 7:37:46
epoch [9/50] batch [520/796] time 0.843 (0.834) data 0.000 (0.001) loss 1.1326 (0.9355) lr 1.9823e-03 eta 7:37:25
epoch [9/50] batch [540/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.8210 (0.9378) lr 1.9823e-03 eta 7:37:10
epoch [9/50] batch [560/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.4642 (0.9328) lr 1.9823e-03 eta 7:36:50
epoch [9/50] batch [580/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.4095 (0.9388) lr 1.9823e-03 eta 7:36:35
epoch [9/50] batch [600/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.7088 (0.9476) lr 1.9823e-03 eta 7:36:19
epoch [9/50] batch [620/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.3317 (0.9393) lr 1.9823e-03 eta 7:36:00
epoch [9/50] batch [640/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.1691 (0.9522) lr 1.9823e-03 eta 7:35:41
epoch [9/50] batch [660/796] time 0.822 (0.834) data 0.000 (0.001) loss 1.6327 (0.9535) lr 1.9823e-03 eta 7:35:22
epoch [9/50] batch [680/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.3897 (0.9507) lr 1.9823e-03 eta 7:35:08
epoch [9/50] batch [700/796] time 0.831 (0.834) data 0.000 (0.001) loss 0.1436 (0.9468) lr 1.9823e-03 eta 7:34:51
epoch [9/50] batch [720/796] time 0.823 (0.834) data 0.000 (0.001) loss 0.0350 (0.9474) lr 1.9823e-03 eta 7:34:33
epoch [9/50] batch [740/796] time 0.809 (0.834) data 0.000 (0.001) loss 0.9488 (0.9522) lr 1.9823e-03 eta 7:34:16
epoch [9/50] batch [760/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.6892 (0.9490) lr 1.9823e-03 eta 7:34:01
epoch [9/50] batch [780/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.8842 (0.9469) lr 1.9823e-03 eta 7:33:44
epoch [10/50] batch [20/796] time 0.821 (0.861) data 0.000 (0.026) loss 1.8512 (1.0946) lr 1.9686e-03 eta 7:48:09
epoch [10/50] batch [40/796] time 0.820 (0.846) data 0.000 (0.013) loss 0.9020 (0.9944) lr 1.9686e-03 eta 7:39:27
epoch [10/50] batch [60/796] time 0.843 (0.842) data 0.000 (0.009) loss 1.5571 (0.9744) lr 1.9686e-03 eta 7:37:05
epoch [10/50] batch [80/796] time 0.832 (0.840) data 0.000 (0.007) loss 2.0538 (0.9222) lr 1.9686e-03 eta 7:35:39
epoch [10/50] batch [100/796] time 0.820 (0.839) data 0.000 (0.005) loss 1.8476 (0.8913) lr 1.9686e-03 eta 7:34:47
epoch [10/50] batch [120/796] time 0.822 (0.837) data 0.000 (0.004) loss 0.2500 (0.8983) lr 1.9686e-03 eta 7:33:49
epoch [10/50] batch [140/796] time 0.820 (0.837) data 0.000 (0.004) loss 2.0858 (0.8998) lr 1.9686e-03 eta 7:33:19
epoch [10/50] batch [160/796] time 0.843 (0.837) data 0.000 (0.003) loss 2.7487 (0.8924) lr 1.9686e-03 eta 7:32:52
epoch [10/50] batch [180/796] time 0.842 (0.836) data 0.000 (0.003) loss 1.8848 (0.9011) lr 1.9686e-03 eta 7:32:24
epoch [10/50] batch [200/796] time 0.841 (0.836) data 0.000 (0.003) loss 0.4371 (0.8803) lr 1.9686e-03 eta 7:32:02
epoch [10/50] batch [220/796] time 0.829 (0.836) data 0.000 (0.003) loss 0.5812 (0.8910) lr 1.9686e-03 eta 7:31:38
epoch [10/50] batch [240/796] time 0.819 (0.835) data 0.000 (0.002) loss 1.2520 (0.9058) lr 1.9686e-03 eta 7:31:04
epoch [10/50] batch [260/796] time 0.810 (0.835) data 0.000 (0.002) loss 0.1548 (0.9012) lr 1.9686e-03 eta 7:30:40
epoch [10/50] batch [280/796] time 0.820 (0.835) data 0.000 (0.002) loss 1.5670 (0.9084) lr 1.9686e-03 eta 7:30:10
epoch [10/50] batch [300/796] time 0.832 (0.835) data 0.000 (0.002) loss 2.9497 (0.9074) lr 1.9686e-03 eta 7:29:56
epoch [10/50] batch [320/796] time 0.841 (0.835) data 0.000 (0.002) loss 1.0550 (0.9143) lr 1.9686e-03 eta 7:29:35
epoch [10/50] batch [340/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.1114 (0.9005) lr 1.9686e-03 eta 7:29:18
epoch [10/50] batch [360/796] time 0.844 (0.835) data 0.000 (0.002) loss 0.9729 (0.9056) lr 1.9686e-03 eta 7:29:04
epoch [10/50] batch [380/796] time 0.833 (0.835) data 0.000 (0.002) loss 1.5748 (0.9232) lr 1.9686e-03 eta 7:28:44
epoch [10/50] batch [400/796] time 0.821 (0.835) data 0.000 (0.001) loss 1.1576 (0.9175) lr 1.9686e-03 eta 7:28:29
epoch [10/50] batch [420/796] time 0.841 (0.835) data 0.000 (0.001) loss 2.0024 (0.9163) lr 1.9686e-03 eta 7:28:12
epoch [10/50] batch [440/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.0254 (0.9094) lr 1.9686e-03 eta 7:27:58
epoch [10/50] batch [460/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.5556 (0.9108) lr 1.9686e-03 eta 7:27:39
epoch [10/50] batch [480/796] time 0.821 (0.835) data 0.000 (0.001) loss 1.6383 (0.9153) lr 1.9686e-03 eta 7:27:20
epoch [10/50] batch [500/796] time 0.842 (0.835) data 0.000 (0.001) loss 1.1959 (0.9168) lr 1.9686e-03 eta 7:27:00
epoch [10/50] batch [520/796] time 0.834 (0.835) data 0.000 (0.001) loss 0.0452 (0.9271) lr 1.9686e-03 eta 7:26:41
epoch [10/50] batch [540/796] time 0.812 (0.834) data 0.000 (0.001) loss 0.5257 (0.9235) lr 1.9686e-03 eta 7:26:21
epoch [10/50] batch [560/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.6890 (0.9195) lr 1.9686e-03 eta 7:26:04
epoch [10/50] batch [580/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.9484 (0.9137) lr 1.9686e-03 eta 7:25:45
epoch [10/50] batch [600/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.6300 (0.9165) lr 1.9686e-03 eta 7:25:29
epoch [10/50] batch [620/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.0333 (0.9111) lr 1.9686e-03 eta 7:25:12
epoch [10/50] batch [640/796] time 0.819 (0.834) data 0.000 (0.001) loss 1.1936 (0.9083) lr 1.9686e-03 eta 7:24:57
epoch [10/50] batch [660/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.9589 (0.8995) lr 1.9686e-03 eta 7:24:37
epoch [10/50] batch [680/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.7222 (0.9121) lr 1.9686e-03 eta 7:24:20
epoch [10/50] batch [700/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.0682 (0.9155) lr 1.9686e-03 eta 7:24:03
epoch [10/50] batch [720/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.3980 (0.9123) lr 1.9686e-03 eta 7:23:44
epoch [10/50] batch [740/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.7875 (0.9180) lr 1.9686e-03 eta 7:23:26
epoch [10/50] batch [760/796] time 0.822 (0.834) data 0.000 (0.001) loss 0.4522 (0.9232) lr 1.9686e-03 eta 7:23:09
epoch [10/50] batch [780/796] time 0.842 (0.834) data 0.000 (0.001) loss 2.0107 (0.9219) lr 1.9686e-03 eta 7:22:52
epoch [11/50] batch [20/796] time 0.820 (0.864) data 0.000 (0.032) loss 0.4272 (0.8610) lr 1.9511e-03 eta 7:37:57
epoch [11/50] batch [40/796] time 0.849 (0.849) data 0.000 (0.016) loss 0.2069 (0.8727) lr 1.9511e-03 eta 7:30:03
epoch [11/50] batch [60/796] time 0.842 (0.844) data 0.000 (0.011) loss 1.1529 (0.9313) lr 1.9511e-03 eta 7:27:05
epoch [11/50] batch [80/796] time 0.821 (0.841) data 0.000 (0.008) loss 1.0740 (0.9585) lr 1.9511e-03 eta 7:25:11
epoch [11/50] batch [100/796] time 0.845 (0.840) data 0.000 (0.007) loss 0.5630 (0.9049) lr 1.9511e-03 eta 7:24:17
epoch [11/50] batch [120/796] time 0.821 (0.839) data 0.000 (0.006) loss 3.2992 (0.9528) lr 1.9511e-03 eta 7:23:17
epoch [11/50] batch [140/796] time 0.818 (0.838) data 0.000 (0.005) loss 0.7050 (0.9488) lr 1.9511e-03 eta 7:22:30
epoch [11/50] batch [160/796] time 0.809 (0.837) data 0.000 (0.004) loss 0.5536 (0.9278) lr 1.9511e-03 eta 7:21:49
epoch [11/50] batch [180/796] time 0.843 (0.836) data 0.000 (0.004) loss 0.0620 (0.8881) lr 1.9511e-03 eta 7:21:20
epoch [11/50] batch [200/796] time 0.821 (0.836) data 0.000 (0.003) loss 0.1518 (0.8919) lr 1.9511e-03 eta 7:20:48
epoch [11/50] batch [220/796] time 0.834 (0.836) data 0.000 (0.003) loss 0.4598 (0.9004) lr 1.9511e-03 eta 7:20:27
epoch [11/50] batch [240/796] time 0.811 (0.835) data 0.000 (0.003) loss 0.3562 (0.8966) lr 1.9511e-03 eta 7:19:59
epoch [11/50] batch [260/796] time 0.842 (0.835) data 0.000 (0.003) loss 0.1907 (0.8886) lr 1.9511e-03 eta 7:19:39
epoch [11/50] batch [280/796] time 0.821 (0.835) data 0.000 (0.003) loss 0.3709 (0.8669) lr 1.9511e-03 eta 7:19:18
epoch [11/50] batch [300/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.3236 (0.8607) lr 1.9511e-03 eta 7:19:03
epoch [11/50] batch [320/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.6882 (0.8603) lr 1.9511e-03 eta 7:18:41
epoch [11/50] batch [340/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.4450 (0.8820) lr 1.9511e-03 eta 7:18:24
epoch [11/50] batch [360/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.4134 (0.8782) lr 1.9511e-03 eta 7:17:58
epoch [11/50] batch [380/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.9508 (0.8735) lr 1.9511e-03 eta 7:17:35
epoch [11/50] batch [400/796] time 0.824 (0.835) data 0.000 (0.002) loss 1.4935 (0.8774) lr 1.9511e-03 eta 7:17:19
epoch [11/50] batch [420/796] time 0.821 (0.835) data 0.000 (0.002) loss 1.5245 (0.8876) lr 1.9511e-03 eta 7:17:04
epoch [11/50] batch [440/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.6546 (0.8869) lr 1.9511e-03 eta 7:16:45
epoch [11/50] batch [460/796] time 0.848 (0.834) data 0.000 (0.002) loss 1.2976 (0.8794) lr 1.9511e-03 eta 7:16:25
epoch [11/50] batch [480/796] time 0.822 (0.834) data 0.000 (0.002) loss 2.7287 (0.8800) lr 1.9511e-03 eta 7:16:08
epoch [11/50] batch [500/796] time 0.842 (0.834) data 0.000 (0.001) loss 2.7014 (0.8830) lr 1.9511e-03 eta 7:15:52
epoch [11/50] batch [520/796] time 0.820 (0.834) data 0.000 (0.001) loss 1.1295 (0.8865) lr 1.9511e-03 eta 7:15:33
epoch [11/50] batch [540/796] time 0.844 (0.834) data 0.000 (0.001) loss 0.4308 (0.8816) lr 1.9511e-03 eta 7:15:19
epoch [11/50] batch [560/796] time 0.834 (0.834) data 0.003 (0.001) loss 0.2761 (0.8832) lr 1.9511e-03 eta 7:14:59
epoch [11/50] batch [580/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.7184 (0.8810) lr 1.9511e-03 eta 7:14:38
epoch [11/50] batch [600/796] time 0.809 (0.834) data 0.000 (0.001) loss 0.8310 (0.8846) lr 1.9511e-03 eta 7:14:22
epoch [11/50] batch [620/796] time 0.834 (0.834) data 0.000 (0.001) loss 0.5037 (0.8837) lr 1.9511e-03 eta 7:14:02
epoch [11/50] batch [640/796] time 0.842 (0.834) data 0.000 (0.001) loss 3.2436 (0.8906) lr 1.9511e-03 eta 7:13:47
epoch [11/50] batch [660/796] time 0.844 (0.834) data 0.002 (0.001) loss 0.5962 (0.8980) lr 1.9511e-03 eta 7:13:29
epoch [11/50] batch [680/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.6213 (0.9083) lr 1.9511e-03 eta 7:13:11
epoch [11/50] batch [700/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.8713 (0.9021) lr 1.9511e-03 eta 7:12:55
epoch [11/50] batch [720/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.5454 (0.9004) lr 1.9511e-03 eta 7:12:38
epoch [11/50] batch [740/796] time 0.846 (0.834) data 0.000 (0.001) loss 0.1859 (0.8931) lr 1.9511e-03 eta 7:12:22
epoch [11/50] batch [760/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.1143 (0.8922) lr 1.9511e-03 eta 7:12:06
epoch [11/50] batch [780/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.6193 (0.8894) lr 1.9511e-03 eta 7:11:47
epoch [12/50] batch [20/796] time 0.832 (0.860) data 0.000 (0.026) loss 2.1465 (1.0286) lr 1.9298e-03 eta 7:24:38
epoch [12/50] batch [40/796] time 0.833 (0.851) data 0.000 (0.013) loss 1.8496 (1.0115) lr 1.9298e-03 eta 7:19:31
epoch [12/50] batch [60/796] time 0.819 (0.845) data 0.000 (0.009) loss 0.6000 (0.9154) lr 1.9298e-03 eta 7:16:19
epoch [12/50] batch [80/796] time 0.834 (0.842) data 0.000 (0.007) loss 0.7417 (0.8488) lr 1.9298e-03 eta 7:14:46
epoch [12/50] batch [100/796] time 0.834 (0.841) data 0.000 (0.005) loss 0.2500 (0.8789) lr 1.9298e-03 eta 7:13:31
epoch [12/50] batch [120/796] time 0.842 (0.839) data 0.000 (0.005) loss 2.1786 (0.8609) lr 1.9298e-03 eta 7:12:26
epoch [12/50] batch [140/796] time 0.844 (0.838) data 0.000 (0.004) loss 0.5456 (0.8690) lr 1.9298e-03 eta 7:11:41
epoch [12/50] batch [160/796] time 0.842 (0.837) data 0.000 (0.003) loss 0.0825 (0.8475) lr 1.9298e-03 eta 7:10:57
epoch [12/50] batch [180/796] time 0.821 (0.837) data 0.000 (0.003) loss 1.3817 (0.8632) lr 1.9298e-03 eta 7:10:33
epoch [12/50] batch [200/796] time 0.832 (0.837) data 0.000 (0.003) loss 0.1947 (0.8551) lr 1.9298e-03 eta 7:10:03
epoch [12/50] batch [220/796] time 0.833 (0.836) data 0.000 (0.003) loss 0.7695 (0.8516) lr 1.9298e-03 eta 7:09:34
epoch [12/50] batch [240/796] time 0.842 (0.836) data 0.000 (0.002) loss 0.9728 (0.8660) lr 1.9298e-03 eta 7:09:03
epoch [12/50] batch [260/796] time 0.832 (0.836) data 0.000 (0.002) loss 0.9583 (0.8726) lr 1.9298e-03 eta 7:08:45
epoch [12/50] batch [280/796] time 0.820 (0.836) data 0.000 (0.002) loss 0.0106 (0.8853) lr 1.9298e-03 eta 7:08:24
epoch [12/50] batch [300/796] time 0.843 (0.835) data 0.000 (0.002) loss 2.5624 (0.8908) lr 1.9298e-03 eta 7:07:59
epoch [12/50] batch [320/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.6493 (0.8734) lr 1.9298e-03 eta 7:07:39
epoch [12/50] batch [340/796] time 0.841 (0.835) data 0.000 (0.002) loss 2.3510 (0.8740) lr 1.9298e-03 eta 7:07:18
epoch [12/50] batch [360/796] time 0.844 (0.835) data 0.000 (0.002) loss 0.3894 (0.8742) lr 1.9298e-03 eta 7:06:59
epoch [12/50] batch [380/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.7134 (0.8805) lr 1.9298e-03 eta 7:06:40
epoch [12/50] batch [400/796] time 0.841 (0.835) data 0.000 (0.001) loss 0.2605 (0.9005) lr 1.9298e-03 eta 7:06:20
epoch [12/50] batch [420/796] time 0.842 (0.835) data 0.000 (0.001) loss 1.0420 (0.8952) lr 1.9298e-03 eta 7:05:58
epoch [12/50] batch [440/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.2811 (0.8875) lr 1.9298e-03 eta 7:05:40
epoch [12/50] batch [460/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.2418 (0.8785) lr 1.9298e-03 eta 7:05:24
epoch [12/50] batch [480/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.7521 (0.8805) lr 1.9298e-03 eta 7:05:05
epoch [12/50] batch [500/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.3520 (0.8814) lr 1.9298e-03 eta 7:04:49
epoch [12/50] batch [520/796] time 0.820 (0.834) data 0.000 (0.001) loss 1.4342 (0.8715) lr 1.9298e-03 eta 7:04:31
epoch [12/50] batch [540/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.9649 (0.8826) lr 1.9298e-03 eta 7:04:09
epoch [12/50] batch [560/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.0880 (0.8902) lr 1.9298e-03 eta 7:03:50
epoch [12/50] batch [580/796] time 0.844 (0.834) data 0.000 (0.001) loss 1.9170 (0.8938) lr 1.9298e-03 eta 7:03:31
epoch [12/50] batch [600/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.7273 (0.8959) lr 1.9298e-03 eta 7:03:11
epoch [12/50] batch [620/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.8215 (0.8898) lr 1.9298e-03 eta 7:02:52
epoch [12/50] batch [640/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.3159 (0.8873) lr 1.9298e-03 eta 7:02:34
epoch [12/50] batch [660/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.8454 (0.8907) lr 1.9298e-03 eta 7:02:17
epoch [12/50] batch [680/796] time 0.820 (0.834) data 0.000 (0.001) loss 2.6467 (0.9012) lr 1.9298e-03 eta 7:01:58
epoch [12/50] batch [700/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.0412 (0.8957) lr 1.9298e-03 eta 7:01:44
epoch [12/50] batch [720/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.0367 (0.8930) lr 1.9298e-03 eta 7:01:26
epoch [12/50] batch [740/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.5838 (0.8920) lr 1.9298e-03 eta 7:01:12
epoch [12/50] batch [760/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.6297 (0.8925) lr 1.9298e-03 eta 7:00:54
epoch [12/50] batch [780/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.2820 (0.8930) lr 1.9298e-03 eta 7:00:38
epoch [13/50] batch [20/796] time 0.820 (0.857) data 0.000 (0.025) loss 0.7849 (0.9300) lr 1.9048e-03 eta 7:11:57
epoch [13/50] batch [40/796] time 0.836 (0.846) data 0.000 (0.013) loss 0.4093 (0.8291) lr 1.9048e-03 eta 7:05:42
epoch [13/50] batch [60/796] time 0.810 (0.841) data 0.000 (0.008) loss 0.2293 (0.8561) lr 1.9048e-03 eta 7:03:22
epoch [13/50] batch [80/796] time 0.821 (0.839) data 0.000 (0.006) loss 1.0468 (0.8347) lr 1.9048e-03 eta 7:02:01
epoch [13/50] batch [100/796] time 0.832 (0.838) data 0.000 (0.005) loss 0.3915 (0.8401) lr 1.9048e-03 eta 7:01:11
epoch [13/50] batch [120/796] time 0.832 (0.838) data 0.000 (0.004) loss 2.0967 (0.8556) lr 1.9048e-03 eta 7:00:33
epoch [13/50] batch [140/796] time 0.833 (0.837) data 0.000 (0.004) loss 0.2226 (0.8521) lr 1.9048e-03 eta 6:59:53
epoch [13/50] batch [160/796] time 0.820 (0.836) data 0.000 (0.003) loss 0.1500 (0.8623) lr 1.9048e-03 eta 6:59:15
epoch [13/50] batch [180/796] time 0.842 (0.836) data 0.000 (0.003) loss 2.2040 (0.8927) lr 1.9048e-03 eta 6:58:46
epoch [13/50] batch [200/796] time 0.832 (0.836) data 0.000 (0.003) loss 1.4185 (0.9022) lr 1.9048e-03 eta 6:58:26
epoch [13/50] batch [220/796] time 0.831 (0.835) data 0.000 (0.002) loss 0.5345 (0.9058) lr 1.9048e-03 eta 6:58:07
epoch [13/50] batch [240/796] time 0.837 (0.835) data 0.000 (0.002) loss 2.0049 (0.9002) lr 1.9048e-03 eta 6:57:42
epoch [13/50] batch [260/796] time 0.837 (0.835) data 0.000 (0.002) loss 0.0752 (0.8808) lr 1.9048e-03 eta 6:57:20
epoch [13/50] batch [280/796] time 0.833 (0.835) data 0.000 (0.002) loss 0.5667 (0.8787) lr 1.9048e-03 eta 6:57:02
epoch [13/50] batch [300/796] time 0.820 (0.835) data 0.000 (0.002) loss 0.4115 (0.8718) lr 1.9048e-03 eta 6:56:42
epoch [13/50] batch [320/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.7195 (0.8773) lr 1.9048e-03 eta 6:56:23
epoch [13/50] batch [340/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.9226 (0.8923) lr 1.9048e-03 eta 6:56:05
epoch [13/50] batch [360/796] time 0.833 (0.835) data 0.000 (0.002) loss 2.2212 (0.9044) lr 1.9048e-03 eta 6:55:50
epoch [13/50] batch [380/796] time 0.810 (0.835) data 0.000 (0.001) loss 0.5125 (0.9115) lr 1.9048e-03 eta 6:55:30
epoch [13/50] batch [400/796] time 0.810 (0.835) data 0.000 (0.001) loss 0.4492 (0.9144) lr 1.9048e-03 eta 6:55:15
epoch [13/50] batch [420/796] time 0.842 (0.835) data 0.000 (0.001) loss 1.2248 (0.9151) lr 1.9048e-03 eta 6:54:56
epoch [13/50] batch [440/796] time 0.813 (0.835) data 0.000 (0.001) loss 1.3366 (0.9051) lr 1.9048e-03 eta 6:54:36
epoch [13/50] batch [460/796] time 0.820 (0.834) data 0.000 (0.001) loss 2.4383 (0.9109) lr 1.9048e-03 eta 6:54:17
epoch [13/50] batch [480/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.4485 (0.9205) lr 1.9048e-03 eta 6:53:59
epoch [13/50] batch [500/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.4011 (0.9123) lr 1.9048e-03 eta 6:53:43
epoch [13/50] batch [520/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.1241 (0.9103) lr 1.9048e-03 eta 6:53:27
epoch [13/50] batch [540/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.8915 (0.9099) lr 1.9048e-03 eta 6:53:14
epoch [13/50] batch [560/796] time 0.826 (0.835) data 0.000 (0.001) loss 3.0942 (0.9176) lr 1.9048e-03 eta 6:52:57
epoch [13/50] batch [580/796] time 0.820 (0.835) data 0.000 (0.001) loss 1.8255 (0.9137) lr 1.9048e-03 eta 6:52:40
epoch [13/50] batch [600/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.4717 (0.9136) lr 1.9048e-03 eta 6:52:19
epoch [13/50] batch [620/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.5394 (0.9089) lr 1.9048e-03 eta 6:52:01
epoch [13/50] batch [640/796] time 0.820 (0.834) data 0.000 (0.001) loss 2.0045 (0.9128) lr 1.9048e-03 eta 6:51:44
epoch [13/50] batch [660/796] time 0.818 (0.834) data 0.000 (0.001) loss 1.6060 (0.9110) lr 1.9048e-03 eta 6:51:24
epoch [13/50] batch [680/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.5362 (0.9043) lr 1.9048e-03 eta 6:51:08
epoch [13/50] batch [700/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.1228 (0.8933) lr 1.9048e-03 eta 6:50:49
epoch [13/50] batch [720/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.9966 (0.8896) lr 1.9048e-03 eta 6:50:30
epoch [13/50] batch [740/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.4073 (0.8901) lr 1.9048e-03 eta 6:50:11
epoch [13/50] batch [760/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.4583 (0.8943) lr 1.9048e-03 eta 6:49:55
epoch [13/50] batch [780/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.1928 (0.8879) lr 1.9048e-03 eta 6:49:38
epoch [14/50] batch [20/796] time 0.844 (0.860) data 0.000 (0.024) loss 0.6359 (0.5684) lr 1.8763e-03 eta 7:01:39
epoch [14/50] batch [40/796] time 0.842 (0.847) data 0.000 (0.012) loss 0.3336 (0.7348) lr 1.8763e-03 eta 6:55:03
epoch [14/50] batch [60/796] time 0.844 (0.843) data 0.000 (0.008) loss 0.0381 (0.7277) lr 1.8763e-03 eta 6:53:04
epoch [14/50] batch [80/796] time 0.821 (0.841) data 0.000 (0.006) loss 1.6808 (0.7601) lr 1.8763e-03 eta 6:51:28
epoch [14/50] batch [100/796] time 0.832 (0.839) data 0.000 (0.005) loss 1.6694 (0.8074) lr 1.8763e-03 eta 6:50:32
epoch [14/50] batch [120/796] time 0.820 (0.838) data 0.000 (0.004) loss 0.3649 (0.8041) lr 1.8763e-03 eta 6:49:47
epoch [14/50] batch [140/796] time 0.842 (0.838) data 0.000 (0.004) loss 0.0495 (0.8275) lr 1.8763e-03 eta 6:49:16
epoch [14/50] batch [160/796] time 0.834 (0.837) data 0.002 (0.003) loss 0.8703 (0.8561) lr 1.8763e-03 eta 6:48:38
epoch [14/50] batch [180/796] time 0.834 (0.837) data 0.000 (0.003) loss 0.9144 (0.8705) lr 1.8763e-03 eta 6:48:11
epoch [14/50] batch [200/796] time 0.835 (0.837) data 0.000 (0.003) loss 1.9708 (0.8695) lr 1.8763e-03 eta 6:47:59
epoch [14/50] batch [220/796] time 0.833 (0.836) data 0.000 (0.002) loss 1.4322 (0.8740) lr 1.8763e-03 eta 6:47:28
epoch [14/50] batch [240/796] time 0.821 (0.836) data 0.000 (0.002) loss 0.3537 (0.8545) lr 1.8763e-03 eta 6:47:01
epoch [14/50] batch [260/796] time 0.820 (0.836) data 0.000 (0.002) loss 0.1032 (0.8506) lr 1.8763e-03 eta 6:46:45
epoch [14/50] batch [280/796] time 0.820 (0.836) data 0.000 (0.002) loss 0.1470 (0.8604) lr 1.8763e-03 eta 6:46:23
epoch [14/50] batch [300/796] time 0.820 (0.836) data 0.000 (0.002) loss 0.7850 (0.8812) lr 1.8763e-03 eta 6:46:03
epoch [14/50] batch [320/796] time 0.831 (0.835) data 0.000 (0.002) loss 1.6824 (0.8996) lr 1.8763e-03 eta 6:45:38
epoch [14/50] batch [340/796] time 0.820 (0.836) data 0.000 (0.002) loss 0.4602 (0.8955) lr 1.8763e-03 eta 6:45:24
epoch [14/50] batch [360/796] time 0.842 (0.836) data 0.000 (0.002) loss 0.0536 (0.8862) lr 1.8763e-03 eta 6:45:07
epoch [14/50] batch [380/796] time 0.821 (0.836) data 0.000 (0.001) loss 0.5861 (0.9011) lr 1.8763e-03 eta 6:44:50
epoch [14/50] batch [400/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.5429 (0.9122) lr 1.8763e-03 eta 6:44:31
epoch [14/50] batch [420/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.8826 (0.9055) lr 1.8763e-03 eta 6:44:10
epoch [14/50] batch [440/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.2455 (0.9064) lr 1.8763e-03 eta 6:43:51
epoch [14/50] batch [460/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.6904 (0.9004) lr 1.8763e-03 eta 6:43:33
epoch [14/50] batch [480/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.4516 (0.8959) lr 1.8763e-03 eta 6:43:16
epoch [14/50] batch [500/796] time 0.832 (0.835) data 0.000 (0.001) loss 1.1425 (0.9005) lr 1.8763e-03 eta 6:42:56
epoch [14/50] batch [520/796] time 0.835 (0.835) data 0.000 (0.001) loss 4.1946 (0.9058) lr 1.8763e-03 eta 6:42:37
epoch [14/50] batch [540/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.1015 (0.9111) lr 1.8763e-03 eta 6:42:19
epoch [14/50] batch [560/796] time 0.829 (0.835) data 0.000 (0.001) loss 0.0477 (0.9135) lr 1.8763e-03 eta 6:42:01
epoch [14/50] batch [580/796] time 0.837 (0.835) data 0.000 (0.001) loss 0.7289 (0.9156) lr 1.8763e-03 eta 6:41:41
epoch [14/50] batch [600/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.8517 (0.9173) lr 1.8763e-03 eta 6:41:22
epoch [14/50] batch [620/796] time 0.845 (0.835) data 0.000 (0.001) loss 2.0386 (0.9214) lr 1.8763e-03 eta 6:41:05
epoch [14/50] batch [640/796] time 0.821 (0.835) data 0.000 (0.001) loss 2.1844 (0.9219) lr 1.8763e-03 eta 6:40:45
epoch [14/50] batch [660/796] time 0.845 (0.835) data 0.002 (0.001) loss 0.2393 (0.9150) lr 1.8763e-03 eta 6:40:29
epoch [14/50] batch [680/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.3665 (0.9155) lr 1.8763e-03 eta 6:40:14
epoch [14/50] batch [700/796] time 0.842 (0.835) data 0.000 (0.001) loss 2.2018 (0.9167) lr 1.8763e-03 eta 6:40:00
epoch [14/50] batch [720/796] time 0.832 (0.835) data 0.000 (0.001) loss 0.4181 (0.9075) lr 1.8763e-03 eta 6:39:41
epoch [14/50] batch [740/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.2310 (0.9120) lr 1.8763e-03 eta 6:39:23
epoch [14/50] batch [760/796] time 0.843 (0.835) data 0.000 (0.001) loss 2.2743 (0.9111) lr 1.8763e-03 eta 6:39:04
epoch [14/50] batch [780/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.7199 (0.9131) lr 1.8763e-03 eta 6:38:48
epoch [15/50] batch [20/796] time 0.834 (0.860) data 0.000 (0.027) loss 1.0158 (0.7178) lr 1.8443e-03 eta 6:50:25
epoch [15/50] batch [40/796] time 0.844 (0.846) data 0.000 (0.014) loss 1.5128 (0.7931) lr 1.8443e-03 eta 6:43:40
epoch [15/50] batch [60/796] time 0.821 (0.842) data 0.000 (0.009) loss 0.2194 (0.7706) lr 1.8443e-03 eta 6:41:12
epoch [15/50] batch [80/796] time 0.812 (0.840) data 0.000 (0.007) loss 0.6750 (0.7923) lr 1.8443e-03 eta 6:40:02
epoch [15/50] batch [100/796] time 0.820 (0.838) data 0.000 (0.006) loss 0.1658 (0.7708) lr 1.8443e-03 eta 6:38:58
epoch [15/50] batch [120/796] time 0.843 (0.838) data 0.000 (0.005) loss 0.0991 (0.8304) lr 1.8443e-03 eta 6:38:27
epoch [15/50] batch [140/796] time 0.833 (0.837) data 0.000 (0.004) loss 2.1363 (0.8606) lr 1.8443e-03 eta 6:37:54
epoch [15/50] batch [160/796] time 0.847 (0.837) data 0.000 (0.004) loss 0.6254 (0.8399) lr 1.8443e-03 eta 6:37:23
epoch [15/50] batch [180/796] time 0.843 (0.836) data 0.000 (0.003) loss 1.3900 (0.8892) lr 1.8443e-03 eta 6:36:59
epoch [15/50] batch [200/796] time 0.832 (0.836) data 0.000 (0.003) loss 0.1484 (0.8863) lr 1.8443e-03 eta 6:36:28
epoch [15/50] batch [220/796] time 0.842 (0.836) data 0.000 (0.003) loss 1.9070 (0.8898) lr 1.8443e-03 eta 6:36:03
epoch [15/50] batch [240/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.2026 (0.8677) lr 1.8443e-03 eta 6:35:36
epoch [15/50] batch [260/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.4522 (0.8589) lr 1.8443e-03 eta 6:35:20
epoch [15/50] batch [280/796] time 0.820 (0.835) data 0.000 (0.002) loss 0.0605 (0.8817) lr 1.8443e-03 eta 6:34:56
epoch [15/50] batch [300/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.3245 (0.8635) lr 1.8443e-03 eta 6:34:42
epoch [15/50] batch [320/796] time 0.821 (0.835) data 0.000 (0.002) loss 0.2230 (0.8719) lr 1.8443e-03 eta 6:34:25
epoch [15/50] batch [340/796] time 0.844 (0.835) data 0.000 (0.002) loss 1.1952 (0.8747) lr 1.8443e-03 eta 6:34:05
epoch [15/50] batch [360/796] time 0.834 (0.835) data 0.000 (0.002) loss 0.4678 (0.8747) lr 1.8443e-03 eta 6:33:45
epoch [15/50] batch [380/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.3518 (0.8752) lr 1.8443e-03 eta 6:33:29
epoch [15/50] batch [400/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.2876 (0.8690) lr 1.8443e-03 eta 6:33:11
epoch [15/50] batch [420/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.7551 (0.8711) lr 1.8443e-03 eta 6:32:52
epoch [15/50] batch [440/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.2899 (0.8743) lr 1.8443e-03 eta 6:32:37
epoch [15/50] batch [460/796] time 0.845 (0.835) data 0.000 (0.001) loss 0.6546 (0.8739) lr 1.8443e-03 eta 6:32:23
epoch [15/50] batch [480/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.2736 (0.8654) lr 1.8443e-03 eta 6:32:02
epoch [15/50] batch [500/796] time 0.813 (0.835) data 0.000 (0.001) loss 0.1084 (0.8664) lr 1.8443e-03 eta 6:31:42
epoch [15/50] batch [520/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.3641 (0.8642) lr 1.8443e-03 eta 6:31:24
epoch [15/50] batch [540/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.4553 (0.8626) lr 1.8443e-03 eta 6:31:05
epoch [15/50] batch [560/796] time 0.832 (0.835) data 0.000 (0.001) loss 0.6255 (0.8666) lr 1.8443e-03 eta 6:30:48
epoch [15/50] batch [580/796] time 0.844 (0.835) data 0.008 (0.001) loss 0.1307 (0.8683) lr 1.8443e-03 eta 6:30:31
epoch [15/50] batch [600/796] time 0.834 (0.835) data 0.000 (0.001) loss 0.4850 (0.8778) lr 1.8443e-03 eta 6:30:16
epoch [15/50] batch [620/796] time 0.825 (0.835) data 0.000 (0.001) loss 0.5587 (0.8850) lr 1.8443e-03 eta 6:29:58
epoch [15/50] batch [640/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.6154 (0.8825) lr 1.8443e-03 eta 6:29:41
epoch [15/50] batch [660/796] time 0.834 (0.835) data 0.000 (0.001) loss 1.1293 (0.8861) lr 1.8443e-03 eta 6:29:25
epoch [15/50] batch [680/796] time 0.820 (0.835) data 0.000 (0.001) loss 2.5064 (0.8970) lr 1.8443e-03 eta 6:29:07
epoch [15/50] batch [700/796] time 0.827 (0.835) data 0.000 (0.001) loss 0.6699 (0.9002) lr 1.8443e-03 eta 6:28:53
epoch [15/50] batch [720/796] time 0.833 (0.835) data 0.000 (0.001) loss 1.5777 (0.9000) lr 1.8443e-03 eta 6:28:34
epoch [15/50] batch [740/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.5306 (0.9057) lr 1.8443e-03 eta 6:28:16
epoch [15/50] batch [760/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.6763 (0.9066) lr 1.8443e-03 eta 6:27:58
epoch [15/50] batch [780/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.2059 (0.9001) lr 1.8443e-03 eta 6:27:41
epoch [16/50] batch [20/796] time 0.833 (0.865) data 0.000 (0.027) loss 1.3389 (0.8532) lr 1.8090e-03 eta 6:41:14
epoch [16/50] batch [40/796] time 0.836 (0.850) data 0.000 (0.014) loss 0.4898 (0.7909) lr 1.8090e-03 eta 6:34:00
epoch [16/50] batch [60/796] time 0.820 (0.844) data 0.000 (0.009) loss 0.5205 (0.8743) lr 1.8090e-03 eta 6:30:52
epoch [16/50] batch [80/796] time 0.842 (0.842) data 0.000 (0.007) loss 1.5107 (0.8998) lr 1.8090e-03 eta 6:29:38
epoch [16/50] batch [100/796] time 0.843 (0.839) data 0.000 (0.006) loss 1.4581 (0.9242) lr 1.8090e-03 eta 6:28:18
epoch [16/50] batch [120/796] time 0.832 (0.838) data 0.000 (0.005) loss 3.0271 (0.9477) lr 1.8090e-03 eta 6:27:39
epoch [16/50] batch [140/796] time 0.833 (0.838) data 0.000 (0.004) loss 0.9959 (0.9306) lr 1.8090e-03 eta 6:26:56
epoch [16/50] batch [160/796] time 0.845 (0.837) data 0.000 (0.004) loss 2.1738 (0.9292) lr 1.8090e-03 eta 6:26:30
epoch [16/50] batch [180/796] time 0.833 (0.837) data 0.000 (0.003) loss 2.0632 (0.9238) lr 1.8090e-03 eta 6:26:06
epoch [16/50] batch [200/796] time 0.844 (0.837) data 0.000 (0.003) loss 0.5732 (0.9491) lr 1.8090e-03 eta 6:25:40
epoch [16/50] batch [220/796] time 0.811 (0.836) data 0.000 (0.003) loss 2.6874 (0.9703) lr 1.8090e-03 eta 6:25:13
epoch [16/50] batch [240/796] time 0.843 (0.836) data 0.000 (0.002) loss 1.6534 (0.9689) lr 1.8090e-03 eta 6:24:56
epoch [16/50] batch [260/796] time 0.844 (0.836) data 0.000 (0.002) loss 0.8584 (0.9611) lr 1.8090e-03 eta 6:24:34
epoch [16/50] batch [280/796] time 0.821 (0.836) data 0.000 (0.002) loss 0.3583 (0.9445) lr 1.8090e-03 eta 6:24:07
epoch [16/50] batch [300/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.1583 (0.9336) lr 1.8090e-03 eta 6:23:43
epoch [16/50] batch [320/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.2513 (0.9117) lr 1.8090e-03 eta 6:23:22
epoch [16/50] batch [340/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.8609 (0.8993) lr 1.8090e-03 eta 6:23:03
epoch [16/50] batch [360/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.4308 (0.8797) lr 1.8090e-03 eta 6:22:45
epoch [16/50] batch [380/796] time 0.844 (0.835) data 0.000 (0.002) loss 1.1553 (0.8750) lr 1.8090e-03 eta 6:22:28
epoch [16/50] batch [400/796] time 0.821 (0.835) data 0.000 (0.002) loss 0.1238 (0.8618) lr 1.8090e-03 eta 6:22:15
epoch [16/50] batch [420/796] time 0.849 (0.835) data 0.000 (0.001) loss 1.6014 (0.8534) lr 1.8090e-03 eta 6:21:58
epoch [16/50] batch [440/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.5005 (0.8486) lr 1.8090e-03 eta 6:21:38
epoch [16/50] batch [460/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.2942 (0.8748) lr 1.8090e-03 eta 6:21:18
epoch [16/50] batch [480/796] time 0.822 (0.835) data 0.000 (0.001) loss 1.4357 (0.8842) lr 1.8090e-03 eta 6:20:55
epoch [16/50] batch [500/796] time 0.834 (0.835) data 0.000 (0.001) loss 0.0400 (0.8839) lr 1.8090e-03 eta 6:20:40
epoch [16/50] batch [520/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.6120 (0.8849) lr 1.8090e-03 eta 6:20:21
epoch [16/50] batch [540/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.0957 (0.8767) lr 1.8090e-03 eta 6:20:04
epoch [16/50] batch [560/796] time 0.819 (0.835) data 0.000 (0.001) loss 1.9423 (0.8687) lr 1.8090e-03 eta 6:19:46
epoch [16/50] batch [580/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.4094 (0.8696) lr 1.8090e-03 eta 6:19:29
epoch [16/50] batch [600/796] time 0.822 (0.835) data 0.000 (0.001) loss 0.5326 (0.8599) lr 1.8090e-03 eta 6:19:11
epoch [16/50] batch [620/796] time 0.821 (0.835) data 0.000 (0.001) loss 1.0784 (0.8572) lr 1.8090e-03 eta 6:18:53
epoch [16/50] batch [640/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.8035 (0.8503) lr 1.8090e-03 eta 6:18:35
epoch [16/50] batch [660/796] time 0.832 (0.835) data 0.000 (0.001) loss 0.0449 (0.8546) lr 1.8090e-03 eta 6:18:19
epoch [16/50] batch [680/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.8720 (0.8497) lr 1.8090e-03 eta 6:18:02
epoch [16/50] batch [700/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.2408 (0.8557) lr 1.8090e-03 eta 6:17:48
epoch [16/50] batch [720/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.1964 (0.8525) lr 1.8090e-03 eta 6:17:31
epoch [16/50] batch [740/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.1804 (0.8572) lr 1.8090e-03 eta 6:17:14
epoch [16/50] batch [760/796] time 0.819 (0.835) data 0.000 (0.001) loss 0.9068 (0.8620) lr 1.8090e-03 eta 6:16:57
epoch [16/50] batch [780/796] time 0.836 (0.835) data 0.000 (0.001) loss 0.7832 (0.8603) lr 1.8090e-03 eta 6:16:41
epoch [17/50] batch [20/796] time 0.843 (0.861) data 0.000 (0.025) loss 3.3465 (0.9568) lr 1.7705e-03 eta 6:28:14
epoch [17/50] batch [40/796] time 0.842 (0.849) data 0.000 (0.013) loss 0.5190 (0.7712) lr 1.7705e-03 eta 6:22:09
epoch [17/50] batch [60/796] time 0.833 (0.843) data 0.000 (0.009) loss 0.4474 (0.8324) lr 1.7705e-03 eta 6:19:31
epoch [17/50] batch [80/796] time 0.842 (0.841) data 0.000 (0.006) loss 2.0685 (0.8509) lr 1.7705e-03 eta 6:18:19
epoch [17/50] batch [100/796] time 0.834 (0.840) data 0.000 (0.005) loss 0.9342 (0.8851) lr 1.7705e-03 eta 6:17:19
epoch [17/50] batch [120/796] time 0.811 (0.838) data 0.000 (0.004) loss 0.5877 (0.9255) lr 1.7705e-03 eta 6:16:24
epoch [17/50] batch [140/796] time 0.841 (0.838) data 0.000 (0.004) loss 1.2015 (0.8764) lr 1.7705e-03 eta 6:15:49
epoch [17/50] batch [160/796] time 0.821 (0.837) data 0.000 (0.003) loss 0.4779 (0.8563) lr 1.7705e-03 eta 6:15:19
epoch [17/50] batch [180/796] time 0.836 (0.837) data 0.000 (0.003) loss 0.2027 (0.8420) lr 1.7705e-03 eta 6:14:56
epoch [17/50] batch [200/796] time 0.821 (0.836) data 0.000 (0.003) loss 0.3651 (0.8327) lr 1.7705e-03 eta 6:14:29
epoch [17/50] batch [220/796] time 0.825 (0.836) data 0.000 (0.002) loss 0.3033 (0.8329) lr 1.7705e-03 eta 6:14:08
epoch [17/50] batch [240/796] time 0.842 (0.836) data 0.000 (0.002) loss 0.1203 (0.8365) lr 1.7705e-03 eta 6:13:42
epoch [17/50] batch [260/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.4066 (0.8529) lr 1.7705e-03 eta 6:13:13
epoch [17/50] batch [280/796] time 0.840 (0.836) data 0.000 (0.002) loss 1.8628 (0.8596) lr 1.7705e-03 eta 6:12:59
epoch [17/50] batch [300/796] time 0.833 (0.835) data 0.000 (0.002) loss 1.1422 (0.8485) lr 1.7705e-03 eta 6:12:38
epoch [17/50] batch [320/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.4124 (0.8488) lr 1.7705e-03 eta 6:12:18
epoch [17/50] batch [340/796] time 0.820 (0.835) data 0.000 (0.002) loss 0.6801 (0.8570) lr 1.7705e-03 eta 6:11:58
epoch [17/50] batch [360/796] time 0.820 (0.835) data 0.000 (0.002) loss 1.1594 (0.8655) lr 1.7705e-03 eta 6:11:41
epoch [17/50] batch [380/796] time 0.827 (0.835) data 0.000 (0.002) loss 0.1719 (0.8683) lr 1.7705e-03 eta 6:11:20
epoch [17/50] batch [400/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.0055 (0.8583) lr 1.7705e-03 eta 6:11:01
epoch [17/50] batch [420/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.4864 (0.8536) lr 1.7705e-03 eta 6:10:42
epoch [17/50] batch [440/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.6438 (0.8525) lr 1.7705e-03 eta 6:10:23
epoch [17/50] batch [460/796] time 0.821 (0.835) data 0.000 (0.001) loss 1.6647 (0.8551) lr 1.7705e-03 eta 6:10:04
epoch [17/50] batch [480/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.1914 (0.8486) lr 1.7705e-03 eta 6:09:48
epoch [17/50] batch [500/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.3321 (0.8496) lr 1.7705e-03 eta 6:09:34
epoch [17/50] batch [520/796] time 0.835 (0.835) data 0.000 (0.001) loss 0.4248 (0.8463) lr 1.7705e-03 eta 6:09:17
epoch [17/50] batch [540/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.8115 (0.8399) lr 1.7705e-03 eta 6:08:59
epoch [17/50] batch [560/796] time 0.844 (0.835) data 0.000 (0.001) loss 2.4665 (0.8460) lr 1.7705e-03 eta 6:08:45
epoch [17/50] batch [580/796] time 0.841 (0.835) data 0.000 (0.001) loss 0.3424 (0.8455) lr 1.7705e-03 eta 6:08:28
epoch [17/50] batch [600/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.2306 (0.8514) lr 1.7705e-03 eta 6:08:11
epoch [17/50] batch [620/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.0704 (0.8477) lr 1.7705e-03 eta 6:07:55
epoch [17/50] batch [640/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.3917 (0.8479) lr 1.7705e-03 eta 6:07:34
epoch [17/50] batch [660/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.5498 (0.8423) lr 1.7705e-03 eta 6:07:17
epoch [17/50] batch [680/796] time 0.839 (0.835) data 0.000 (0.001) loss 1.5902 (0.8344) lr 1.7705e-03 eta 6:07:00
epoch [17/50] batch [700/796] time 0.842 (0.835) data 0.000 (0.001) loss 1.5635 (0.8352) lr 1.7705e-03 eta 6:06:44
epoch [17/50] batch [720/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.3740 (0.8389) lr 1.7705e-03 eta 6:06:24
epoch [17/50] batch [740/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.9559 (0.8405) lr 1.7705e-03 eta 6:06:09
epoch [17/50] batch [760/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.7934 (0.8417) lr 1.7705e-03 eta 6:05:51
epoch [17/50] batch [780/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.9854 (0.8376) lr 1.7705e-03 eta 6:05:34
epoch [18/50] batch [20/796] time 0.843 (0.861) data 0.000 (0.029) loss 3.0543 (1.2216) lr 1.7290e-03 eta 6:16:48
epoch [18/50] batch [40/796] time 0.820 (0.847) data 0.000 (0.015) loss 1.5135 (1.0571) lr 1.7290e-03 eta 6:10:18
epoch [18/50] batch [60/796] time 0.836 (0.843) data 0.000 (0.010) loss 1.5125 (0.9636) lr 1.7290e-03 eta 6:08:23
epoch [18/50] batch [80/796] time 0.844 (0.842) data 0.000 (0.007) loss 0.1646 (0.9021) lr 1.7290e-03 eta 6:07:22
epoch [18/50] batch [100/796] time 0.843 (0.840) data 0.000 (0.006) loss 1.8806 (0.8860) lr 1.7290e-03 eta 6:06:29
epoch [18/50] batch [120/796] time 0.843 (0.839) data 0.000 (0.005) loss 1.4688 (0.8630) lr 1.7290e-03 eta 6:05:47
epoch [18/50] batch [140/796] time 0.821 (0.838) data 0.000 (0.004) loss 0.4013 (0.8765) lr 1.7290e-03 eta 6:05:08
epoch [18/50] batch [160/796] time 0.821 (0.838) data 0.000 (0.004) loss 0.1179 (0.8532) lr 1.7290e-03 eta 6:04:39
epoch [18/50] batch [180/796] time 0.833 (0.838) data 0.000 (0.003) loss 0.7197 (0.8828) lr 1.7290e-03 eta 6:04:11
epoch [18/50] batch [200/796] time 0.821 (0.837) data 0.000 (0.003) loss 0.7157 (0.8629) lr 1.7290e-03 eta 6:03:43
epoch [18/50] batch [220/796] time 0.844 (0.837) data 0.000 (0.003) loss 0.5908 (0.8671) lr 1.7290e-03 eta 6:03:16
epoch [18/50] batch [240/796] time 0.821 (0.836) data 0.000 (0.003) loss 0.5318 (0.8543) lr 1.7290e-03 eta 6:02:49
epoch [18/50] batch [260/796] time 0.835 (0.837) data 0.000 (0.002) loss 1.1206 (0.8539) lr 1.7290e-03 eta 6:02:35
epoch [18/50] batch [280/796] time 0.843 (0.837) data 0.000 (0.002) loss 1.4221 (0.8436) lr 1.7290e-03 eta 6:02:21
epoch [18/50] batch [300/796] time 0.842 (0.837) data 0.000 (0.002) loss 0.7109 (0.8348) lr 1.7290e-03 eta 6:02:05
epoch [18/50] batch [320/796] time 0.834 (0.837) data 0.000 (0.002) loss 0.2036 (0.8414) lr 1.7290e-03 eta 6:01:50
epoch [18/50] batch [340/796] time 0.832 (0.837) data 0.000 (0.002) loss 0.6681 (0.8365) lr 1.7290e-03 eta 6:01:29
epoch [18/50] batch [360/796] time 0.833 (0.836) data 0.000 (0.002) loss 0.6003 (0.8271) lr 1.7290e-03 eta 6:01:08
epoch [18/50] batch [380/796] time 0.812 (0.836) data 0.000 (0.002) loss 1.3586 (0.8399) lr 1.7290e-03 eta 6:00:48
epoch [18/50] batch [400/796] time 0.838 (0.836) data 0.000 (0.002) loss 1.8092 (0.8383) lr 1.7290e-03 eta 6:00:28
epoch [18/50] batch [420/796] time 0.841 (0.836) data 0.000 (0.002) loss 1.3136 (0.8282) lr 1.7290e-03 eta 6:00:07
epoch [18/50] batch [440/796] time 0.842 (0.836) data 0.000 (0.002) loss 1.0209 (0.8272) lr 1.7290e-03 eta 5:59:49
epoch [18/50] batch [460/796] time 0.820 (0.836) data 0.000 (0.001) loss 0.5526 (0.8318) lr 1.7290e-03 eta 5:59:32
epoch [18/50] batch [480/796] time 0.842 (0.836) data 0.000 (0.001) loss 0.5685 (0.8302) lr 1.7290e-03 eta 5:59:11
epoch [18/50] batch [500/796] time 0.828 (0.836) data 0.000 (0.001) loss 0.2181 (0.8336) lr 1.7290e-03 eta 5:58:51
epoch [18/50] batch [520/796] time 0.832 (0.835) data 0.000 (0.001) loss 0.3380 (0.8320) lr 1.7290e-03 eta 5:58:30
epoch [18/50] batch [540/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.6505 (0.8336) lr 1.7290e-03 eta 5:58:15
epoch [18/50] batch [560/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.1521 (0.8316) lr 1.7290e-03 eta 5:57:54
epoch [18/50] batch [580/796] time 0.842 (0.835) data 0.000 (0.001) loss 1.1523 (0.8282) lr 1.7290e-03 eta 5:57:36
epoch [18/50] batch [600/796] time 0.820 (0.835) data 0.001 (0.001) loss 0.3226 (0.8364) lr 1.7290e-03 eta 5:57:17
epoch [18/50] batch [620/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.1274 (0.8330) lr 1.7290e-03 eta 5:56:58
epoch [18/50] batch [640/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.2070 (0.8444) lr 1.7290e-03 eta 5:56:41
epoch [18/50] batch [660/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.7534 (0.8478) lr 1.7290e-03 eta 5:56:24
epoch [18/50] batch [680/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.6555 (0.8480) lr 1.7290e-03 eta 5:56:06
epoch [18/50] batch [700/796] time 0.848 (0.835) data 0.000 (0.001) loss 0.7482 (0.8522) lr 1.7290e-03 eta 5:55:51
epoch [18/50] batch [720/796] time 0.832 (0.835) data 0.000 (0.001) loss 0.4434 (0.8501) lr 1.7290e-03 eta 5:55:32
epoch [18/50] batch [740/796] time 0.820 (0.835) data 0.000 (0.001) loss 1.2413 (0.8445) lr 1.7290e-03 eta 5:55:14
epoch [18/50] batch [760/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.5547 (0.8511) lr 1.7290e-03 eta 5:54:55
epoch [18/50] batch [780/796] time 0.832 (0.835) data 0.000 (0.001) loss 0.6000 (0.8534) lr 1.7290e-03 eta 5:54:38
epoch [19/50] batch [20/796] time 0.844 (0.864) data 0.000 (0.030) loss 1.6088 (0.7822) lr 1.6845e-03 eta 6:06:23
epoch [19/50] batch [40/796] time 0.842 (0.848) data 0.000 (0.015) loss 0.2920 (0.7560) lr 1.6845e-03 eta 5:59:17
epoch [19/50] batch [60/796] time 0.841 (0.843) data 0.000 (0.010) loss 1.0334 (0.7794) lr 1.6845e-03 eta 5:57:06
epoch [19/50] batch [80/796] time 0.843 (0.841) data 0.000 (0.008) loss 0.9101 (0.8911) lr 1.6845e-03 eta 5:56:06
epoch [19/50] batch [100/796] time 0.843 (0.840) data 0.000 (0.006) loss 0.6569 (0.8961) lr 1.6845e-03 eta 5:55:06
epoch [19/50] batch [120/796] time 0.842 (0.838) data 0.000 (0.005) loss 0.1142 (0.8651) lr 1.6845e-03 eta 5:54:15
epoch [19/50] batch [140/796] time 0.832 (0.838) data 0.000 (0.004) loss 0.3046 (0.8553) lr 1.6845e-03 eta 5:53:39
epoch [19/50] batch [160/796] time 0.820 (0.837) data 0.000 (0.004) loss 0.9455 (0.8637) lr 1.6845e-03 eta 5:53:13
epoch [19/50] batch [180/796] time 0.841 (0.837) data 0.000 (0.003) loss 1.0357 (0.8427) lr 1.6845e-03 eta 5:52:39
epoch [19/50] batch [200/796] time 0.832 (0.836) data 0.000 (0.003) loss 0.3212 (0.8237) lr 1.6845e-03 eta 5:52:08
epoch [19/50] batch [220/796] time 0.810 (0.836) data 0.000 (0.003) loss 1.1721 (0.8248) lr 1.6845e-03 eta 5:51:38
epoch [19/50] batch [240/796] time 0.838 (0.835) data 0.004 (0.003) loss 1.6559 (0.8542) lr 1.6845e-03 eta 5:51:16
epoch [19/50] batch [260/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.8346 (0.8652) lr 1.6845e-03 eta 5:50:57
epoch [19/50] batch [280/796] time 0.835 (0.835) data 0.000 (0.002) loss 1.7997 (0.8662) lr 1.6845e-03 eta 5:50:31
epoch [19/50] batch [300/796] time 0.841 (0.835) data 0.000 (0.002) loss 1.0958 (0.8611) lr 1.6845e-03 eta 5:50:14
epoch [19/50] batch [320/796] time 0.820 (0.835) data 0.000 (0.002) loss 1.5904 (0.8562) lr 1.6845e-03 eta 5:49:54
epoch [19/50] batch [340/796] time 0.820 (0.835) data 0.000 (0.002) loss 0.1466 (0.8609) lr 1.6845e-03 eta 5:49:35
epoch [19/50] batch [360/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.3048 (0.8631) lr 1.6845e-03 eta 5:49:17
epoch [19/50] batch [380/796] time 0.821 (0.834) data 0.000 (0.002) loss 0.9557 (0.8559) lr 1.6845e-03 eta 5:48:57
epoch [19/50] batch [400/796] time 0.832 (0.834) data 0.000 (0.002) loss 0.6468 (0.8475) lr 1.6845e-03 eta 5:48:37
epoch [19/50] batch [420/796] time 0.820 (0.834) data 0.000 (0.002) loss 0.1051 (0.8296) lr 1.6845e-03 eta 5:48:19
epoch [19/50] batch [440/796] time 0.821 (0.834) data 0.000 (0.002) loss 0.5533 (0.8345) lr 1.6845e-03 eta 5:48:00
epoch [19/50] batch [460/796] time 0.836 (0.834) data 0.000 (0.001) loss 0.4249 (0.8281) lr 1.6845e-03 eta 5:47:43
epoch [19/50] batch [480/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.6810 (0.8365) lr 1.6845e-03 eta 5:47:21
epoch [19/50] batch [500/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.1636 (0.8336) lr 1.6845e-03 eta 5:47:03
epoch [19/50] batch [520/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.0596 (0.8387) lr 1.6845e-03 eta 5:46:45
epoch [19/50] batch [540/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.7507 (0.8342) lr 1.6845e-03 eta 5:46:30
epoch [19/50] batch [560/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.4190 (0.8304) lr 1.6845e-03 eta 5:46:11
epoch [19/50] batch [580/796] time 0.844 (0.834) data 0.000 (0.001) loss 2.9119 (0.8312) lr 1.6845e-03 eta 5:45:54
epoch [19/50] batch [600/796] time 0.844 (0.834) data 0.000 (0.001) loss 1.0142 (0.8228) lr 1.6845e-03 eta 5:45:38
epoch [19/50] batch [620/796] time 0.835 (0.834) data 0.000 (0.001) loss 0.6204 (0.8292) lr 1.6845e-03 eta 5:45:23
epoch [19/50] batch [640/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.3885 (0.8353) lr 1.6845e-03 eta 5:45:06
epoch [19/50] batch [660/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.1108 (0.8327) lr 1.6845e-03 eta 5:44:49
epoch [19/50] batch [680/796] time 0.834 (0.834) data 0.000 (0.001) loss 0.5337 (0.8242) lr 1.6845e-03 eta 5:44:33
epoch [19/50] batch [700/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.6995 (0.8322) lr 1.6845e-03 eta 5:44:15
epoch [19/50] batch [720/796] time 0.820 (0.834) data 0.000 (0.001) loss 1.7666 (0.8353) lr 1.6845e-03 eta 5:43:59
epoch [19/50] batch [740/796] time 0.843 (0.834) data 0.000 (0.001) loss 1.1776 (0.8333) lr 1.6845e-03 eta 5:43:42
epoch [19/50] batch [760/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.5285 (0.8366) lr 1.6845e-03 eta 5:43:24
epoch [19/50] batch [780/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.3180 (0.8414) lr 1.6845e-03 eta 5:43:07
epoch [20/50] batch [20/796] time 0.841 (0.863) data 0.000 (0.027) loss 1.0615 (0.9388) lr 1.6374e-03 eta 5:54:46
epoch [20/50] batch [40/796] time 0.843 (0.850) data 0.000 (0.014) loss 0.9854 (0.8228) lr 1.6374e-03 eta 5:48:48
epoch [20/50] batch [60/796] time 0.842 (0.844) data 0.000 (0.009) loss 1.7468 (0.7743) lr 1.6374e-03 eta 5:46:16
epoch [20/50] batch [80/796] time 0.820 (0.841) data 0.000 (0.007) loss 0.9972 (0.8352) lr 1.6374e-03 eta 5:44:34
epoch [20/50] batch [100/796] time 0.841 (0.840) data 0.000 (0.006) loss 0.7627 (0.8587) lr 1.6374e-03 eta 5:43:51
epoch [20/50] batch [120/796] time 0.811 (0.838) data 0.000 (0.005) loss 0.5494 (0.8382) lr 1.6374e-03 eta 5:43:05
epoch [20/50] batch [140/796] time 0.821 (0.838) data 0.000 (0.004) loss 0.4261 (0.8076) lr 1.6374e-03 eta 5:42:38
epoch [20/50] batch [160/796] time 0.810 (0.837) data 0.000 (0.004) loss 0.6916 (0.8110) lr 1.6374e-03 eta 5:42:01
epoch [20/50] batch [180/796] time 0.841 (0.836) data 0.000 (0.003) loss 1.5808 (0.8170) lr 1.6374e-03 eta 5:41:29
epoch [20/50] batch [200/796] time 0.842 (0.836) data 0.000 (0.003) loss 1.9846 (0.8232) lr 1.6374e-03 eta 5:41:07
epoch [20/50] batch [220/796] time 0.842 (0.836) data 0.000 (0.003) loss 0.9183 (0.8166) lr 1.6374e-03 eta 5:40:44
epoch [20/50] batch [240/796] time 0.809 (0.836) data 0.000 (0.002) loss 1.4019 (0.8246) lr 1.6374e-03 eta 5:40:22
epoch [20/50] batch [260/796] time 0.841 (0.835) data 0.000 (0.002) loss 0.6810 (0.8283) lr 1.6374e-03 eta 5:39:57
epoch [20/50] batch [280/796] time 0.820 (0.835) data 0.000 (0.002) loss 2.8792 (0.8324) lr 1.6374e-03 eta 5:39:40
epoch [20/50] batch [300/796] time 0.844 (0.835) data 0.000 (0.002) loss 0.2753 (0.8282) lr 1.6374e-03 eta 5:39:22
epoch [20/50] batch [320/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.4031 (0.8412) lr 1.6374e-03 eta 5:39:05
epoch [20/50] batch [340/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.1331 (0.8477) lr 1.6374e-03 eta 5:38:43
epoch [20/50] batch [360/796] time 0.822 (0.835) data 0.000 (0.002) loss 0.4010 (0.8427) lr 1.6374e-03 eta 5:38:26
epoch [20/50] batch [380/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.4680 (0.8462) lr 1.6374e-03 eta 5:38:12
epoch [20/50] batch [400/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.6578 (0.8370) lr 1.6374e-03 eta 5:37:54
epoch [20/50] batch [420/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.4552 (0.8386) lr 1.6374e-03 eta 5:37:33
epoch [20/50] batch [440/796] time 0.832 (0.835) data 0.000 (0.001) loss 0.1328 (0.8333) lr 1.6374e-03 eta 5:37:14
epoch [20/50] batch [460/796] time 0.841 (0.835) data 0.000 (0.001) loss 0.4126 (0.8516) lr 1.6374e-03 eta 5:36:55
epoch [20/50] batch [480/796] time 0.848 (0.835) data 0.000 (0.001) loss 0.6344 (0.8544) lr 1.6374e-03 eta 5:36:39
epoch [20/50] batch [500/796] time 0.819 (0.835) data 0.000 (0.001) loss 0.8881 (0.8607) lr 1.6374e-03 eta 5:36:19
epoch [20/50] batch [520/796] time 0.845 (0.835) data 0.000 (0.001) loss 0.9832 (0.8597) lr 1.6374e-03 eta 5:36:01
epoch [20/50] batch [540/796] time 0.845 (0.835) data 0.000 (0.001) loss 0.4780 (0.8532) lr 1.6374e-03 eta 5:35:44
epoch [20/50] batch [560/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.1761 (0.8539) lr 1.6374e-03 eta 5:35:27
epoch [20/50] batch [580/796] time 0.832 (0.835) data 0.000 (0.001) loss 2.5200 (0.8592) lr 1.6374e-03 eta 5:35:08
epoch [20/50] batch [600/796] time 0.820 (0.834) data 0.001 (0.001) loss 1.2333 (0.8540) lr 1.6374e-03 eta 5:34:49
epoch [20/50] batch [620/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.2624 (0.8611) lr 1.6374e-03 eta 5:34:31
epoch [20/50] batch [640/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.6501 (0.8603) lr 1.6374e-03 eta 5:34:12
epoch [20/50] batch [660/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.4084 (0.8617) lr 1.6374e-03 eta 5:33:55
epoch [20/50] batch [680/796] time 0.829 (0.834) data 0.000 (0.001) loss 1.4835 (0.8630) lr 1.6374e-03 eta 5:33:37
epoch [20/50] batch [700/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.5073 (0.8639) lr 1.6374e-03 eta 5:33:19
epoch [20/50] batch [720/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.2742 (0.8629) lr 1.6374e-03 eta 5:33:01
epoch [20/50] batch [740/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.1728 (0.8620) lr 1.6374e-03 eta 5:32:45
epoch [20/50] batch [760/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.2727 (0.8536) lr 1.6374e-03 eta 5:32:26
epoch [20/50] batch [780/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.9252 (0.8582) lr 1.6374e-03 eta 5:32:11
epoch [21/50] batch [20/796] time 0.821 (0.863) data 0.000 (0.028) loss 1.3986 (1.0380) lr 1.5878e-03 eta 5:43:12
epoch [21/50] batch [40/796] time 0.833 (0.847) data 0.000 (0.014) loss 0.6188 (0.8923) lr 1.5878e-03 eta 5:36:34
epoch [21/50] batch [60/796] time 0.843 (0.843) data 0.001 (0.010) loss 1.7204 (0.9298) lr 1.5878e-03 eta 5:34:28
epoch [21/50] batch [80/796] time 0.841 (0.841) data 0.000 (0.007) loss 1.4945 (0.8936) lr 1.5878e-03 eta 5:33:25
epoch [21/50] batch [100/796] time 0.821 (0.839) data 0.000 (0.006) loss 0.3217 (0.8488) lr 1.5878e-03 eta 5:32:38
epoch [21/50] batch [120/796] time 0.831 (0.838) data 0.000 (0.005) loss 0.5265 (0.8849) lr 1.5878e-03 eta 5:31:53
epoch [21/50] batch [140/796] time 0.844 (0.838) data 0.000 (0.004) loss 0.1591 (0.8798) lr 1.5878e-03 eta 5:31:23
epoch [21/50] batch [160/796] time 0.820 (0.837) data 0.000 (0.004) loss 0.9128 (0.9099) lr 1.5878e-03 eta 5:30:54
epoch [21/50] batch [180/796] time 0.842 (0.837) data 0.000 (0.003) loss 0.5381 (0.8915) lr 1.5878e-03 eta 5:30:42
epoch [21/50] batch [200/796] time 0.833 (0.837) data 0.000 (0.003) loss 1.2457 (0.8929) lr 1.5878e-03 eta 5:30:12
epoch [21/50] batch [220/796] time 0.833 (0.836) data 0.000 (0.003) loss 0.0191 (0.8834) lr 1.5878e-03 eta 5:29:49
epoch [21/50] batch [240/796] time 0.811 (0.836) data 0.000 (0.003) loss 2.2861 (0.8872) lr 1.5878e-03 eta 5:29:27
epoch [21/50] batch [260/796] time 0.820 (0.836) data 0.000 (0.002) loss 3.5701 (0.8941) lr 1.5878e-03 eta 5:29:03
epoch [21/50] batch [280/796] time 0.821 (0.836) data 0.000 (0.002) loss 0.0800 (0.8782) lr 1.5878e-03 eta 5:28:42
epoch [21/50] batch [300/796] time 0.821 (0.835) data 0.000 (0.002) loss 0.6559 (0.8749) lr 1.5878e-03 eta 5:28:18
epoch [21/50] batch [320/796] time 0.821 (0.835) data 0.000 (0.002) loss 1.6868 (0.8780) lr 1.5878e-03 eta 5:27:56
epoch [21/50] batch [340/796] time 0.811 (0.835) data 0.000 (0.002) loss 1.0133 (0.8638) lr 1.5878e-03 eta 5:27:34
epoch [21/50] batch [360/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.7543 (0.8670) lr 1.5878e-03 eta 5:27:17
epoch [21/50] batch [380/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.6712 (0.8683) lr 1.5878e-03 eta 5:27:00
epoch [21/50] batch [400/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.1425 (0.8705) lr 1.5878e-03 eta 5:26:42
epoch [21/50] batch [420/796] time 0.847 (0.835) data 0.000 (0.002) loss 1.6252 (0.8761) lr 1.5878e-03 eta 5:26:24
epoch [21/50] batch [440/796] time 0.843 (0.835) data 0.001 (0.001) loss 0.3349 (0.8747) lr 1.5878e-03 eta 5:26:07
epoch [21/50] batch [460/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.4271 (0.8744) lr 1.5878e-03 eta 5:25:47
epoch [21/50] batch [480/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.1393 (0.8732) lr 1.5878e-03 eta 5:25:30
epoch [21/50] batch [500/796] time 0.811 (0.835) data 0.000 (0.001) loss 0.7895 (0.8575) lr 1.5878e-03 eta 5:25:11
epoch [21/50] batch [520/796] time 0.836 (0.834) data 0.000 (0.001) loss 1.7843 (0.8528) lr 1.5878e-03 eta 5:24:52
epoch [21/50] batch [540/796] time 0.831 (0.834) data 0.000 (0.001) loss 0.7184 (0.8450) lr 1.5878e-03 eta 5:24:36
epoch [21/50] batch [560/796] time 0.824 (0.834) data 0.000 (0.001) loss 0.8549 (0.8372) lr 1.5878e-03 eta 5:24:19
epoch [21/50] batch [580/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.4221 (0.8295) lr 1.5878e-03 eta 5:24:03
epoch [21/50] batch [600/796] time 0.820 (0.834) data 0.001 (0.001) loss 0.2232 (0.8247) lr 1.5878e-03 eta 5:23:44
epoch [21/50] batch [620/796] time 0.810 (0.834) data 0.000 (0.001) loss 0.2550 (0.8173) lr 1.5878e-03 eta 5:23:26
epoch [21/50] batch [640/796] time 0.844 (0.834) data 0.000 (0.001) loss 0.1305 (0.8147) lr 1.5878e-03 eta 5:23:09
epoch [21/50] batch [660/796] time 0.810 (0.834) data 0.000 (0.001) loss 0.3137 (0.8200) lr 1.5878e-03 eta 5:22:51
epoch [21/50] batch [680/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.3539 (0.8209) lr 1.5878e-03 eta 5:22:32
epoch [21/50] batch [700/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.3695 (0.8192) lr 1.5878e-03 eta 5:22:16
epoch [21/50] batch [720/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.7662 (0.8195) lr 1.5878e-03 eta 5:21:59
epoch [21/50] batch [740/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.1134 (0.8222) lr 1.5878e-03 eta 5:21:42
epoch [21/50] batch [760/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.4473 (0.8301) lr 1.5878e-03 eta 5:21:25
epoch [21/50] batch [780/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.7623 (0.8287) lr 1.5878e-03 eta 5:21:08
epoch [22/50] batch [20/796] time 0.843 (0.866) data 0.000 (0.027) loss 3.3114 (1.0179) lr 1.5358e-03 eta 5:32:45
epoch [22/50] batch [40/796] time 0.844 (0.850) data 0.000 (0.014) loss 0.6793 (0.9816) lr 1.5358e-03 eta 5:26:21
epoch [22/50] batch [60/796] time 0.842 (0.844) data 0.000 (0.009) loss 1.4832 (1.0501) lr 1.5358e-03 eta 5:23:58
epoch [22/50] batch [80/796] time 0.842 (0.843) data 0.000 (0.007) loss 0.1392 (0.9474) lr 1.5358e-03 eta 5:23:06
epoch [22/50] batch [100/796] time 0.821 (0.840) data 0.000 (0.006) loss 0.1996 (0.8760) lr 1.5358e-03 eta 5:21:56
epoch [22/50] batch [120/796] time 0.843 (0.840) data 0.000 (0.005) loss 1.1058 (0.8608) lr 1.5358e-03 eta 5:21:21
epoch [22/50] batch [140/796] time 0.842 (0.839) data 0.000 (0.004) loss 0.3679 (0.8435) lr 1.5358e-03 eta 5:20:46
epoch [22/50] batch [160/796] time 0.843 (0.838) data 0.000 (0.004) loss 0.1700 (0.8185) lr 1.5358e-03 eta 5:20:09
epoch [22/50] batch [180/796] time 0.842 (0.837) data 0.000 (0.003) loss 0.5461 (0.8263) lr 1.5358e-03 eta 5:19:41
epoch [22/50] batch [200/796] time 0.843 (0.837) data 0.000 (0.003) loss 1.3050 (0.8283) lr 1.5358e-03 eta 5:19:22
epoch [22/50] batch [220/796] time 0.834 (0.837) data 0.000 (0.003) loss 0.8535 (0.8181) lr 1.5358e-03 eta 5:18:53
epoch [22/50] batch [240/796] time 0.834 (0.837) data 0.000 (0.002) loss 0.3513 (0.8415) lr 1.5358e-03 eta 5:18:40
epoch [22/50] batch [260/796] time 0.843 (0.837) data 0.000 (0.002) loss 0.3068 (0.8715) lr 1.5358e-03 eta 5:18:19
epoch [22/50] batch [280/796] time 0.834 (0.837) data 0.000 (0.002) loss 0.6070 (0.8699) lr 1.5358e-03 eta 5:18:03
epoch [22/50] batch [300/796] time 0.842 (0.837) data 0.000 (0.002) loss 0.5535 (0.8643) lr 1.5358e-03 eta 5:17:43
epoch [22/50] batch [320/796] time 0.832 (0.836) data 0.000 (0.002) loss 0.7937 (0.8522) lr 1.5358e-03 eta 5:17:18
epoch [22/50] batch [340/796] time 0.848 (0.836) data 0.000 (0.002) loss 0.6374 (0.8427) lr 1.5358e-03 eta 5:16:59
epoch [22/50] batch [360/796] time 0.833 (0.836) data 0.000 (0.002) loss 1.9943 (0.8417) lr 1.5358e-03 eta 5:16:38
epoch [22/50] batch [380/796] time 0.843 (0.836) data 0.000 (0.002) loss 3.3637 (0.8503) lr 1.5358e-03 eta 5:16:22
epoch [22/50] batch [400/796] time 0.843 (0.836) data 0.000 (0.002) loss 4.3404 (0.8517) lr 1.5358e-03 eta 5:16:01
epoch [22/50] batch [420/796] time 0.842 (0.836) data 0.000 (0.001) loss 1.0157 (0.8583) lr 1.5358e-03 eta 5:15:43
epoch [22/50] batch [440/796] time 0.842 (0.836) data 0.000 (0.001) loss 0.2533 (0.8580) lr 1.5358e-03 eta 5:15:22
epoch [22/50] batch [460/796] time 0.820 (0.836) data 0.000 (0.001) loss 1.6935 (0.8554) lr 1.5358e-03 eta 5:15:05
epoch [22/50] batch [480/796] time 0.842 (0.836) data 0.000 (0.001) loss 0.1791 (0.8477) lr 1.5358e-03 eta 5:14:46
epoch [22/50] batch [500/796] time 0.820 (0.835) data 0.000 (0.001) loss 1.4444 (0.8488) lr 1.5358e-03 eta 5:14:27
epoch [22/50] batch [520/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.0815 (0.8412) lr 1.5358e-03 eta 5:14:09
epoch [22/50] batch [540/796] time 0.844 (0.835) data 0.000 (0.001) loss 1.0034 (0.8329) lr 1.5358e-03 eta 5:13:52
epoch [22/50] batch [560/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.3976 (0.8316) lr 1.5358e-03 eta 5:13:33
epoch [22/50] batch [580/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.2875 (0.8349) lr 1.5358e-03 eta 5:13:16
epoch [22/50] batch [600/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.5712 (0.8354) lr 1.5358e-03 eta 5:12:57
epoch [22/50] batch [620/796] time 0.845 (0.835) data 0.000 (0.001) loss 1.6043 (0.8411) lr 1.5358e-03 eta 5:12:39
epoch [22/50] batch [640/796] time 0.818 (0.835) data 0.000 (0.001) loss 1.9028 (0.8411) lr 1.5358e-03 eta 5:12:21
epoch [22/50] batch [660/796] time 0.822 (0.835) data 0.000 (0.001) loss 0.8374 (0.8400) lr 1.5358e-03 eta 5:12:03
epoch [22/50] batch [680/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.4971 (0.8403) lr 1.5358e-03 eta 5:11:47
epoch [22/50] batch [700/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.6952 (0.8358) lr 1.5358e-03 eta 5:11:27
epoch [22/50] batch [720/796] time 0.834 (0.835) data 0.000 (0.001) loss 1.6650 (0.8375) lr 1.5358e-03 eta 5:11:09
epoch [22/50] batch [740/796] time 0.845 (0.835) data 0.000 (0.001) loss 1.3348 (0.8382) lr 1.5358e-03 eta 5:10:53
epoch [22/50] batch [760/796] time 0.822 (0.835) data 0.000 (0.001) loss 0.9187 (0.8378) lr 1.5358e-03 eta 5:10:35
epoch [22/50] batch [780/796] time 0.822 (0.835) data 0.000 (0.001) loss 0.6412 (0.8428) lr 1.5358e-03 eta 5:10:17
epoch [23/50] batch [20/796] time 0.832 (0.864) data 0.000 (0.030) loss 1.2147 (0.8778) lr 1.4818e-03 eta 5:20:42
epoch [23/50] batch [40/796] time 0.843 (0.847) data 0.000 (0.015) loss 0.3997 (0.7769) lr 1.4818e-03 eta 5:14:03
epoch [23/50] batch [60/796] time 0.832 (0.842) data 0.001 (0.010) loss 0.9437 (0.8459) lr 1.4818e-03 eta 5:12:01
epoch [23/50] batch [80/796] time 0.826 (0.840) data 0.000 (0.008) loss 1.1956 (0.8942) lr 1.4818e-03 eta 5:11:03
epoch [23/50] batch [100/796] time 0.843 (0.839) data 0.000 (0.006) loss 1.3355 (0.9442) lr 1.4818e-03 eta 5:10:25
epoch [23/50] batch [120/796] time 0.842 (0.839) data 0.000 (0.005) loss 0.3167 (0.9741) lr 1.4818e-03 eta 5:10:07
epoch [23/50] batch [140/796] time 0.845 (0.838) data 0.000 (0.005) loss 0.1548 (0.9482) lr 1.4818e-03 eta 5:09:23
epoch [23/50] batch [160/796] time 0.845 (0.838) data 0.000 (0.004) loss 0.6675 (0.9782) lr 1.4818e-03 eta 5:09:01
epoch [23/50] batch [180/796] time 0.843 (0.837) data 0.000 (0.004) loss 1.1339 (0.9683) lr 1.4818e-03 eta 5:08:33
epoch [23/50] batch [200/796] time 0.847 (0.837) data 0.000 (0.003) loss 0.5251 (0.9562) lr 1.4818e-03 eta 5:08:14
epoch [23/50] batch [220/796] time 0.842 (0.837) data 0.000 (0.003) loss 1.1765 (0.9372) lr 1.4818e-03 eta 5:07:48
epoch [23/50] batch [240/796] time 0.844 (0.837) data 0.000 (0.003) loss 0.4272 (0.9174) lr 1.4818e-03 eta 5:07:23
epoch [23/50] batch [260/796] time 0.821 (0.836) data 0.000 (0.003) loss 1.2850 (0.9048) lr 1.4818e-03 eta 5:06:58
epoch [23/50] batch [280/796] time 0.847 (0.836) data 0.000 (0.002) loss 0.1796 (0.8996) lr 1.4818e-03 eta 5:06:42
epoch [23/50] batch [300/796] time 0.823 (0.836) data 0.000 (0.002) loss 0.7407 (0.8931) lr 1.4818e-03 eta 5:06:27
epoch [23/50] batch [320/796] time 0.843 (0.836) data 0.000 (0.002) loss 1.4074 (0.8824) lr 1.4818e-03 eta 5:06:12
epoch [23/50] batch [340/796] time 0.833 (0.836) data 0.000 (0.002) loss 0.1546 (0.8670) lr 1.4818e-03 eta 5:05:50
epoch [23/50] batch [360/796] time 0.847 (0.836) data 0.000 (0.002) loss 0.2222 (0.8567) lr 1.4818e-03 eta 5:05:33
epoch [23/50] batch [380/796] time 0.843 (0.836) data 0.000 (0.002) loss 1.0457 (0.8446) lr 1.4818e-03 eta 5:05:13
epoch [23/50] batch [400/796] time 0.821 (0.836) data 0.000 (0.002) loss 0.5124 (0.8399) lr 1.4818e-03 eta 5:04:53
epoch [23/50] batch [420/796] time 0.821 (0.836) data 0.000 (0.002) loss 0.1674 (0.8381) lr 1.4818e-03 eta 5:04:33
epoch [23/50] batch [440/796] time 0.844 (0.836) data 0.000 (0.002) loss 0.5256 (0.8443) lr 1.4818e-03 eta 5:04:19
epoch [23/50] batch [460/796] time 0.812 (0.836) data 0.000 (0.002) loss 0.4014 (0.8431) lr 1.4818e-03 eta 5:03:59
epoch [23/50] batch [480/796] time 0.833 (0.836) data 0.000 (0.001) loss 0.1318 (0.8331) lr 1.4818e-03 eta 5:03:41
epoch [23/50] batch [500/796] time 0.844 (0.836) data 0.000 (0.001) loss 0.2761 (0.8332) lr 1.4818e-03 eta 5:03:25
epoch [23/50] batch [520/796] time 0.820 (0.836) data 0.000 (0.001) loss 0.1293 (0.8317) lr 1.4818e-03 eta 5:03:08
epoch [23/50] batch [540/796] time 0.832 (0.836) data 0.000 (0.001) loss 2.4552 (0.8314) lr 1.4818e-03 eta 5:02:50
epoch [23/50] batch [560/796] time 0.834 (0.835) data 0.000 (0.001) loss 1.0844 (0.8337) lr 1.4818e-03 eta 5:02:31
epoch [23/50] batch [580/796] time 0.834 (0.835) data 0.000 (0.001) loss 0.0607 (0.8347) lr 1.4818e-03 eta 5:02:14
epoch [23/50] batch [600/796] time 0.842 (0.835) data 0.001 (0.001) loss 0.4630 (0.8306) lr 1.4818e-03 eta 5:01:56
epoch [23/50] batch [620/796] time 0.822 (0.835) data 0.000 (0.001) loss 0.8673 (0.8275) lr 1.4818e-03 eta 5:01:37
epoch [23/50] batch [640/796] time 0.812 (0.835) data 0.000 (0.001) loss 0.3007 (0.8263) lr 1.4818e-03 eta 5:01:19
epoch [23/50] batch [660/796] time 0.825 (0.835) data 0.000 (0.001) loss 0.3210 (0.8194) lr 1.4818e-03 eta 5:01:02
epoch [23/50] batch [680/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.6970 (0.8225) lr 1.4818e-03 eta 5:00:44
epoch [23/50] batch [700/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.3402 (0.8233) lr 1.4818e-03 eta 5:00:27
epoch [23/50] batch [720/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.4481 (0.8195) lr 1.4818e-03 eta 5:00:10
epoch [23/50] batch [740/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.7334 (0.8221) lr 1.4818e-03 eta 4:59:52
epoch [23/50] batch [760/796] time 0.834 (0.835) data 0.000 (0.001) loss 0.0785 (0.8195) lr 1.4818e-03 eta 4:59:34
epoch [23/50] batch [780/796] time 0.832 (0.835) data 0.000 (0.001) loss 0.3125 (0.8206) lr 1.4818e-03 eta 4:59:15
epoch [24/50] batch [20/796] time 0.842 (0.863) data 0.000 (0.028) loss 3.4143 (0.8603) lr 1.4258e-03 eta 5:08:59
epoch [24/50] batch [40/796] time 0.832 (0.848) data 0.000 (0.014) loss 0.9655 (0.9157) lr 1.4258e-03 eta 5:03:05
epoch [24/50] batch [60/796] time 0.833 (0.843) data 0.001 (0.010) loss 0.3682 (0.8257) lr 1.4258e-03 eta 5:01:12
epoch [24/50] batch [80/796] time 0.820 (0.840) data 0.000 (0.007) loss 2.0913 (0.8104) lr 1.4258e-03 eta 4:59:52
epoch [24/50] batch [100/796] time 0.835 (0.839) data 0.000 (0.006) loss 0.3444 (0.8570) lr 1.4258e-03 eta 4:59:05
epoch [24/50] batch [120/796] time 0.812 (0.838) data 0.000 (0.005) loss 2.8622 (0.8592) lr 1.4258e-03 eta 4:58:23
epoch [24/50] batch [140/796] time 0.848 (0.837) data 0.000 (0.004) loss 0.4901 (0.8623) lr 1.4258e-03 eta 4:57:54
epoch [24/50] batch [160/796] time 0.842 (0.837) data 0.000 (0.004) loss 0.9095 (0.8739) lr 1.4258e-03 eta 4:57:26
epoch [24/50] batch [180/796] time 0.842 (0.837) data 0.000 (0.003) loss 0.9429 (0.8666) lr 1.4258e-03 eta 4:57:07
epoch [24/50] batch [200/796] time 0.843 (0.836) data 0.000 (0.003) loss 0.4560 (0.8630) lr 1.4258e-03 eta 4:56:48
epoch [24/50] batch [220/796] time 0.820 (0.836) data 0.000 (0.003) loss 1.1545 (0.8473) lr 1.4258e-03 eta 4:56:29
epoch [24/50] batch [240/796] time 0.849 (0.836) data 0.000 (0.003) loss 0.5777 (0.8219) lr 1.4258e-03 eta 4:56:10
epoch [24/50] batch [260/796] time 0.844 (0.836) data 0.000 (0.002) loss 0.7154 (0.8300) lr 1.4258e-03 eta 4:55:48
epoch [24/50] batch [280/796] time 0.810 (0.836) data 0.000 (0.002) loss 0.5347 (0.8183) lr 1.4258e-03 eta 4:55:25
epoch [24/50] batch [300/796] time 0.821 (0.836) data 0.000 (0.002) loss 3.0602 (0.8186) lr 1.4258e-03 eta 4:55:06
epoch [24/50] batch [320/796] time 0.824 (0.835) data 0.000 (0.002) loss 0.6964 (0.8192) lr 1.4258e-03 eta 4:54:45
epoch [24/50] batch [340/796] time 0.823 (0.835) data 0.000 (0.002) loss 0.5887 (0.8205) lr 1.4258e-03 eta 4:54:30
epoch [24/50] batch [360/796] time 0.822 (0.835) data 0.000 (0.002) loss 0.5840 (0.8214) lr 1.4258e-03 eta 4:54:09
epoch [24/50] batch [380/796] time 0.843 (0.835) data 0.000 (0.002) loss 1.5718 (0.8182) lr 1.4258e-03 eta 4:53:51
epoch [24/50] batch [400/796] time 0.834 (0.835) data 0.000 (0.002) loss 0.0779 (0.8193) lr 1.4258e-03 eta 4:53:35
epoch [24/50] batch [420/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.4915 (0.8298) lr 1.4258e-03 eta 4:53:17
epoch [24/50] batch [440/796] time 0.822 (0.835) data 0.000 (0.001) loss 1.9252 (0.8277) lr 1.4258e-03 eta 4:52:57
epoch [24/50] batch [460/796] time 0.844 (0.835) data 0.000 (0.001) loss 1.0651 (0.8250) lr 1.4258e-03 eta 4:52:39
epoch [24/50] batch [480/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.8484 (0.8177) lr 1.4258e-03 eta 4:52:24
epoch [24/50] batch [500/796] time 0.842 (0.835) data 0.000 (0.001) loss 2.5702 (0.8211) lr 1.4258e-03 eta 4:52:05
epoch [24/50] batch [520/796] time 0.841 (0.835) data 0.000 (0.001) loss 0.6336 (0.8206) lr 1.4258e-03 eta 4:51:48
epoch [24/50] batch [540/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.0553 (0.8234) lr 1.4258e-03 eta 4:51:29
epoch [24/50] batch [560/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.4525 (0.8251) lr 1.4258e-03 eta 4:51:11
epoch [24/50] batch [580/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.9563 (0.8189) lr 1.4258e-03 eta 4:50:55
epoch [24/50] batch [600/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.2096 (0.8178) lr 1.4258e-03 eta 4:50:36
epoch [24/50] batch [620/796] time 0.842 (0.835) data 0.000 (0.001) loss 1.5369 (0.8134) lr 1.4258e-03 eta 4:50:20
epoch [24/50] batch [640/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.3914 (0.8204) lr 1.4258e-03 eta 4:50:03
epoch [24/50] batch [660/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.2263 (0.8248) lr 1.4258e-03 eta 4:49:47
epoch [24/50] batch [680/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.0744 (0.8240) lr 1.4258e-03 eta 4:49:29
epoch [24/50] batch [700/796] time 0.820 (0.835) data 0.000 (0.001) loss 1.9099 (0.8310) lr 1.4258e-03 eta 4:49:13
epoch [24/50] batch [720/796] time 0.834 (0.835) data 0.000 (0.001) loss 0.3026 (0.8283) lr 1.4258e-03 eta 4:48:56
epoch [24/50] batch [740/796] time 0.832 (0.835) data 0.000 (0.001) loss 0.5658 (0.8229) lr 1.4258e-03 eta 4:48:40
epoch [24/50] batch [760/796] time 0.846 (0.835) data 0.000 (0.001) loss 1.7991 (0.8195) lr 1.4258e-03 eta 4:48:22
epoch [24/50] batch [780/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.1150 (0.8226) lr 1.4258e-03 eta 4:48:05
epoch [25/50] batch [20/796] time 0.842 (0.864) data 0.000 (0.029) loss 0.4787 (0.6329) lr 1.3681e-03 eta 4:57:50
epoch [25/50] batch [40/796] time 0.843 (0.850) data 0.000 (0.015) loss 0.7360 (0.6743) lr 1.3681e-03 eta 4:52:39
epoch [25/50] batch [60/796] time 0.842 (0.844) data 0.000 (0.010) loss 0.8801 (0.6990) lr 1.3681e-03 eta 4:50:10
epoch [25/50] batch [80/796] time 0.842 (0.842) data 0.000 (0.007) loss 0.4551 (0.7370) lr 1.3681e-03 eta 4:49:19
epoch [25/50] batch [100/796] time 0.821 (0.840) data 0.000 (0.006) loss 0.7060 (0.7476) lr 1.3681e-03 eta 4:48:27
epoch [25/50] batch [120/796] time 0.820 (0.839) data 0.000 (0.005) loss 1.4070 (0.7958) lr 1.3681e-03 eta 4:47:42
epoch [25/50] batch [140/796] time 0.847 (0.838) data 0.000 (0.004) loss 0.0613 (0.7862) lr 1.3681e-03 eta 4:47:13
epoch [25/50] batch [160/796] time 0.833 (0.838) data 0.000 (0.004) loss 1.5526 (0.7961) lr 1.3681e-03 eta 4:46:41
epoch [25/50] batch [180/796] time 0.833 (0.837) data 0.000 (0.003) loss 1.5703 (0.8209) lr 1.3681e-03 eta 4:46:13
epoch [25/50] batch [200/796] time 0.844 (0.837) data 0.000 (0.003) loss 0.8824 (0.8401) lr 1.3681e-03 eta 4:45:52
epoch [25/50] batch [220/796] time 0.832 (0.837) data 0.000 (0.003) loss 0.5190 (0.8532) lr 1.3681e-03 eta 4:45:36
epoch [25/50] batch [240/796] time 0.843 (0.837) data 0.000 (0.003) loss 2.3481 (0.8558) lr 1.3681e-03 eta 4:45:12
epoch [25/50] batch [260/796] time 0.843 (0.837) data 0.000 (0.002) loss 0.2687 (0.8567) lr 1.3681e-03 eta 4:44:55
epoch [25/50] batch [280/796] time 0.844 (0.836) data 0.000 (0.002) loss 0.4747 (0.8575) lr 1.3681e-03 eta 4:44:34
epoch [25/50] batch [300/796] time 0.843 (0.836) data 0.000 (0.002) loss 0.2197 (0.8550) lr 1.3681e-03 eta 4:44:16
epoch [25/50] batch [320/796] time 0.847 (0.836) data 0.000 (0.002) loss 1.1955 (0.8583) lr 1.3681e-03 eta 4:43:56
epoch [25/50] batch [340/796] time 0.833 (0.836) data 0.000 (0.002) loss 0.0726 (0.8464) lr 1.3681e-03 eta 4:43:40
epoch [25/50] batch [360/796] time 0.822 (0.836) data 0.000 (0.002) loss 1.3280 (0.8467) lr 1.3681e-03 eta 4:43:18
epoch [25/50] batch [380/796] time 0.810 (0.836) data 0.000 (0.002) loss 1.0958 (0.8446) lr 1.3681e-03 eta 4:42:58
epoch [25/50] batch [400/796] time 0.832 (0.836) data 0.000 (0.002) loss 3.7483 (0.8631) lr 1.3681e-03 eta 4:42:37
epoch [25/50] batch [420/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.4936 (0.8560) lr 1.3681e-03 eta 4:42:19
epoch [25/50] batch [440/796] time 0.846 (0.835) data 0.000 (0.002) loss 0.6533 (0.8429) lr 1.3681e-03 eta 4:42:03
epoch [25/50] batch [460/796] time 0.811 (0.835) data 0.000 (0.001) loss 1.0654 (0.8434) lr 1.3681e-03 eta 4:41:41
epoch [25/50] batch [480/796] time 0.820 (0.835) data 0.000 (0.001) loss 2.7482 (0.8455) lr 1.3681e-03 eta 4:41:22
epoch [25/50] batch [500/796] time 0.838 (0.835) data 0.000 (0.001) loss 1.8577 (0.8437) lr 1.3681e-03 eta 4:41:06
epoch [25/50] batch [520/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.3122 (0.8497) lr 1.3681e-03 eta 4:40:49
epoch [25/50] batch [540/796] time 0.810 (0.835) data 0.000 (0.001) loss 0.7614 (0.8532) lr 1.3681e-03 eta 4:40:32
epoch [25/50] batch [560/796] time 0.851 (0.835) data 0.000 (0.001) loss 1.4528 (0.8507) lr 1.3681e-03 eta 4:40:15
epoch [25/50] batch [580/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.3233 (0.8496) lr 1.3681e-03 eta 4:39:57
epoch [25/50] batch [600/796] time 0.832 (0.835) data 0.001 (0.001) loss 0.3374 (0.8470) lr 1.3681e-03 eta 4:39:37
epoch [25/50] batch [620/796] time 0.820 (0.835) data 0.000 (0.001) loss 1.1651 (0.8471) lr 1.3681e-03 eta 4:39:18
epoch [25/50] batch [640/796] time 0.845 (0.835) data 0.000 (0.001) loss 0.3149 (0.8401) lr 1.3681e-03 eta 4:39:02
epoch [25/50] batch [660/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.0666 (0.8480) lr 1.3681e-03 eta 4:38:45
epoch [25/50] batch [680/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.8255 (0.8510) lr 1.3681e-03 eta 4:38:27
epoch [25/50] batch [700/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.7133 (0.8434) lr 1.3681e-03 eta 4:38:09
epoch [25/50] batch [720/796] time 0.834 (0.835) data 0.000 (0.001) loss 0.0095 (0.8371) lr 1.3681e-03 eta 4:37:52
epoch [25/50] batch [740/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.0549 (0.8365) lr 1.3681e-03 eta 4:37:35
epoch [25/50] batch [760/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.4120 (0.8374) lr 1.3681e-03 eta 4:37:18
epoch [25/50] batch [780/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.6252 (0.8289) lr 1.3681e-03 eta 4:37:01
epoch [26/50] batch [20/796] time 0.844 (0.861) data 0.000 (0.026) loss 1.7848 (0.7326) lr 1.3090e-03 eta 4:45:06
epoch [26/50] batch [40/796] time 0.843 (0.847) data 0.000 (0.013) loss 0.9123 (0.8003) lr 1.3090e-03 eta 4:40:12
epoch [26/50] batch [60/796] time 0.843 (0.842) data 0.000 (0.009) loss 0.4389 (0.8482) lr 1.3090e-03 eta 4:38:32
epoch [26/50] batch [80/796] time 0.820 (0.841) data 0.000 (0.007) loss 2.7698 (0.8485) lr 1.3090e-03 eta 4:37:39
epoch [26/50] batch [100/796] time 0.821 (0.838) data 0.000 (0.005) loss 1.1549 (0.8852) lr 1.3090e-03 eta 4:36:40
epoch [26/50] batch [120/796] time 0.843 (0.838) data 0.000 (0.004) loss 0.0950 (0.8929) lr 1.3090e-03 eta 4:36:15
epoch [26/50] batch [140/796] time 0.842 (0.837) data 0.000 (0.004) loss 0.4149 (0.8908) lr 1.3090e-03 eta 4:35:42
epoch [26/50] batch [160/796] time 0.842 (0.836) data 0.000 (0.003) loss 1.2142 (0.8733) lr 1.3090e-03 eta 4:35:12
epoch [26/50] batch [180/796] time 0.820 (0.836) data 0.000 (0.003) loss 0.4540 (0.8746) lr 1.3090e-03 eta 4:34:53
epoch [26/50] batch [200/796] time 0.842 (0.836) data 0.000 (0.003) loss 0.1216 (0.8576) lr 1.3090e-03 eta 4:34:34
epoch [26/50] batch [220/796] time 0.833 (0.836) data 0.000 (0.002) loss 0.6972 (0.8364) lr 1.3090e-03 eta 4:34:10
epoch [26/50] batch [240/796] time 0.848 (0.836) data 0.000 (0.002) loss 0.2174 (0.8198) lr 1.3090e-03 eta 4:33:50
epoch [26/50] batch [260/796] time 0.822 (0.836) data 0.000 (0.002) loss 1.4069 (0.8123) lr 1.3090e-03 eta 4:33:30
epoch [26/50] batch [280/796] time 0.821 (0.835) data 0.000 (0.002) loss 1.5729 (0.8105) lr 1.3090e-03 eta 4:33:11
epoch [26/50] batch [300/796] time 0.820 (0.836) data 0.000 (0.002) loss 0.3669 (0.8062) lr 1.3090e-03 eta 4:32:56
epoch [26/50] batch [320/796] time 0.833 (0.835) data 0.000 (0.002) loss 0.2041 (0.8038) lr 1.3090e-03 eta 4:32:36
epoch [26/50] batch [340/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.4152 (0.7922) lr 1.3090e-03 eta 4:32:18
epoch [26/50] batch [360/796] time 0.844 (0.835) data 0.000 (0.002) loss 1.3298 (0.7897) lr 1.3090e-03 eta 4:32:00
epoch [26/50] batch [380/796] time 0.834 (0.835) data 0.000 (0.002) loss 1.5018 (0.7842) lr 1.3090e-03 eta 4:31:44
epoch [26/50] batch [400/796] time 0.835 (0.835) data 0.000 (0.001) loss 0.4648 (0.7842) lr 1.3090e-03 eta 4:31:24
epoch [26/50] batch [420/796] time 0.823 (0.835) data 0.000 (0.001) loss 0.1331 (0.8009) lr 1.3090e-03 eta 4:31:09
epoch [26/50] batch [440/796] time 0.822 (0.835) data 0.000 (0.001) loss 0.5398 (0.7914) lr 1.3090e-03 eta 4:30:51
epoch [26/50] batch [460/796] time 0.845 (0.835) data 0.000 (0.001) loss 0.2923 (0.7892) lr 1.3090e-03 eta 4:30:36
epoch [26/50] batch [480/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.1540 (0.7924) lr 1.3090e-03 eta 4:30:21
epoch [26/50] batch [500/796] time 0.845 (0.835) data 0.000 (0.001) loss 0.9221 (0.7949) lr 1.3090e-03 eta 4:30:04
epoch [26/50] batch [520/796] time 0.835 (0.835) data 0.000 (0.001) loss 2.7874 (0.8020) lr 1.3090e-03 eta 4:29:48
epoch [26/50] batch [540/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.1362 (0.8127) lr 1.3090e-03 eta 4:29:29
epoch [26/50] batch [560/796] time 0.836 (0.835) data 0.000 (0.001) loss 0.1598 (0.8124) lr 1.3090e-03 eta 4:29:11
epoch [26/50] batch [580/796] time 0.822 (0.835) data 0.000 (0.001) loss 0.2785 (0.8172) lr 1.3090e-03 eta 4:28:51
epoch [26/50] batch [600/796] time 0.852 (0.835) data 0.000 (0.001) loss 3.2901 (0.8216) lr 1.3090e-03 eta 4:28:33
epoch [26/50] batch [620/796] time 0.832 (0.835) data 0.000 (0.001) loss 1.1846 (0.8300) lr 1.3090e-03 eta 4:28:15
epoch [26/50] batch [640/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.3605 (0.8291) lr 1.3090e-03 eta 4:27:55
epoch [26/50] batch [660/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.8452 (0.8243) lr 1.3090e-03 eta 4:27:39
epoch [26/50] batch [680/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.6818 (0.8217) lr 1.3090e-03 eta 4:27:23
epoch [26/50] batch [700/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.8281 (0.8224) lr 1.3090e-03 eta 4:27:06
epoch [26/50] batch [720/796] time 0.811 (0.835) data 0.000 (0.001) loss 2.6898 (0.8319) lr 1.3090e-03 eta 4:26:46
epoch [26/50] batch [740/796] time 0.841 (0.835) data 0.000 (0.001) loss 1.7987 (0.8337) lr 1.3090e-03 eta 4:26:29
epoch [26/50] batch [760/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.5719 (0.8277) lr 1.3090e-03 eta 4:26:13
epoch [26/50] batch [780/796] time 0.810 (0.835) data 0.000 (0.001) loss 1.0724 (0.8295) lr 1.3090e-03 eta 4:25:56
epoch [27/50] batch [20/796] time 0.832 (0.858) data 0.000 (0.028) loss 0.9303 (0.9356) lr 1.2487e-03 eta 4:32:58
epoch [27/50] batch [40/796] time 0.821 (0.846) data 0.000 (0.014) loss 1.1931 (0.7803) lr 1.2487e-03 eta 4:28:54
epoch [27/50] batch [60/796] time 0.833 (0.842) data 0.000 (0.009) loss 0.1864 (0.7637) lr 1.2487e-03 eta 4:27:07
epoch [27/50] batch [80/796] time 0.823 (0.840) data 0.000 (0.007) loss 0.8073 (0.7348) lr 1.2487e-03 eta 4:26:21
epoch [27/50] batch [100/796] time 0.822 (0.839) data 0.000 (0.006) loss 1.1180 (0.7909) lr 1.2487e-03 eta 4:25:36
epoch [27/50] batch [120/796] time 0.827 (0.838) data 0.000 (0.005) loss 1.4712 (0.8391) lr 1.2487e-03 eta 4:25:05
epoch [27/50] batch [140/796] time 0.832 (0.837) data 0.000 (0.004) loss 0.6751 (0.8409) lr 1.2487e-03 eta 4:24:41
epoch [27/50] batch [160/796] time 0.843 (0.837) data 0.000 (0.004) loss 0.6081 (0.8136) lr 1.2487e-03 eta 4:24:21
epoch [27/50] batch [180/796] time 0.819 (0.837) data 0.000 (0.003) loss 0.1558 (0.8413) lr 1.2487e-03 eta 4:23:57
epoch [27/50] batch [200/796] time 0.832 (0.836) data 0.000 (0.003) loss 0.2033 (0.8279) lr 1.2487e-03 eta 4:23:31
epoch [27/50] batch [220/796] time 0.823 (0.836) data 0.000 (0.003) loss 1.4002 (0.8117) lr 1.2487e-03 eta 4:23:08
epoch [27/50] batch [240/796] time 0.821 (0.836) data 0.000 (0.002) loss 1.5523 (0.8501) lr 1.2487e-03 eta 4:22:49
epoch [27/50] batch [260/796] time 0.843 (0.836) data 0.000 (0.002) loss 0.1317 (0.8329) lr 1.2487e-03 eta 4:22:29
epoch [27/50] batch [280/796] time 0.842 (0.836) data 0.000 (0.002) loss 1.4592 (0.8135) lr 1.2487e-03 eta 4:22:07
epoch [27/50] batch [300/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.8338 (0.8443) lr 1.2487e-03 eta 4:21:47
epoch [27/50] batch [320/796] time 0.846 (0.835) data 0.000 (0.002) loss 0.7748 (0.8446) lr 1.2487e-03 eta 4:21:28
epoch [27/50] batch [340/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.5192 (0.8460) lr 1.2487e-03 eta 4:21:12
epoch [27/50] batch [360/796] time 0.843 (0.835) data 0.000 (0.002) loss 1.5640 (0.8397) lr 1.2487e-03 eta 4:20:55
epoch [27/50] batch [380/796] time 0.841 (0.835) data 0.000 (0.002) loss 0.8601 (0.8308) lr 1.2487e-03 eta 4:20:38
epoch [27/50] batch [400/796] time 0.843 (0.835) data 0.000 (0.002) loss 1.2917 (0.8193) lr 1.2487e-03 eta 4:20:19
epoch [27/50] batch [420/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.3292 (0.8125) lr 1.2487e-03 eta 4:20:04
epoch [27/50] batch [440/796] time 0.841 (0.835) data 0.000 (0.001) loss 1.7246 (0.8229) lr 1.2487e-03 eta 4:19:44
epoch [27/50] batch [460/796] time 0.833 (0.835) data 0.000 (0.001) loss 1.1859 (0.8281) lr 1.2487e-03 eta 4:19:24
epoch [27/50] batch [480/796] time 0.832 (0.835) data 0.000 (0.001) loss 0.4905 (0.8297) lr 1.2487e-03 eta 4:19:07
epoch [27/50] batch [500/796] time 0.844 (0.835) data 0.000 (0.001) loss 1.1607 (0.8274) lr 1.2487e-03 eta 4:18:52
epoch [27/50] batch [520/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.1989 (0.8257) lr 1.2487e-03 eta 4:18:35
epoch [27/50] batch [540/796] time 0.833 (0.835) data 0.000 (0.001) loss 1.0145 (0.8265) lr 1.2487e-03 eta 4:18:18
epoch [27/50] batch [560/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.6286 (0.8338) lr 1.2487e-03 eta 4:18:01
epoch [27/50] batch [580/796] time 0.832 (0.835) data 0.000 (0.001) loss 0.3072 (0.8387) lr 1.2487e-03 eta 4:17:43
epoch [27/50] batch [600/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.5996 (0.8380) lr 1.2487e-03 eta 4:17:27
epoch [27/50] batch [620/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.0815 (0.8314) lr 1.2487e-03 eta 4:17:11
epoch [27/50] batch [640/796] time 0.834 (0.835) data 0.000 (0.001) loss 0.1813 (0.8301) lr 1.2487e-03 eta 4:16:52
epoch [27/50] batch [660/796] time 0.822 (0.835) data 0.000 (0.001) loss 0.6327 (0.8324) lr 1.2487e-03 eta 4:16:34
epoch [27/50] batch [680/796] time 0.841 (0.835) data 0.000 (0.001) loss 0.2408 (0.8361) lr 1.2487e-03 eta 4:16:18
epoch [27/50] batch [700/796] time 0.811 (0.835) data 0.000 (0.001) loss 1.1662 (0.8299) lr 1.2487e-03 eta 4:16:00
epoch [27/50] batch [720/796] time 0.841 (0.835) data 0.000 (0.001) loss 0.3524 (0.8290) lr 1.2487e-03 eta 4:15:43
epoch [27/50] batch [740/796] time 0.829 (0.835) data 0.000 (0.001) loss 0.4681 (0.8311) lr 1.2487e-03 eta 4:15:25
epoch [27/50] batch [760/796] time 0.841 (0.835) data 0.000 (0.001) loss 0.7478 (0.8279) lr 1.2487e-03 eta 4:15:08
epoch [27/50] batch [780/796] time 0.832 (0.835) data 0.000 (0.001) loss 1.9936 (0.8345) lr 1.2487e-03 eta 4:14:51
epoch [28/50] batch [20/796] time 0.821 (0.869) data 0.000 (0.032) loss 0.6067 (0.6516) lr 1.1874e-03 eta 4:24:58
epoch [28/50] batch [40/796] time 0.839 (0.851) data 0.000 (0.016) loss 1.3110 (0.7383) lr 1.1874e-03 eta 4:19:08
epoch [28/50] batch [60/796] time 0.821 (0.845) data 0.000 (0.011) loss 1.3348 (0.8714) lr 1.1874e-03 eta 4:17:03
epoch [28/50] batch [80/796] time 0.832 (0.842) data 0.000 (0.008) loss 0.3837 (0.7952) lr 1.1874e-03 eta 4:15:54
epoch [28/50] batch [100/796] time 0.843 (0.841) data 0.000 (0.007) loss 0.0521 (0.7831) lr 1.1874e-03 eta 4:15:10
epoch [28/50] batch [120/796] time 0.822 (0.840) data 0.000 (0.006) loss 0.1823 (0.7514) lr 1.1874e-03 eta 4:14:38
epoch [28/50] batch [140/796] time 0.848 (0.839) data 0.000 (0.005) loss 1.5049 (0.7898) lr 1.1874e-03 eta 4:14:09
epoch [28/50] batch [160/796] time 0.843 (0.838) data 0.000 (0.004) loss 0.4987 (0.7925) lr 1.1874e-03 eta 4:13:35
epoch [28/50] batch [180/796] time 0.842 (0.838) data 0.000 (0.004) loss 1.5823 (0.8040) lr 1.1874e-03 eta 4:13:12
epoch [28/50] batch [200/796] time 0.833 (0.838) data 0.000 (0.003) loss 0.8853 (0.8018) lr 1.1874e-03 eta 4:12:46
epoch [28/50] batch [220/796] time 0.839 (0.837) data 0.000 (0.003) loss 0.3305 (0.8021) lr 1.1874e-03 eta 4:12:25
epoch [28/50] batch [240/796] time 0.822 (0.837) data 0.000 (0.003) loss 0.7359 (0.7930) lr 1.1874e-03 eta 4:12:04
epoch [28/50] batch [260/796] time 0.842 (0.837) data 0.000 (0.003) loss 1.1152 (0.7845) lr 1.1874e-03 eta 4:11:39
epoch [28/50] batch [280/796] time 0.832 (0.836) data 0.000 (0.002) loss 0.3224 (0.8060) lr 1.1874e-03 eta 4:11:17
epoch [28/50] batch [300/796] time 0.833 (0.836) data 0.000 (0.002) loss 0.3897 (0.8072) lr 1.1874e-03 eta 4:10:58
epoch [28/50] batch [320/796] time 0.832 (0.836) data 0.000 (0.002) loss 0.1090 (0.8173) lr 1.1874e-03 eta 4:10:38
epoch [28/50] batch [340/796] time 0.820 (0.836) data 0.000 (0.002) loss 0.6347 (0.8131) lr 1.1874e-03 eta 4:10:20
epoch [28/50] batch [360/796] time 0.847 (0.836) data 0.000 (0.002) loss 0.8822 (0.8317) lr 1.1874e-03 eta 4:10:02
epoch [28/50] batch [380/796] time 0.842 (0.836) data 0.000 (0.002) loss 1.3036 (0.8387) lr 1.1874e-03 eta 4:09:42
epoch [28/50] batch [400/796] time 0.820 (0.836) data 0.000 (0.002) loss 0.8848 (0.8452) lr 1.1874e-03 eta 4:09:22
epoch [28/50] batch [420/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.7385 (0.8463) lr 1.1874e-03 eta 4:09:03
epoch [28/50] batch [440/796] time 0.845 (0.835) data 0.000 (0.002) loss 1.6889 (0.8494) lr 1.1874e-03 eta 4:08:47
epoch [28/50] batch [460/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.9860 (0.8474) lr 1.1874e-03 eta 4:08:30
epoch [28/50] batch [480/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.3102 (0.8455) lr 1.1874e-03 eta 4:08:11
epoch [28/50] batch [500/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.6346 (0.8469) lr 1.1874e-03 eta 4:07:54
epoch [28/50] batch [520/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.3249 (0.8351) lr 1.1874e-03 eta 4:07:36
epoch [28/50] batch [540/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.8899 (0.8399) lr 1.1874e-03 eta 4:07:18
epoch [28/50] batch [560/796] time 0.836 (0.835) data 0.000 (0.001) loss 0.8445 (0.8412) lr 1.1874e-03 eta 4:07:00
epoch [28/50] batch [580/796] time 0.822 (0.835) data 0.000 (0.001) loss 0.2570 (0.8364) lr 1.1874e-03 eta 4:06:42
epoch [28/50] batch [600/796] time 0.844 (0.835) data 0.000 (0.001) loss 1.2146 (0.8270) lr 1.1874e-03 eta 4:06:25
epoch [28/50] batch [620/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.3695 (0.8258) lr 1.1874e-03 eta 4:06:08
epoch [28/50] batch [640/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.5314 (0.8151) lr 1.1874e-03 eta 4:05:50
epoch [28/50] batch [660/796] time 0.823 (0.835) data 0.000 (0.001) loss 1.2860 (0.8140) lr 1.1874e-03 eta 4:05:31
epoch [28/50] batch [680/796] time 0.831 (0.835) data 0.000 (0.001) loss 0.5496 (0.8095) lr 1.1874e-03 eta 4:05:13
epoch [28/50] batch [700/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.4605 (0.8078) lr 1.1874e-03 eta 4:04:57
epoch [28/50] batch [720/796] time 0.821 (0.835) data 0.000 (0.001) loss 1.8224 (0.8074) lr 1.1874e-03 eta 4:04:40
epoch [28/50] batch [740/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.5916 (0.8048) lr 1.1874e-03 eta 4:04:22
epoch [28/50] batch [760/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.4726 (0.8001) lr 1.1874e-03 eta 4:04:06
epoch [28/50] batch [780/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.1482 (0.8011) lr 1.1874e-03 eta 4:03:50
epoch [29/50] batch [20/796] time 0.842 (0.859) data 0.000 (0.025) loss 2.6869 (1.1352) lr 1.1253e-03 eta 4:10:23
epoch [29/50] batch [40/796] time 0.821 (0.847) data 0.000 (0.013) loss 1.9886 (0.9617) lr 1.1253e-03 eta 4:06:35
epoch [29/50] batch [60/796] time 0.842 (0.843) data 0.000 (0.008) loss 1.0421 (0.8922) lr 1.1253e-03 eta 4:05:17
epoch [29/50] batch [80/796] time 0.843 (0.841) data 0.000 (0.006) loss 0.7870 (0.8570) lr 1.1253e-03 eta 4:04:29
epoch [29/50] batch [100/796] time 0.842 (0.840) data 0.000 (0.005) loss 0.9089 (0.8601) lr 1.1253e-03 eta 4:03:44
epoch [29/50] batch [120/796] time 0.844 (0.838) data 0.000 (0.004) loss 0.4077 (0.8725) lr 1.1253e-03 eta 4:03:01
epoch [29/50] batch [140/796] time 0.842 (0.838) data 0.000 (0.004) loss 0.8525 (0.8484) lr 1.1253e-03 eta 4:02:29
epoch [29/50] batch [160/796] time 0.843 (0.837) data 0.000 (0.003) loss 0.9014 (0.8530) lr 1.1253e-03 eta 4:02:04
epoch [29/50] batch [180/796] time 0.843 (0.837) data 0.000 (0.003) loss 1.0948 (0.8422) lr 1.1253e-03 eta 4:01:45
epoch [29/50] batch [200/796] time 0.832 (0.837) data 0.000 (0.003) loss 1.8880 (0.8367) lr 1.1253e-03 eta 4:01:24
epoch [29/50] batch [220/796] time 0.822 (0.837) data 0.000 (0.002) loss 0.7349 (0.8305) lr 1.1253e-03 eta 4:01:04
epoch [29/50] batch [240/796] time 0.844 (0.836) data 0.000 (0.002) loss 0.1993 (0.8105) lr 1.1253e-03 eta 4:00:47
epoch [29/50] batch [260/796] time 0.845 (0.837) data 0.000 (0.002) loss 0.6624 (0.7974) lr 1.1253e-03 eta 4:00:32
epoch [29/50] batch [280/796] time 0.829 (0.836) data 0.000 (0.002) loss 0.7600 (0.7892) lr 1.1253e-03 eta 4:00:13
epoch [29/50] batch [300/796] time 0.840 (0.836) data 0.000 (0.002) loss 1.4841 (0.7885) lr 1.1253e-03 eta 3:59:51
epoch [29/50] batch [320/796] time 0.840 (0.836) data 0.000 (0.002) loss 1.3971 (0.7975) lr 1.1253e-03 eta 3:59:28
epoch [29/50] batch [340/796] time 0.841 (0.836) data 0.000 (0.002) loss 0.6344 (0.7945) lr 1.1253e-03 eta 3:59:07
epoch [29/50] batch [360/796] time 0.838 (0.835) data 0.000 (0.002) loss 1.4086 (0.7930) lr 1.1253e-03 eta 3:58:44
epoch [29/50] batch [380/796] time 0.838 (0.835) data 0.000 (0.001) loss 1.2700 (0.8041) lr 1.1253e-03 eta 3:58:24
epoch [29/50] batch [400/796] time 0.841 (0.835) data 0.000 (0.001) loss 0.0624 (0.7999) lr 1.1253e-03 eta 3:58:03
epoch [29/50] batch [420/796] time 0.830 (0.835) data 0.000 (0.001) loss 1.3049 (0.8100) lr 1.1253e-03 eta 3:57:45
epoch [29/50] batch [440/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.5528 (0.8035) lr 1.1253e-03 eta 3:57:25
epoch [29/50] batch [460/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.7384 (0.8041) lr 1.1253e-03 eta 3:57:04
epoch [29/50] batch [480/796] time 0.838 (0.834) data 0.000 (0.001) loss 0.4375 (0.7881) lr 1.1253e-03 eta 3:56:46
epoch [29/50] batch [500/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.3592 (0.7843) lr 1.1253e-03 eta 3:56:27
epoch [29/50] batch [520/796] time 0.839 (0.834) data 0.000 (0.001) loss 0.7403 (0.7760) lr 1.1253e-03 eta 3:56:05
epoch [29/50] batch [540/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.7722 (0.7723) lr 1.1253e-03 eta 3:55:49
epoch [29/50] batch [560/796] time 0.809 (0.834) data 0.000 (0.001) loss 0.1534 (0.7738) lr 1.1253e-03 eta 3:55:30
epoch [29/50] batch [580/796] time 0.830 (0.833) data 0.000 (0.001) loss 0.0478 (0.7785) lr 1.1253e-03 eta 3:55:11
epoch [29/50] batch [600/796] time 0.840 (0.833) data 0.000 (0.001) loss 0.4205 (0.7766) lr 1.1253e-03 eta 3:54:53
epoch [29/50] batch [620/796] time 0.831 (0.833) data 0.000 (0.001) loss 0.3023 (0.7787) lr 1.1253e-03 eta 3:54:36
epoch [29/50] batch [640/796] time 0.838 (0.833) data 0.000 (0.001) loss 0.3412 (0.7827) lr 1.1253e-03 eta 3:54:17
epoch [29/50] batch [660/796] time 0.829 (0.833) data 0.000 (0.001) loss 0.0903 (0.7877) lr 1.1253e-03 eta 3:54:00
epoch [29/50] batch [680/796] time 0.831 (0.833) data 0.000 (0.001) loss 0.3216 (0.7871) lr 1.1253e-03 eta 3:53:42
epoch [29/50] batch [700/796] time 0.832 (0.833) data 0.000 (0.001) loss 0.1971 (0.7851) lr 1.1253e-03 eta 3:53:23
epoch [29/50] batch [720/796] time 0.829 (0.833) data 0.000 (0.001) loss 0.0909 (0.7783) lr 1.1253e-03 eta 3:53:05
epoch [29/50] batch [740/796] time 0.839 (0.833) data 0.000 (0.001) loss 1.2498 (0.7823) lr 1.1253e-03 eta 3:52:48
epoch [29/50] batch [760/796] time 0.831 (0.833) data 0.000 (0.001) loss 0.3279 (0.7841) lr 1.1253e-03 eta 3:52:31
epoch [29/50] batch [780/796] time 0.839 (0.833) data 0.000 (0.001) loss 1.5398 (0.7895) lr 1.1253e-03 eta 3:52:14
epoch [30/50] batch [20/796] time 0.843 (0.862) data 0.000 (0.028) loss 0.8042 (0.9965) lr 1.0628e-03 eta 3:59:45
epoch [30/50] batch [40/796] time 0.834 (0.847) data 0.000 (0.014) loss 0.4540 (0.8521) lr 1.0628e-03 eta 3:55:32
epoch [30/50] batch [60/796] time 0.822 (0.842) data 0.000 (0.009) loss 0.4223 (0.8338) lr 1.0628e-03 eta 3:53:47
epoch [30/50] batch [80/796] time 0.833 (0.840) data 0.000 (0.007) loss 0.7295 (0.8688) lr 1.0628e-03 eta 3:52:48
epoch [30/50] batch [100/796] time 0.831 (0.837) data 0.000 (0.006) loss 1.9310 (0.8696) lr 1.0628e-03 eta 3:51:54
epoch [30/50] batch [120/796] time 0.842 (0.837) data 0.000 (0.005) loss 0.6318 (0.8063) lr 1.0628e-03 eta 3:51:24
epoch [30/50] batch [140/796] time 0.833 (0.836) data 0.000 (0.004) loss 0.0616 (0.7786) lr 1.0628e-03 eta 3:50:55
epoch [30/50] batch [160/796] time 0.843 (0.835) data 0.000 (0.004) loss 0.9953 (0.7769) lr 1.0628e-03 eta 3:50:28
epoch [30/50] batch [180/796] time 0.847 (0.835) data 0.000 (0.003) loss 2.3933 (0.8064) lr 1.0628e-03 eta 3:50:06
epoch [30/50] batch [200/796] time 0.832 (0.835) data 0.000 (0.003) loss 0.5595 (0.8115) lr 1.0628e-03 eta 3:49:45
epoch [30/50] batch [220/796] time 0.832 (0.835) data 0.000 (0.003) loss 0.3469 (0.8027) lr 1.0628e-03 eta 3:49:28
epoch [30/50] batch [240/796] time 0.820 (0.834) data 0.000 (0.003) loss 0.7691 (0.8300) lr 1.0628e-03 eta 3:49:08
epoch [30/50] batch [260/796] time 0.851 (0.834) data 0.000 (0.002) loss 0.0975 (0.8457) lr 1.0628e-03 eta 3:48:47
epoch [30/50] batch [280/796] time 0.833 (0.834) data 0.000 (0.002) loss 1.4620 (0.8343) lr 1.0628e-03 eta 3:48:29
epoch [30/50] batch [300/796] time 0.833 (0.834) data 0.000 (0.002) loss 0.2384 (0.8257) lr 1.0628e-03 eta 3:48:09
epoch [30/50] batch [320/796] time 0.832 (0.834) data 0.000 (0.002) loss 2.4172 (0.8134) lr 1.0628e-03 eta 3:47:53
epoch [30/50] batch [340/796] time 0.833 (0.834) data 0.000 (0.002) loss 1.8132 (0.8117) lr 1.0628e-03 eta 3:47:34
epoch [30/50] batch [360/796] time 0.809 (0.834) data 0.000 (0.002) loss 0.1625 (0.8073) lr 1.0628e-03 eta 3:47:19
epoch [30/50] batch [380/796] time 0.831 (0.834) data 0.000 (0.002) loss 0.2444 (0.8112) lr 1.0628e-03 eta 3:47:04
epoch [30/50] batch [400/796] time 0.844 (0.834) data 0.000 (0.002) loss 1.2192 (0.8168) lr 1.0628e-03 eta 3:46:47
epoch [30/50] batch [420/796] time 0.811 (0.834) data 0.000 (0.002) loss 0.2301 (0.8251) lr 1.0628e-03 eta 3:46:27
epoch [30/50] batch [440/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.7696 (0.8260) lr 1.0628e-03 eta 3:46:11
epoch [30/50] batch [460/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.6039 (0.8167) lr 1.0628e-03 eta 3:45:54
epoch [30/50] batch [480/796] time 0.808 (0.834) data 0.000 (0.001) loss 0.9898 (0.8203) lr 1.0628e-03 eta 3:45:35
epoch [30/50] batch [500/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.2448 (0.8250) lr 1.0628e-03 eta 3:45:16
epoch [30/50] batch [520/796] time 0.841 (0.834) data 0.000 (0.001) loss 1.6189 (0.8199) lr 1.0628e-03 eta 3:44:59
epoch [30/50] batch [540/796] time 0.845 (0.834) data 0.000 (0.001) loss 0.8640 (0.8182) lr 1.0628e-03 eta 3:44:44
epoch [30/50] batch [560/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.4389 (0.8286) lr 1.0628e-03 eta 3:44:28
epoch [30/50] batch [580/796] time 0.834 (0.834) data 0.000 (0.001) loss 0.4905 (0.8289) lr 1.0628e-03 eta 3:44:11
epoch [30/50] batch [600/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.3313 (0.8219) lr 1.0628e-03 eta 3:43:54
epoch [30/50] batch [620/796] time 0.845 (0.834) data 0.000 (0.001) loss 0.1669 (0.8177) lr 1.0628e-03 eta 3:43:37
epoch [30/50] batch [640/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.2908 (0.8152) lr 1.0628e-03 eta 3:43:21
epoch [30/50] batch [660/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.9728 (0.8200) lr 1.0628e-03 eta 3:43:05
epoch [30/50] batch [680/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.0271 (0.8174) lr 1.0628e-03 eta 3:42:48
epoch [30/50] batch [700/796] time 0.818 (0.834) data 0.000 (0.001) loss 2.7123 (0.8213) lr 1.0628e-03 eta 3:42:30
epoch [30/50] batch [720/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.1526 (0.8225) lr 1.0628e-03 eta 3:42:14
epoch [30/50] batch [740/796] time 0.832 (0.834) data 0.000 (0.001) loss 1.0456 (0.8214) lr 1.0628e-03 eta 3:41:57
epoch [30/50] batch [760/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.5864 (0.8193) lr 1.0628e-03 eta 3:41:39
epoch [30/50] batch [780/796] time 0.842 (0.833) data 0.000 (0.001) loss 0.6707 (0.8148) lr 1.0628e-03 eta 3:41:21
epoch [31/50] batch [20/796] time 0.841 (0.859) data 0.000 (0.025) loss 0.2414 (0.5726) lr 1.0000e-03 eta 3:47:37
epoch [31/50] batch [40/796] time 0.833 (0.845) data 0.000 (0.013) loss 0.3610 (0.5930) lr 1.0000e-03 eta 3:43:33
epoch [31/50] batch [60/796] time 0.842 (0.840) data 0.000 (0.008) loss 0.3018 (0.7879) lr 1.0000e-03 eta 3:41:59
epoch [31/50] batch [80/796] time 0.818 (0.838) data 0.000 (0.006) loss 1.5589 (0.8455) lr 1.0000e-03 eta 3:41:10
epoch [31/50] batch [100/796] time 0.842 (0.837) data 0.000 (0.005) loss 0.3326 (0.7951) lr 1.0000e-03 eta 3:40:33
epoch [31/50] batch [120/796] time 0.843 (0.836) data 0.000 (0.004) loss 0.1365 (0.7771) lr 1.0000e-03 eta 3:40:06
epoch [31/50] batch [140/796] time 0.820 (0.835) data 0.000 (0.004) loss 0.2977 (0.7869) lr 1.0000e-03 eta 3:39:39
epoch [31/50] batch [160/796] time 0.842 (0.835) data 0.000 (0.003) loss 0.7414 (0.8059) lr 1.0000e-03 eta 3:39:18
epoch [31/50] batch [180/796] time 0.832 (0.835) data 0.000 (0.003) loss 0.8955 (0.7937) lr 1.0000e-03 eta 3:38:57
epoch [31/50] batch [200/796] time 0.841 (0.834) data 0.000 (0.003) loss 0.9698 (0.7991) lr 1.0000e-03 eta 3:38:34
epoch [31/50] batch [220/796] time 0.840 (0.834) data 0.000 (0.002) loss 0.9842 (0.8105) lr 1.0000e-03 eta 3:38:17
epoch [31/50] batch [240/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.6951 (0.8152) lr 1.0000e-03 eta 3:37:55
epoch [31/50] batch [260/796] time 0.843 (0.834) data 0.000 (0.002) loss 0.2035 (0.8054) lr 1.0000e-03 eta 3:37:37
epoch [31/50] batch [280/796] time 0.835 (0.834) data 0.000 (0.002) loss 0.4465 (0.8023) lr 1.0000e-03 eta 3:37:21
epoch [31/50] batch [300/796] time 0.841 (0.834) data 0.000 (0.002) loss 0.9508 (0.7936) lr 1.0000e-03 eta 3:37:04
epoch [31/50] batch [320/796] time 0.841 (0.834) data 0.000 (0.002) loss 0.4516 (0.7874) lr 1.0000e-03 eta 3:36:48
epoch [31/50] batch [340/796] time 0.818 (0.834) data 0.000 (0.002) loss 1.3813 (0.8043) lr 1.0000e-03 eta 3:36:32
epoch [31/50] batch [360/796] time 0.844 (0.834) data 0.000 (0.002) loss 1.2933 (0.8080) lr 1.0000e-03 eta 3:36:16
epoch [31/50] batch [380/796] time 0.845 (0.834) data 0.000 (0.001) loss 0.5284 (0.7871) lr 1.0000e-03 eta 3:35:58
epoch [31/50] batch [400/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.2862 (0.7811) lr 1.0000e-03 eta 3:35:42
epoch [31/50] batch [420/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.0997 (0.7736) lr 1.0000e-03 eta 3:35:24
epoch [31/50] batch [440/796] time 0.844 (0.834) data 0.000 (0.001) loss 0.6561 (0.7707) lr 1.0000e-03 eta 3:35:08
epoch [31/50] batch [460/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.1412 (0.7833) lr 1.0000e-03 eta 3:34:50
epoch [31/50] batch [480/796] time 0.821 (0.834) data 0.000 (0.001) loss 1.9227 (0.7819) lr 1.0000e-03 eta 3:34:34
epoch [31/50] batch [500/796] time 0.819 (0.834) data 0.000 (0.001) loss 1.5712 (0.7812) lr 1.0000e-03 eta 3:34:16
epoch [31/50] batch [520/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.7126 (0.7699) lr 1.0000e-03 eta 3:34:00
epoch [31/50] batch [540/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.1907 (0.7628) lr 1.0000e-03 eta 3:33:42
epoch [31/50] batch [560/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.8128 (0.7596) lr 1.0000e-03 eta 3:33:26
epoch [31/50] batch [580/796] time 0.844 (0.834) data 0.000 (0.001) loss 0.2330 (0.7607) lr 1.0000e-03 eta 3:33:10
epoch [31/50] batch [600/796] time 0.832 (0.834) data 0.001 (0.001) loss 0.1963 (0.7628) lr 1.0000e-03 eta 3:32:55
epoch [31/50] batch [620/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.5529 (0.7623) lr 1.0000e-03 eta 3:32:38
epoch [31/50] batch [640/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.7354 (0.7561) lr 1.0000e-03 eta 3:32:21
epoch [31/50] batch [660/796] time 0.835 (0.834) data 0.000 (0.001) loss 0.5486 (0.7678) lr 1.0000e-03 eta 3:32:05
epoch [31/50] batch [680/796] time 0.843 (0.834) data 0.000 (0.001) loss 2.3816 (0.7759) lr 1.0000e-03 eta 3:31:48
epoch [31/50] batch [700/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.7110 (0.7685) lr 1.0000e-03 eta 3:31:31
epoch [31/50] batch [720/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.1751 (0.7715) lr 1.0000e-03 eta 3:31:13
epoch [31/50] batch [740/796] time 0.833 (0.834) data 0.000 (0.001) loss 1.1448 (0.7770) lr 1.0000e-03 eta 3:30:56
epoch [31/50] batch [760/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.3542 (0.7762) lr 1.0000e-03 eta 3:30:38
epoch [31/50] batch [780/796] time 0.831 (0.834) data 0.000 (0.001) loss 0.5893 (0.7777) lr 1.0000e-03 eta 3:30:22
epoch [32/50] batch [20/796] time 0.812 (0.862) data 0.000 (0.029) loss 1.3017 (0.7012) lr 9.3721e-04 eta 3:37:05
epoch [32/50] batch [40/796] time 0.839 (0.847) data 0.000 (0.015) loss 1.4408 (0.7862) lr 9.3721e-04 eta 3:33:01
epoch [32/50] batch [60/796] time 0.834 (0.843) data 0.000 (0.010) loss 0.3281 (0.7854) lr 9.3721e-04 eta 3:31:42
epoch [32/50] batch [80/796] time 0.842 (0.841) data 0.000 (0.007) loss 0.1362 (0.7692) lr 9.3721e-04 eta 3:30:46
epoch [32/50] batch [100/796] time 0.809 (0.839) data 0.000 (0.006) loss 1.0294 (0.8244) lr 9.3721e-04 eta 3:30:04
epoch [32/50] batch [120/796] time 0.832 (0.838) data 0.000 (0.005) loss 0.8637 (0.8108) lr 9.3721e-04 eta 3:29:29
epoch [32/50] batch [140/796] time 0.842 (0.837) data 0.000 (0.004) loss 0.6067 (0.7972) lr 9.3721e-04 eta 3:29:00
epoch [32/50] batch [160/796] time 0.809 (0.836) data 0.000 (0.004) loss 1.3508 (0.7928) lr 9.3721e-04 eta 3:28:37
epoch [32/50] batch [180/796] time 0.832 (0.836) data 0.000 (0.003) loss 0.6861 (0.7762) lr 9.3721e-04 eta 3:28:16
epoch [32/50] batch [200/796] time 0.841 (0.836) data 0.000 (0.003) loss 0.4294 (0.7636) lr 9.3721e-04 eta 3:27:57
epoch [32/50] batch [220/796] time 0.832 (0.836) data 0.000 (0.003) loss 0.0959 (0.7715) lr 9.3721e-04 eta 3:27:36
epoch [32/50] batch [240/796] time 0.842 (0.836) data 0.000 (0.003) loss 0.5370 (0.7822) lr 9.3721e-04 eta 3:27:16
epoch [32/50] batch [260/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.3336 (0.7838) lr 9.3721e-04 eta 3:26:52
epoch [32/50] batch [280/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.0284 (0.7909) lr 9.3721e-04 eta 3:26:32
epoch [32/50] batch [300/796] time 0.819 (0.835) data 0.000 (0.002) loss 0.1642 (0.7848) lr 9.3721e-04 eta 3:26:17
epoch [32/50] batch [320/796] time 0.819 (0.835) data 0.000 (0.002) loss 0.5333 (0.8024) lr 9.3721e-04 eta 3:26:00
epoch [32/50] batch [340/796] time 0.834 (0.835) data 0.000 (0.002) loss 0.2924 (0.7934) lr 9.3721e-04 eta 3:25:40
epoch [32/50] batch [360/796] time 0.820 (0.835) data 0.000 (0.002) loss 0.8992 (0.7965) lr 9.3721e-04 eta 3:25:22
epoch [32/50] batch [380/796] time 0.819 (0.835) data 0.000 (0.002) loss 1.9200 (0.8046) lr 9.3721e-04 eta 3:25:04
epoch [32/50] batch [400/796] time 0.833 (0.835) data 0.000 (0.002) loss 1.7451 (0.8021) lr 9.3721e-04 eta 3:24:49
epoch [32/50] batch [420/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.3044 (0.8119) lr 9.3721e-04 eta 3:24:32
epoch [32/50] batch [440/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.9057 (0.8060) lr 9.3721e-04 eta 3:24:13
epoch [32/50] batch [460/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.9183 (0.8077) lr 9.3721e-04 eta 3:23:53
epoch [32/50] batch [480/796] time 0.843 (0.834) data 0.000 (0.001) loss 2.6105 (0.8172) lr 9.3721e-04 eta 3:23:37
epoch [32/50] batch [500/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.9987 (0.8204) lr 9.3721e-04 eta 3:23:22
epoch [32/50] batch [520/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.5831 (0.8156) lr 9.3721e-04 eta 3:23:05
epoch [32/50] batch [540/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.4502 (0.8160) lr 9.3721e-04 eta 3:22:50
epoch [32/50] batch [560/796] time 0.843 (0.834) data 0.000 (0.001) loss 1.5138 (0.8227) lr 9.3721e-04 eta 3:22:31
epoch [32/50] batch [580/796] time 0.844 (0.834) data 0.000 (0.001) loss 4.3538 (0.8278) lr 9.3721e-04 eta 3:22:15
epoch [32/50] batch [600/796] time 0.838 (0.834) data 0.000 (0.001) loss 1.6290 (0.8240) lr 9.3721e-04 eta 3:21:59
epoch [32/50] batch [620/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.1794 (0.8225) lr 9.3721e-04 eta 3:21:41
epoch [32/50] batch [640/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.2851 (0.8217) lr 9.3721e-04 eta 3:21:25
epoch [32/50] batch [660/796] time 0.846 (0.834) data 0.000 (0.001) loss 0.5271 (0.8248) lr 9.3721e-04 eta 3:21:06
epoch [32/50] batch [680/796] time 0.833 (0.834) data 0.000 (0.001) loss 1.2872 (0.8202) lr 9.3721e-04 eta 3:20:49
epoch [32/50] batch [700/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.6044 (0.8157) lr 9.3721e-04 eta 3:20:31
epoch [32/50] batch [720/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.8027 (0.8100) lr 9.3721e-04 eta 3:20:14
epoch [32/50] batch [740/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.5490 (0.8087) lr 9.3721e-04 eta 3:19:57
epoch [32/50] batch [760/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.1091 (0.8074) lr 9.3721e-04 eta 3:19:41
epoch [32/50] batch [780/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.4475 (0.8081) lr 9.3721e-04 eta 3:19:23
epoch [33/50] batch [20/796] time 0.811 (0.861) data 0.000 (0.029) loss 0.3123 (0.7405) lr 8.7467e-04 eta 3:25:20
epoch [33/50] batch [40/796] time 0.843 (0.848) data 0.000 (0.015) loss 1.1053 (0.8373) lr 8.7467e-04 eta 3:21:55
epoch [33/50] batch [60/796] time 0.842 (0.843) data 0.000 (0.010) loss 1.7494 (0.8658) lr 8.7467e-04 eta 3:20:34
epoch [33/50] batch [80/796] time 0.842 (0.841) data 0.000 (0.007) loss 1.3964 (0.8656) lr 8.7467e-04 eta 3:19:37
epoch [33/50] batch [100/796] time 0.842 (0.839) data 0.000 (0.006) loss 3.0596 (0.8947) lr 8.7467e-04 eta 3:18:55
epoch [33/50] batch [120/796] time 0.841 (0.838) data 0.000 (0.005) loss 0.8323 (0.8416) lr 8.7467e-04 eta 3:18:27
epoch [33/50] batch [140/796] time 0.848 (0.837) data 0.000 (0.004) loss 0.7138 (0.8343) lr 8.7467e-04 eta 3:17:56
epoch [33/50] batch [160/796] time 0.831 (0.836) data 0.000 (0.004) loss 0.1143 (0.8133) lr 8.7467e-04 eta 3:17:30
epoch [33/50] batch [180/796] time 0.819 (0.836) data 0.000 (0.003) loss 0.9450 (0.7992) lr 8.7467e-04 eta 3:17:09
epoch [33/50] batch [200/796] time 0.819 (0.836) data 0.000 (0.003) loss 0.3372 (0.8110) lr 8.7467e-04 eta 3:16:47
epoch [33/50] batch [220/796] time 0.832 (0.835) data 0.000 (0.003) loss 0.4540 (0.8072) lr 8.7467e-04 eta 3:16:23
epoch [33/50] batch [240/796] time 0.819 (0.835) data 0.000 (0.003) loss 0.2567 (0.7994) lr 8.7467e-04 eta 3:16:05
epoch [33/50] batch [260/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.4495 (0.8183) lr 8.7467e-04 eta 3:15:42
epoch [33/50] batch [280/796] time 0.831 (0.834) data 0.000 (0.002) loss 0.4701 (0.7958) lr 8.7467e-04 eta 3:15:22
epoch [33/50] batch [300/796] time 0.819 (0.834) data 0.000 (0.002) loss 0.1964 (0.7912) lr 8.7467e-04 eta 3:15:03
epoch [33/50] batch [320/796] time 0.841 (0.834) data 0.000 (0.002) loss 1.1207 (0.7860) lr 8.7467e-04 eta 3:14:45
epoch [33/50] batch [340/796] time 0.842 (0.834) data 0.000 (0.002) loss 1.1758 (0.7909) lr 8.7467e-04 eta 3:14:26
epoch [33/50] batch [360/796] time 0.809 (0.834) data 0.000 (0.002) loss 1.6826 (0.7960) lr 8.7467e-04 eta 3:14:09
epoch [33/50] batch [380/796] time 0.831 (0.834) data 0.000 (0.002) loss 0.0378 (0.7871) lr 8.7467e-04 eta 3:13:51
epoch [33/50] batch [400/796] time 0.811 (0.834) data 0.000 (0.002) loss 2.0438 (0.7929) lr 8.7467e-04 eta 3:13:33
epoch [33/50] batch [420/796] time 0.846 (0.834) data 0.000 (0.002) loss 0.4046 (0.7989) lr 8.7467e-04 eta 3:13:17
epoch [33/50] batch [440/796] time 0.845 (0.834) data 0.000 (0.001) loss 1.1546 (0.7899) lr 8.7467e-04 eta 3:13:00
epoch [33/50] batch [460/796] time 0.819 (0.834) data 0.000 (0.001) loss 3.0577 (0.7862) lr 8.7467e-04 eta 3:12:42
epoch [33/50] batch [480/796] time 0.819 (0.834) data 0.000 (0.001) loss 1.0317 (0.7870) lr 8.7467e-04 eta 3:12:27
epoch [33/50] batch [500/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.2095 (0.7826) lr 8.7467e-04 eta 3:12:09
epoch [33/50] batch [520/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.4075 (0.7860) lr 8.7467e-04 eta 3:11:51
epoch [33/50] batch [540/796] time 0.841 (0.834) data 0.000 (0.001) loss 1.5787 (0.7811) lr 8.7467e-04 eta 3:11:33
epoch [33/50] batch [560/796] time 0.843 (0.834) data 0.000 (0.001) loss 1.6476 (0.7922) lr 8.7467e-04 eta 3:11:16
epoch [33/50] batch [580/796] time 0.839 (0.834) data 0.000 (0.001) loss 0.3538 (0.7865) lr 8.7467e-04 eta 3:11:00
epoch [33/50] batch [600/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.9288 (0.7881) lr 8.7467e-04 eta 3:10:43
epoch [33/50] batch [620/796] time 0.809 (0.834) data 0.000 (0.001) loss 0.5779 (0.7827) lr 8.7467e-04 eta 3:10:26
epoch [33/50] batch [640/796] time 0.836 (0.833) data 0.000 (0.001) loss 0.4219 (0.7914) lr 8.7467e-04 eta 3:10:08
epoch [33/50] batch [660/796] time 0.819 (0.833) data 0.000 (0.001) loss 0.5883 (0.7879) lr 8.7467e-04 eta 3:09:51
epoch [33/50] batch [680/796] time 0.841 (0.833) data 0.000 (0.001) loss 0.1283 (0.7895) lr 8.7467e-04 eta 3:09:35
epoch [33/50] batch [700/796] time 0.832 (0.833) data 0.000 (0.001) loss 0.7453 (0.7918) lr 8.7467e-04 eta 3:09:18
epoch [33/50] batch [720/796] time 0.840 (0.833) data 0.000 (0.001) loss 1.4155 (0.7986) lr 8.7467e-04 eta 3:09:01
epoch [33/50] batch [740/796] time 0.817 (0.833) data 0.000 (0.001) loss 0.8194 (0.7957) lr 8.7467e-04 eta 3:08:45
epoch [33/50] batch [760/796] time 0.818 (0.833) data 0.000 (0.001) loss 0.2620 (0.7919) lr 8.7467e-04 eta 3:08:28
epoch [33/50] batch [780/796] time 0.818 (0.833) data 0.000 (0.001) loss 0.2726 (0.7940) lr 8.7467e-04 eta 3:08:11
epoch [34/50] batch [20/796] time 0.818 (0.866) data 0.000 (0.030) loss 0.4422 (0.6888) lr 8.1262e-04 eta 3:14:57
epoch [34/50] batch [40/796] time 0.831 (0.849) data 0.000 (0.015) loss 0.7298 (0.8011) lr 8.1262e-04 eta 3:10:57
epoch [34/50] batch [60/796] time 0.819 (0.842) data 0.000 (0.010) loss 0.8933 (0.8572) lr 8.1262e-04 eta 3:09:06
epoch [34/50] batch [80/796] time 0.830 (0.840) data 0.000 (0.008) loss 1.3242 (0.8553) lr 8.1262e-04 eta 3:08:15
epoch [34/50] batch [100/796] time 0.845 (0.838) data 0.000 (0.006) loss 1.2601 (0.8494) lr 8.1262e-04 eta 3:07:33
epoch [34/50] batch [120/796] time 0.821 (0.837) data 0.000 (0.005) loss 0.8089 (0.8419) lr 8.1262e-04 eta 3:07:05
epoch [34/50] batch [140/796] time 0.831 (0.836) data 0.000 (0.004) loss 1.0326 (0.8325) lr 8.1262e-04 eta 3:06:37
epoch [34/50] batch [160/796] time 0.831 (0.836) data 0.000 (0.004) loss 0.2678 (0.8153) lr 8.1262e-04 eta 3:06:12
epoch [34/50] batch [180/796] time 0.841 (0.835) data 0.000 (0.004) loss 1.9809 (0.8391) lr 8.1262e-04 eta 3:05:53
epoch [34/50] batch [200/796] time 0.817 (0.835) data 0.000 (0.003) loss 0.9512 (0.8561) lr 8.1262e-04 eta 3:05:33
epoch [34/50] batch [220/796] time 0.840 (0.835) data 0.000 (0.003) loss 0.4614 (0.8417) lr 8.1262e-04 eta 3:05:09
epoch [34/50] batch [240/796] time 0.841 (0.834) data 0.000 (0.003) loss 1.3352 (0.8389) lr 8.1262e-04 eta 3:04:47
epoch [34/50] batch [260/796] time 0.831 (0.834) data 0.000 (0.002) loss 0.2210 (0.8122) lr 8.1262e-04 eta 3:04:30
epoch [34/50] batch [280/796] time 0.817 (0.834) data 0.000 (0.002) loss 0.1517 (0.8276) lr 8.1262e-04 eta 3:04:10
epoch [34/50] batch [300/796] time 0.818 (0.834) data 0.000 (0.002) loss 0.6196 (0.8078) lr 8.1262e-04 eta 3:03:50
epoch [34/50] batch [320/796] time 0.831 (0.833) data 0.000 (0.002) loss 0.3000 (0.8123) lr 8.1262e-04 eta 3:03:31
epoch [34/50] batch [340/796] time 0.831 (0.833) data 0.000 (0.002) loss 0.0171 (0.8143) lr 8.1262e-04 eta 3:03:13
epoch [34/50] batch [360/796] time 0.840 (0.833) data 0.000 (0.002) loss 0.3572 (0.8149) lr 8.1262e-04 eta 3:02:57
epoch [34/50] batch [380/796] time 0.841 (0.833) data 0.000 (0.002) loss 0.6783 (0.8120) lr 8.1262e-04 eta 3:02:38
epoch [34/50] batch [400/796] time 0.841 (0.833) data 0.000 (0.002) loss 0.1200 (0.8191) lr 8.1262e-04 eta 3:02:20
epoch [34/50] batch [420/796] time 0.807 (0.833) data 0.000 (0.002) loss 0.3586 (0.8251) lr 8.1262e-04 eta 3:02:01
epoch [34/50] batch [440/796] time 0.829 (0.833) data 0.000 (0.002) loss 1.5301 (0.8271) lr 8.1262e-04 eta 3:01:44
epoch [34/50] batch [460/796] time 0.834 (0.833) data 0.000 (0.001) loss 1.2985 (0.8335) lr 8.1262e-04 eta 3:01:27
epoch [34/50] batch [480/796] time 0.817 (0.833) data 0.000 (0.001) loss 0.1879 (0.8337) lr 8.1262e-04 eta 3:01:11
epoch [34/50] batch [500/796] time 0.840 (0.833) data 0.000 (0.001) loss 0.2346 (0.8294) lr 8.1262e-04 eta 3:00:53
epoch [34/50] batch [520/796] time 0.842 (0.833) data 0.000 (0.001) loss 0.7365 (0.8227) lr 8.1262e-04 eta 3:00:37
epoch [34/50] batch [540/796] time 0.818 (0.833) data 0.000 (0.001) loss 1.6187 (0.8242) lr 8.1262e-04 eta 3:00:18
epoch [34/50] batch [560/796] time 0.841 (0.833) data 0.000 (0.001) loss 0.4811 (0.8225) lr 8.1262e-04 eta 3:00:01
epoch [34/50] batch [580/796] time 0.819 (0.833) data 0.000 (0.001) loss 0.3734 (0.8196) lr 8.1262e-04 eta 2:59:44
epoch [34/50] batch [600/796] time 0.819 (0.833) data 0.000 (0.001) loss 0.5640 (0.8280) lr 8.1262e-04 eta 2:59:27
epoch [34/50] batch [620/796] time 0.841 (0.833) data 0.000 (0.001) loss 0.3433 (0.8216) lr 8.1262e-04 eta 2:59:09
epoch [34/50] batch [640/796] time 0.820 (0.832) data 0.000 (0.001) loss 0.4923 (0.8157) lr 8.1262e-04 eta 2:58:52
epoch [34/50] batch [660/796] time 0.841 (0.832) data 0.000 (0.001) loss 0.3258 (0.8134) lr 8.1262e-04 eta 2:58:35
epoch [34/50] batch [680/796] time 0.820 (0.833) data 0.000 (0.001) loss 0.7141 (0.8157) lr 8.1262e-04 eta 2:58:19
epoch [34/50] batch [700/796] time 0.820 (0.833) data 0.000 (0.001) loss 0.6394 (0.8155) lr 8.1262e-04 eta 2:58:03
epoch [34/50] batch [720/796] time 0.819 (0.833) data 0.000 (0.001) loss 1.3123 (0.8106) lr 8.1262e-04 eta 2:57:47
epoch [34/50] batch [740/796] time 0.841 (0.833) data 0.000 (0.001) loss 0.8577 (0.8127) lr 8.1262e-04 eta 2:57:30
epoch [34/50] batch [760/796] time 0.845 (0.833) data 0.000 (0.001) loss 0.9026 (0.8093) lr 8.1262e-04 eta 2:57:13
epoch [34/50] batch [780/796] time 0.841 (0.833) data 0.000 (0.001) loss 0.2078 (0.8088) lr 8.1262e-04 eta 2:56:57
epoch [35/50] batch [20/796] time 0.840 (0.864) data 0.000 (0.029) loss 0.6376 (0.8225) lr 7.5131e-04 eta 3:03:05
epoch [35/50] batch [40/796] time 0.840 (0.848) data 0.000 (0.014) loss 0.6526 (1.0312) lr 7.5131e-04 eta 2:59:29
epoch [35/50] batch [60/796] time 0.842 (0.844) data 0.000 (0.010) loss 1.0037 (0.9511) lr 7.5131e-04 eta 2:58:18
epoch [35/50] batch [80/796] time 0.842 (0.841) data 0.000 (0.007) loss 1.7935 (0.8460) lr 7.5131e-04 eta 2:57:20
epoch [35/50] batch [100/796] time 0.844 (0.839) data 0.000 (0.006) loss 1.1342 (0.8874) lr 7.5131e-04 eta 2:56:45
epoch [35/50] batch [120/796] time 0.842 (0.839) data 0.000 (0.005) loss 1.7426 (0.8795) lr 7.5131e-04 eta 2:56:20
epoch [35/50] batch [140/796] time 0.833 (0.838) data 0.000 (0.004) loss 0.7658 (0.8731) lr 7.5131e-04 eta 2:55:50
epoch [35/50] batch [160/796] time 0.840 (0.837) data 0.000 (0.004) loss 0.5636 (0.8556) lr 7.5131e-04 eta 2:55:25
epoch [35/50] batch [180/796] time 0.841 (0.836) data 0.000 (0.003) loss 1.9680 (0.8532) lr 7.5131e-04 eta 2:55:00
epoch [35/50] batch [200/796] time 0.841 (0.836) data 0.000 (0.003) loss 1.0820 (0.8547) lr 7.5131e-04 eta 2:54:33
epoch [35/50] batch [220/796] time 0.809 (0.835) data 0.000 (0.003) loss 0.7862 (0.8462) lr 7.5131e-04 eta 2:54:09
epoch [35/50] batch [240/796] time 0.841 (0.835) data 0.000 (0.003) loss 0.3967 (0.8291) lr 7.5131e-04 eta 2:53:50
epoch [35/50] batch [260/796] time 0.849 (0.835) data 0.000 (0.002) loss 0.5664 (0.8181) lr 7.5131e-04 eta 2:53:31
epoch [35/50] batch [280/796] time 0.831 (0.834) data 0.000 (0.002) loss 0.6957 (0.8005) lr 7.5131e-04 eta 2:53:11
epoch [35/50] batch [300/796] time 0.833 (0.834) data 0.000 (0.002) loss 0.2226 (0.7976) lr 7.5131e-04 eta 2:52:56
epoch [35/50] batch [320/796] time 0.832 (0.834) data 0.000 (0.002) loss 0.4360 (0.7848) lr 7.5131e-04 eta 2:52:38
epoch [35/50] batch [340/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.2942 (0.7751) lr 7.5131e-04 eta 2:52:19
epoch [35/50] batch [360/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.6356 (0.7702) lr 7.5131e-04 eta 2:52:01
epoch [35/50] batch [380/796] time 0.842 (0.834) data 0.000 (0.002) loss 1.1082 (0.7655) lr 7.5131e-04 eta 2:51:42
epoch [35/50] batch [400/796] time 0.840 (0.834) data 0.000 (0.002) loss 0.0989 (0.7690) lr 7.5131e-04 eta 2:51:25
epoch [35/50] batch [420/796] time 0.843 (0.834) data 0.000 (0.002) loss 0.5247 (0.7596) lr 7.5131e-04 eta 2:51:07
epoch [35/50] batch [440/796] time 0.821 (0.833) data 0.000 (0.001) loss 1.0162 (0.7856) lr 7.5131e-04 eta 2:50:48
epoch [35/50] batch [460/796] time 0.820 (0.833) data 0.000 (0.001) loss 0.7934 (0.7833) lr 7.5131e-04 eta 2:50:29
epoch [35/50] batch [480/796] time 0.817 (0.833) data 0.000 (0.001) loss 0.6367 (0.7802) lr 7.5131e-04 eta 2:50:11
epoch [35/50] batch [500/796] time 0.831 (0.833) data 0.000 (0.001) loss 3.2295 (0.7951) lr 7.5131e-04 eta 2:49:53
epoch [35/50] batch [520/796] time 0.841 (0.833) data 0.000 (0.001) loss 1.0480 (0.7929) lr 7.5131e-04 eta 2:49:36
epoch [35/50] batch [540/796] time 0.818 (0.833) data 0.000 (0.001) loss 0.1912 (0.7847) lr 7.5131e-04 eta 2:49:17
epoch [35/50] batch [560/796] time 0.842 (0.833) data 0.000 (0.001) loss 0.5703 (0.7852) lr 7.5131e-04 eta 2:49:01
epoch [35/50] batch [580/796] time 0.842 (0.833) data 0.000 (0.001) loss 1.2119 (0.7912) lr 7.5131e-04 eta 2:48:45
epoch [35/50] batch [600/796] time 0.833 (0.833) data 0.000 (0.001) loss 1.4454 (0.7933) lr 7.5131e-04 eta 2:48:29
epoch [35/50] batch [620/796] time 0.819 (0.833) data 0.000 (0.001) loss 0.8674 (0.7847) lr 7.5131e-04 eta 2:48:12
epoch [35/50] batch [640/796] time 0.820 (0.833) data 0.000 (0.001) loss 0.5119 (0.7898) lr 7.5131e-04 eta 2:47:55
epoch [35/50] batch [660/796] time 0.819 (0.833) data 0.000 (0.001) loss 0.9429 (0.7873) lr 7.5131e-04 eta 2:47:38
epoch [35/50] batch [680/796] time 0.817 (0.833) data 0.000 (0.001) loss 0.1664 (0.7806) lr 7.5131e-04 eta 2:47:21
epoch [35/50] batch [700/796] time 0.817 (0.833) data 0.000 (0.001) loss 0.6329 (0.7842) lr 7.5131e-04 eta 2:47:03
epoch [35/50] batch [720/796] time 0.831 (0.833) data 0.000 (0.001) loss 0.6384 (0.7791) lr 7.5131e-04 eta 2:46:46
epoch [35/50] batch [740/796] time 0.833 (0.833) data 0.000 (0.001) loss 0.2770 (0.7837) lr 7.5131e-04 eta 2:46:29
epoch [35/50] batch [760/796] time 0.843 (0.833) data 0.000 (0.001) loss 1.1553 (0.7819) lr 7.5131e-04 eta 2:46:12
epoch [35/50] batch [780/796] time 0.815 (0.833) data 0.000 (0.001) loss 2.0477 (0.7769) lr 7.5131e-04 eta 2:45:55
epoch [36/50] batch [20/796] time 0.841 (0.864) data 0.000 (0.030) loss 1.0508 (0.7692) lr 6.9098e-04 eta 2:51:43
epoch [36/50] batch [40/796] time 0.841 (0.849) data 0.000 (0.015) loss 0.0560 (0.7095) lr 6.9098e-04 eta 2:48:18
epoch [36/50] batch [60/796] time 0.841 (0.844) data 0.000 (0.010) loss 0.8629 (0.6692) lr 6.9098e-04 eta 2:47:01
epoch [36/50] batch [80/796] time 0.819 (0.841) data 0.000 (0.008) loss 0.4068 (0.6769) lr 6.9098e-04 eta 2:46:12
epoch [36/50] batch [100/796] time 0.841 (0.840) data 0.000 (0.006) loss 0.7481 (0.6626) lr 6.9098e-04 eta 2:45:40
epoch [36/50] batch [120/796] time 0.842 (0.839) data 0.000 (0.005) loss 0.9705 (0.6698) lr 6.9098e-04 eta 2:45:11
epoch [36/50] batch [140/796] time 0.809 (0.838) data 0.000 (0.004) loss 0.1116 (0.6743) lr 6.9098e-04 eta 2:44:47
epoch [36/50] batch [160/796] time 0.827 (0.838) data 0.000 (0.004) loss 0.9862 (0.6759) lr 6.9098e-04 eta 2:44:27
epoch [36/50] batch [180/796] time 0.822 (0.837) data 0.000 (0.003) loss 0.5090 (0.6851) lr 6.9098e-04 eta 2:44:06
epoch [36/50] batch [200/796] time 0.843 (0.837) data 0.000 (0.003) loss 0.1870 (0.6941) lr 6.9098e-04 eta 2:43:47
epoch [36/50] batch [220/796] time 0.833 (0.836) data 0.000 (0.003) loss 0.4408 (0.6881) lr 6.9098e-04 eta 2:43:22
epoch [36/50] batch [240/796] time 0.843 (0.836) data 0.000 (0.003) loss 0.9217 (0.7226) lr 6.9098e-04 eta 2:43:05
epoch [36/50] batch [260/796] time 0.810 (0.836) data 0.000 (0.002) loss 0.6314 (0.7345) lr 6.9098e-04 eta 2:42:46
epoch [36/50] batch [280/796] time 0.819 (0.836) data 0.000 (0.002) loss 0.5298 (0.7267) lr 6.9098e-04 eta 2:42:25
epoch [36/50] batch [300/796] time 0.842 (0.836) data 0.000 (0.002) loss 2.0036 (0.7357) lr 6.9098e-04 eta 2:42:06
epoch [36/50] batch [320/796] time 0.832 (0.835) data 0.000 (0.002) loss 1.5642 (0.7408) lr 6.9098e-04 eta 2:41:46
epoch [36/50] batch [340/796] time 0.841 (0.835) data 0.000 (0.002) loss 0.3401 (0.7453) lr 6.9098e-04 eta 2:41:28
epoch [36/50] batch [360/796] time 0.819 (0.835) data 0.000 (0.002) loss 0.4246 (0.7579) lr 6.9098e-04 eta 2:41:08
epoch [36/50] batch [380/796] time 0.823 (0.835) data 0.000 (0.002) loss 0.1013 (0.7595) lr 6.9098e-04 eta 2:40:51
epoch [36/50] batch [400/796] time 0.818 (0.835) data 0.000 (0.002) loss 0.3500 (0.7630) lr 6.9098e-04 eta 2:40:33
epoch [36/50] batch [420/796] time 0.841 (0.835) data 0.000 (0.002) loss 0.2009 (0.7653) lr 6.9098e-04 eta 2:40:15
epoch [36/50] batch [440/796] time 0.834 (0.835) data 0.000 (0.002) loss 1.0320 (0.7657) lr 6.9098e-04 eta 2:39:57
epoch [36/50] batch [460/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.1404 (0.7628) lr 6.9098e-04 eta 2:39:38
epoch [36/50] batch [480/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.8957 (0.7643) lr 6.9098e-04 eta 2:39:21
epoch [36/50] batch [500/796] time 0.831 (0.834) data 0.000 (0.001) loss 0.2822 (0.7604) lr 6.9098e-04 eta 2:39:02
epoch [36/50] batch [520/796] time 0.843 (0.834) data 0.000 (0.001) loss 1.1410 (0.7636) lr 6.9098e-04 eta 2:38:44
epoch [36/50] batch [540/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.9650 (0.7664) lr 6.9098e-04 eta 2:38:26
epoch [36/50] batch [560/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.1212 (0.7712) lr 6.9098e-04 eta 2:38:10
epoch [36/50] batch [580/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.4142 (0.7700) lr 6.9098e-04 eta 2:37:52
epoch [36/50] batch [600/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.9438 (0.7691) lr 6.9098e-04 eta 2:37:34
epoch [36/50] batch [620/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.1019 (0.7736) lr 6.9098e-04 eta 2:37:18
epoch [36/50] batch [640/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.4279 (0.7772) lr 6.9098e-04 eta 2:37:01
epoch [36/50] batch [660/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.8834 (0.7779) lr 6.9098e-04 eta 2:36:43
epoch [36/50] batch [680/796] time 0.841 (0.834) data 0.000 (0.001) loss 1.4649 (0.7784) lr 6.9098e-04 eta 2:36:26
epoch [36/50] batch [700/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.3107 (0.7824) lr 6.9098e-04 eta 2:36:09
epoch [36/50] batch [720/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.1246 (0.7791) lr 6.9098e-04 eta 2:35:52
epoch [36/50] batch [740/796] time 0.831 (0.834) data 0.000 (0.001) loss 0.3607 (0.7834) lr 6.9098e-04 eta 2:35:35
epoch [36/50] batch [760/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.0463 (0.7823) lr 6.9098e-04 eta 2:35:19
epoch [36/50] batch [780/796] time 0.832 (0.833) data 0.000 (0.001) loss 0.4995 (0.7777) lr 6.9098e-04 eta 2:35:01
epoch [37/50] batch [20/796] time 0.832 (0.865) data 0.000 (0.031) loss 0.2061 (0.6053) lr 6.3188e-04 eta 2:40:27
epoch [37/50] batch [40/796] time 0.842 (0.850) data 0.000 (0.016) loss 0.7485 (0.6035) lr 6.3188e-04 eta 2:37:15
epoch [37/50] batch [60/796] time 0.841 (0.843) data 0.000 (0.011) loss 0.7821 (0.6727) lr 6.3188e-04 eta 2:35:42
epoch [37/50] batch [80/796] time 0.841 (0.841) data 0.000 (0.008) loss 1.9146 (0.6878) lr 6.3188e-04 eta 2:35:03
epoch [37/50] batch [100/796] time 0.832 (0.839) data 0.000 (0.006) loss 0.4979 (0.6992) lr 6.3188e-04 eta 2:34:29
epoch [37/50] batch [120/796] time 0.833 (0.838) data 0.000 (0.005) loss 0.5108 (0.6899) lr 6.3188e-04 eta 2:34:01
epoch [37/50] batch [140/796] time 0.843 (0.837) data 0.000 (0.005) loss 0.1713 (0.7098) lr 6.3188e-04 eta 2:33:35
epoch [37/50] batch [160/796] time 0.833 (0.837) data 0.000 (0.004) loss 0.0755 (0.7092) lr 6.3188e-04 eta 2:33:10
epoch [37/50] batch [180/796] time 0.809 (0.836) data 0.000 (0.004) loss 0.1820 (0.6953) lr 6.3188e-04 eta 2:32:47
epoch [37/50] batch [200/796] time 0.842 (0.836) data 0.000 (0.003) loss 0.2731 (0.7084) lr 6.3188e-04 eta 2:32:26
epoch [37/50] batch [220/796] time 0.832 (0.836) data 0.000 (0.003) loss 0.0833 (0.7522) lr 6.3188e-04 eta 2:32:08
epoch [37/50] batch [240/796] time 0.842 (0.835) data 0.000 (0.003) loss 1.3475 (0.7456) lr 6.3188e-04 eta 2:31:49
epoch [37/50] batch [260/796] time 0.841 (0.835) data 0.000 (0.003) loss 1.1088 (0.7529) lr 6.3188e-04 eta 2:31:30
epoch [37/50] batch [280/796] time 0.819 (0.835) data 0.000 (0.002) loss 0.6250 (0.7580) lr 6.3188e-04 eta 2:31:12
epoch [37/50] batch [300/796] time 0.809 (0.835) data 0.000 (0.002) loss 0.4269 (0.7524) lr 6.3188e-04 eta 2:30:53
epoch [37/50] batch [320/796] time 0.841 (0.835) data 0.000 (0.002) loss 1.6611 (0.7497) lr 6.3188e-04 eta 2:30:35
epoch [37/50] batch [340/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.1341 (0.7439) lr 6.3188e-04 eta 2:30:16
epoch [37/50] batch [360/796] time 0.818 (0.834) data 0.000 (0.002) loss 0.3061 (0.7374) lr 6.3188e-04 eta 2:29:58
epoch [37/50] batch [380/796] time 0.832 (0.834) data 0.000 (0.002) loss 2.0036 (0.7472) lr 6.3188e-04 eta 2:29:39
epoch [37/50] batch [400/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.9972 (0.7611) lr 6.3188e-04 eta 2:29:23
epoch [37/50] batch [420/796] time 0.835 (0.834) data 0.000 (0.002) loss 1.1722 (0.7654) lr 6.3188e-04 eta 2:29:05
epoch [37/50] batch [440/796] time 0.841 (0.834) data 0.000 (0.002) loss 1.6653 (0.7723) lr 6.3188e-04 eta 2:28:47
epoch [37/50] batch [460/796] time 0.818 (0.834) data 0.000 (0.002) loss 0.1430 (0.7730) lr 6.3188e-04 eta 2:28:30
epoch [37/50] batch [480/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.2822 (0.7766) lr 6.3188e-04 eta 2:28:13
epoch [37/50] batch [500/796] time 0.832 (0.834) data 0.000 (0.001) loss 1.5093 (0.7791) lr 6.3188e-04 eta 2:27:57
epoch [37/50] batch [520/796] time 0.818 (0.834) data 0.000 (0.001) loss 2.4367 (0.7811) lr 6.3188e-04 eta 2:27:40
epoch [37/50] batch [540/796] time 0.809 (0.834) data 0.000 (0.001) loss 1.4671 (0.7788) lr 6.3188e-04 eta 2:27:23
epoch [37/50] batch [560/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.6492 (0.7726) lr 6.3188e-04 eta 2:27:06
epoch [37/50] batch [580/796] time 0.822 (0.834) data 0.000 (0.001) loss 1.3981 (0.7730) lr 6.3188e-04 eta 2:26:48
epoch [37/50] batch [600/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.2267 (0.7743) lr 6.3188e-04 eta 2:26:33
epoch [37/50] batch [620/796] time 0.832 (0.834) data 0.000 (0.001) loss 1.0930 (0.7832) lr 6.3188e-04 eta 2:26:15
epoch [37/50] batch [640/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.9283 (0.7893) lr 6.3188e-04 eta 2:25:58
epoch [37/50] batch [660/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.5664 (0.7942) lr 6.3188e-04 eta 2:25:42
epoch [37/50] batch [680/796] time 0.818 (0.834) data 0.000 (0.001) loss 2.9067 (0.7928) lr 6.3188e-04 eta 2:25:25
epoch [37/50] batch [700/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.8496 (0.7933) lr 6.3188e-04 eta 2:25:08
epoch [37/50] batch [720/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.0993 (0.7894) lr 6.3188e-04 eta 2:24:51
epoch [37/50] batch [740/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.3058 (0.7862) lr 6.3188e-04 eta 2:24:34
epoch [37/50] batch [760/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.0596 (0.7868) lr 6.3188e-04 eta 2:24:17
epoch [37/50] batch [780/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.2109 (0.7859) lr 6.3188e-04 eta 2:24:00
epoch [38/50] batch [20/796] time 0.819 (0.868) data 0.000 (0.036) loss 0.3453 (0.7626) lr 5.7422e-04 eta 2:29:25
epoch [38/50] batch [40/796] time 0.832 (0.849) data 0.000 (0.018) loss 0.5269 (0.7729) lr 5.7422e-04 eta 2:25:54
epoch [38/50] batch [60/796] time 0.842 (0.844) data 0.000 (0.012) loss 0.1051 (0.7508) lr 5.7422e-04 eta 2:24:45
epoch [38/50] batch [80/796] time 0.824 (0.842) data 0.000 (0.009) loss 1.7439 (0.8463) lr 5.7422e-04 eta 2:24:03
epoch [38/50] batch [100/796] time 0.818 (0.840) data 0.000 (0.007) loss 0.3705 (0.8540) lr 5.7422e-04 eta 2:23:31
epoch [38/50] batch [120/796] time 0.842 (0.839) data 0.000 (0.006) loss 0.9073 (0.8275) lr 5.7422e-04 eta 2:22:59
epoch [38/50] batch [140/796] time 0.844 (0.838) data 0.000 (0.005) loss 0.0394 (0.8357) lr 5.7422e-04 eta 2:22:36
epoch [38/50] batch [160/796] time 0.844 (0.838) data 0.000 (0.005) loss 1.5214 (0.8135) lr 5.7422e-04 eta 2:22:15
epoch [38/50] batch [180/796] time 0.834 (0.837) data 0.000 (0.004) loss 1.7624 (0.8005) lr 5.7422e-04 eta 2:21:53
epoch [38/50] batch [200/796] time 0.844 (0.837) data 0.000 (0.004) loss 1.1271 (0.8020) lr 5.7422e-04 eta 2:21:34
epoch [38/50] batch [220/796] time 0.843 (0.837) data 0.000 (0.003) loss 0.9552 (0.7857) lr 5.7422e-04 eta 2:21:14
epoch [38/50] batch [240/796] time 0.842 (0.836) data 0.000 (0.003) loss 0.2283 (0.7667) lr 5.7422e-04 eta 2:20:54
epoch [38/50] batch [260/796] time 0.842 (0.836) data 0.000 (0.003) loss 1.0043 (0.7729) lr 5.7422e-04 eta 2:20:38
epoch [38/50] batch [280/796] time 0.819 (0.836) data 0.000 (0.003) loss 3.8659 (0.7740) lr 5.7422e-04 eta 2:20:20
epoch [38/50] batch [300/796] time 0.842 (0.836) data 0.000 (0.003) loss 0.3674 (0.7544) lr 5.7422e-04 eta 2:20:02
epoch [38/50] batch [320/796] time 0.843 (0.836) data 0.000 (0.002) loss 0.1992 (0.7404) lr 5.7422e-04 eta 2:19:42
epoch [38/50] batch [340/796] time 0.831 (0.836) data 0.000 (0.002) loss 0.7083 (0.7467) lr 5.7422e-04 eta 2:19:25
epoch [38/50] batch [360/796] time 0.832 (0.836) data 0.000 (0.002) loss 0.1106 (0.7458) lr 5.7422e-04 eta 2:19:08
epoch [38/50] batch [380/796] time 0.826 (0.836) data 0.000 (0.002) loss 0.1574 (0.7415) lr 5.7422e-04 eta 2:18:48
epoch [38/50] batch [400/796] time 0.819 (0.835) data 0.000 (0.002) loss 0.6569 (0.7334) lr 5.7422e-04 eta 2:18:30
epoch [38/50] batch [420/796] time 0.841 (0.835) data 0.000 (0.002) loss 0.6676 (0.7335) lr 5.7422e-04 eta 2:18:11
epoch [38/50] batch [440/796] time 0.819 (0.835) data 0.000 (0.002) loss 0.0886 (0.7494) lr 5.7422e-04 eta 2:17:54
epoch [38/50] batch [460/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.3367 (0.7409) lr 5.7422e-04 eta 2:17:37
epoch [38/50] batch [480/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.1960 (0.7518) lr 5.7422e-04 eta 2:17:19
epoch [38/50] batch [500/796] time 0.843 (0.835) data 0.000 (0.002) loss 1.8962 (0.7566) lr 5.7422e-04 eta 2:17:01
epoch [38/50] batch [520/796] time 0.841 (0.835) data 0.000 (0.002) loss 1.7456 (0.7592) lr 5.7422e-04 eta 2:16:44
epoch [38/50] batch [540/796] time 0.841 (0.835) data 0.000 (0.002) loss 0.2528 (0.7664) lr 5.7422e-04 eta 2:16:27
epoch [38/50] batch [560/796] time 0.841 (0.835) data 0.000 (0.001) loss 0.0657 (0.7580) lr 5.7422e-04 eta 2:16:10
epoch [38/50] batch [580/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.3109 (0.7565) lr 5.7422e-04 eta 2:15:53
epoch [38/50] batch [600/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.8122 (0.7582) lr 5.7422e-04 eta 2:15:36
epoch [38/50] batch [620/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.0493 (0.7626) lr 5.7422e-04 eta 2:15:19
epoch [38/50] batch [640/796] time 0.819 (0.835) data 0.000 (0.001) loss 0.9219 (0.7625) lr 5.7422e-04 eta 2:15:01
epoch [38/50] batch [660/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.1985 (0.7678) lr 5.7422e-04 eta 2:14:43
epoch [38/50] batch [680/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.4815 (0.7697) lr 5.7422e-04 eta 2:14:26
epoch [38/50] batch [700/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.2859 (0.7622) lr 5.7422e-04 eta 2:14:09
epoch [38/50] batch [720/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.9792 (0.7626) lr 5.7422e-04 eta 2:13:51
epoch [38/50] batch [740/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.1228 (0.7636) lr 5.7422e-04 eta 2:13:34
epoch [38/50] batch [760/796] time 0.841 (0.834) data 0.000 (0.001) loss 1.6368 (0.7630) lr 5.7422e-04 eta 2:13:17
epoch [38/50] batch [780/796] time 0.848 (0.834) data 0.000 (0.001) loss 1.6464 (0.7650) lr 5.7422e-04 eta 2:13:00
epoch [39/50] batch [20/796] time 0.842 (0.865) data 0.000 (0.031) loss 0.4801 (0.8170) lr 5.1825e-04 eta 2:17:24
epoch [39/50] batch [40/796] time 0.810 (0.850) data 0.000 (0.016) loss 0.2507 (0.7418) lr 5.1825e-04 eta 2:14:42
epoch [39/50] batch [60/796] time 0.832 (0.845) data 0.000 (0.011) loss 0.1140 (0.6894) lr 5.1825e-04 eta 2:13:37
epoch [39/50] batch [80/796] time 0.832 (0.842) data 0.000 (0.008) loss 0.3384 (0.6989) lr 5.1825e-04 eta 2:12:51
epoch [39/50] batch [100/796] time 0.832 (0.840) data 0.000 (0.006) loss 1.2634 (0.7271) lr 5.1825e-04 eta 2:12:19
epoch [39/50] batch [120/796] time 0.833 (0.838) data 0.000 (0.005) loss 0.1184 (0.7342) lr 5.1825e-04 eta 2:11:48
epoch [39/50] batch [140/796] time 0.809 (0.838) data 0.000 (0.005) loss 0.0932 (0.7325) lr 5.1825e-04 eta 2:11:24
epoch [39/50] batch [160/796] time 0.843 (0.837) data 0.000 (0.004) loss 0.9354 (0.7317) lr 5.1825e-04 eta 2:11:03
epoch [39/50] batch [180/796] time 0.841 (0.837) data 0.000 (0.004) loss 2.0047 (0.7488) lr 5.1825e-04 eta 2:10:42
epoch [39/50] batch [200/796] time 0.834 (0.836) data 0.000 (0.003) loss 0.0918 (0.7433) lr 5.1825e-04 eta 2:10:21
epoch [39/50] batch [220/796] time 0.832 (0.836) data 0.000 (0.003) loss 3.1528 (0.7390) lr 5.1825e-04 eta 2:10:00
epoch [39/50] batch [240/796] time 0.819 (0.836) data 0.000 (0.003) loss 0.2737 (0.7379) lr 5.1825e-04 eta 2:09:40
epoch [39/50] batch [260/796] time 0.834 (0.835) data 0.000 (0.003) loss 0.4406 (0.7310) lr 5.1825e-04 eta 2:09:18
epoch [39/50] batch [280/796] time 0.818 (0.835) data 0.000 (0.002) loss 0.8133 (0.7478) lr 5.1825e-04 eta 2:08:59
epoch [39/50] batch [300/796] time 0.810 (0.834) data 0.000 (0.002) loss 0.0404 (0.7372) lr 5.1825e-04 eta 2:08:40
epoch [39/50] batch [320/796] time 0.843 (0.834) data 0.000 (0.002) loss 1.1806 (0.7323) lr 5.1825e-04 eta 2:08:23
epoch [39/50] batch [340/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.7395 (0.7388) lr 5.1825e-04 eta 2:08:06
epoch [39/50] batch [360/796] time 0.832 (0.834) data 0.000 (0.002) loss 0.2844 (0.7326) lr 5.1825e-04 eta 2:07:47
epoch [39/50] batch [380/796] time 0.844 (0.834) data 0.000 (0.002) loss 1.0441 (0.7309) lr 5.1825e-04 eta 2:07:33
epoch [39/50] batch [400/796] time 0.844 (0.835) data 0.000 (0.002) loss 0.5162 (0.7334) lr 5.1825e-04 eta 2:07:18
epoch [39/50] batch [420/796] time 0.808 (0.835) data 0.000 (0.002) loss 0.0562 (0.7413) lr 5.1825e-04 eta 2:07:01
epoch [39/50] batch [440/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.7598 (0.7471) lr 5.1825e-04 eta 2:06:44
epoch [39/50] batch [460/796] time 0.820 (0.835) data 0.000 (0.002) loss 0.2190 (0.7443) lr 5.1825e-04 eta 2:06:27
epoch [39/50] batch [480/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.6199 (0.7483) lr 5.1825e-04 eta 2:06:10
epoch [39/50] batch [500/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.8000 (0.7484) lr 5.1825e-04 eta 2:05:53
epoch [39/50] batch [520/796] time 0.809 (0.834) data 0.000 (0.001) loss 0.4782 (0.7491) lr 5.1825e-04 eta 2:05:35
epoch [39/50] batch [540/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.7399 (0.7394) lr 5.1825e-04 eta 2:05:17
epoch [39/50] batch [560/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.3235 (0.7404) lr 5.1825e-04 eta 2:05:00
epoch [39/50] batch [580/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.2123 (0.7434) lr 5.1825e-04 eta 2:04:43
epoch [39/50] batch [600/796] time 0.819 (0.834) data 0.000 (0.001) loss 1.8590 (0.7454) lr 5.1825e-04 eta 2:04:26
epoch [39/50] batch [620/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.1059 (0.7484) lr 5.1825e-04 eta 2:04:09
epoch [39/50] batch [640/796] time 0.819 (0.834) data 0.000 (0.001) loss 1.6161 (0.7481) lr 5.1825e-04 eta 2:03:52
epoch [39/50] batch [660/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.2028 (0.7509) lr 5.1825e-04 eta 2:03:35
epoch [39/50] batch [680/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.7276 (0.7527) lr 5.1825e-04 eta 2:03:18
epoch [39/50] batch [700/796] time 0.822 (0.834) data 0.000 (0.001) loss 1.2628 (0.7513) lr 5.1825e-04 eta 2:03:01
epoch [39/50] batch [720/796] time 0.825 (0.834) data 0.000 (0.001) loss 0.4930 (0.7569) lr 5.1825e-04 eta 2:02:43
epoch [39/50] batch [740/796] time 0.831 (0.834) data 0.000 (0.001) loss 0.0282 (0.7540) lr 5.1825e-04 eta 2:02:26
epoch [39/50] batch [760/796] time 0.831 (0.834) data 0.000 (0.001) loss 0.6967 (0.7599) lr 5.1825e-04 eta 2:02:08
epoch [39/50] batch [780/796] time 0.842 (0.833) data 0.000 (0.001) loss 0.4471 (0.7617) lr 5.1825e-04 eta 2:01:51
epoch [40/50] batch [20/796] time 0.841 (0.862) data 0.000 (0.028) loss 0.8276 (0.9063) lr 4.6417e-04 eta 2:05:27
epoch [40/50] batch [40/796] time 0.845 (0.849) data 0.000 (0.014) loss 0.7303 (0.8799) lr 4.6417e-04 eta 2:03:16
epoch [40/50] batch [60/796] time 0.818 (0.843) data 0.000 (0.010) loss 0.0501 (0.7961) lr 4.6417e-04 eta 2:02:06
epoch [40/50] batch [80/796] time 0.843 (0.841) data 0.000 (0.007) loss 1.5990 (0.8586) lr 4.6417e-04 eta 2:01:33
epoch [40/50] batch [100/796] time 0.843 (0.838) data 0.000 (0.006) loss 0.0687 (0.8208) lr 4.6417e-04 eta 2:00:57
epoch [40/50] batch [120/796] time 0.839 (0.838) data 0.000 (0.005) loss 0.1494 (0.7905) lr 4.6417e-04 eta 2:00:38
epoch [40/50] batch [140/796] time 0.843 (0.838) data 0.000 (0.004) loss 0.4030 (0.7779) lr 4.6417e-04 eta 2:00:16
epoch [40/50] batch [160/796] time 0.843 (0.837) data 0.000 (0.004) loss 1.2792 (0.7826) lr 4.6417e-04 eta 1:59:53
epoch [40/50] batch [180/796] time 0.818 (0.836) data 0.000 (0.003) loss 0.5730 (0.7760) lr 4.6417e-04 eta 1:59:32
epoch [40/50] batch [200/796] time 0.818 (0.836) data 0.000 (0.003) loss 0.0598 (0.7755) lr 4.6417e-04 eta 1:59:12
epoch [40/50] batch [220/796] time 0.832 (0.835) data 0.000 (0.003) loss 0.9749 (0.7727) lr 4.6417e-04 eta 1:58:50
epoch [40/50] batch [240/796] time 0.840 (0.835) data 0.000 (0.003) loss 3.3745 (0.7966) lr 4.6417e-04 eta 1:58:29
epoch [40/50] batch [260/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.1240 (0.7855) lr 4.6417e-04 eta 1:58:10
epoch [40/50] batch [280/796] time 0.819 (0.834) data 0.000 (0.002) loss 0.8603 (0.7898) lr 4.6417e-04 eta 1:57:53
epoch [40/50] batch [300/796] time 0.809 (0.834) data 0.000 (0.002) loss 0.1274 (0.7914) lr 4.6417e-04 eta 1:57:36
epoch [40/50] batch [320/796] time 0.819 (0.834) data 0.000 (0.002) loss 0.4475 (0.7910) lr 4.6417e-04 eta 1:57:18
epoch [40/50] batch [340/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.0941 (0.7807) lr 4.6417e-04 eta 1:57:01
epoch [40/50] batch [360/796] time 0.818 (0.834) data 0.000 (0.002) loss 0.3374 (0.7745) lr 4.6417e-04 eta 1:56:43
epoch [40/50] batch [380/796] time 0.819 (0.834) data 0.000 (0.002) loss 0.4926 (0.7681) lr 4.6417e-04 eta 1:56:26
epoch [40/50] batch [400/796] time 0.844 (0.834) data 0.000 (0.002) loss 0.0839 (0.7667) lr 4.6417e-04 eta 1:56:08
epoch [40/50] batch [420/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.3653 (0.7589) lr 4.6417e-04 eta 1:55:50
epoch [40/50] batch [440/796] time 0.843 (0.834) data 0.000 (0.001) loss 1.7967 (0.7650) lr 4.6417e-04 eta 1:55:33
epoch [40/50] batch [460/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.7621 (0.7640) lr 4.6417e-04 eta 1:55:16
epoch [40/50] batch [480/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.8087 (0.7682) lr 4.6417e-04 eta 1:55:00
epoch [40/50] batch [500/796] time 0.844 (0.834) data 0.000 (0.001) loss 0.3166 (0.7689) lr 4.6417e-04 eta 1:54:44
epoch [40/50] batch [520/796] time 0.827 (0.834) data 0.000 (0.001) loss 1.0566 (0.7709) lr 4.6417e-04 eta 1:54:27
epoch [40/50] batch [540/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.3422 (0.7699) lr 4.6417e-04 eta 1:54:10
epoch [40/50] batch [560/796] time 0.818 (0.834) data 0.000 (0.001) loss 0.2015 (0.7649) lr 4.6417e-04 eta 1:53:55
epoch [40/50] batch [580/796] time 0.841 (0.834) data 0.000 (0.001) loss 1.4310 (0.7767) lr 4.6417e-04 eta 1:53:39
epoch [40/50] batch [600/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.0950 (0.7727) lr 4.6417e-04 eta 1:53:22
epoch [40/50] batch [620/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.5459 (0.7721) lr 4.6417e-04 eta 1:53:05
epoch [40/50] batch [640/796] time 0.832 (0.834) data 0.000 (0.001) loss 2.5646 (0.7825) lr 4.6417e-04 eta 1:52:48
epoch [40/50] batch [660/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.7508 (0.7778) lr 4.6417e-04 eta 1:52:33
epoch [40/50] batch [680/796] time 0.833 (0.834) data 0.000 (0.001) loss 1.3159 (0.7787) lr 4.6417e-04 eta 1:52:16
epoch [40/50] batch [700/796] time 0.849 (0.834) data 0.000 (0.001) loss 0.5276 (0.7878) lr 4.6417e-04 eta 1:51:59
epoch [40/50] batch [720/796] time 0.843 (0.834) data 0.000 (0.001) loss 1.5728 (0.7826) lr 4.6417e-04 eta 1:51:42
epoch [40/50] batch [740/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.3189 (0.7802) lr 4.6417e-04 eta 1:51:26
epoch [40/50] batch [760/796] time 0.834 (0.834) data 0.000 (0.001) loss 0.1826 (0.7807) lr 4.6417e-04 eta 1:51:08
epoch [40/50] batch [780/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.9501 (0.7778) lr 4.6417e-04 eta 1:50:52
epoch [41/50] batch [20/796] time 0.819 (0.866) data 0.000 (0.032) loss 1.9857 (0.8625) lr 4.1221e-04 eta 1:54:33
epoch [41/50] batch [40/796] time 0.841 (0.850) data 0.001 (0.016) loss 1.0637 (0.8283) lr 4.1221e-04 eta 1:52:08
epoch [41/50] batch [60/796] time 0.842 (0.844) data 0.000 (0.011) loss 0.8613 (0.8170) lr 4.1221e-04 eta 1:51:09
epoch [41/50] batch [80/796] time 0.842 (0.841) data 0.000 (0.008) loss 0.8887 (0.7947) lr 4.1221e-04 eta 1:50:28
epoch [41/50] batch [100/796] time 0.821 (0.839) data 0.000 (0.007) loss 0.1271 (0.7855) lr 4.1221e-04 eta 1:49:57
epoch [41/50] batch [120/796] time 0.843 (0.839) data 0.000 (0.006) loss 0.1319 (0.7893) lr 4.1221e-04 eta 1:49:33
epoch [41/50] batch [140/796] time 0.842 (0.837) data 0.000 (0.005) loss 0.3609 (0.7872) lr 4.1221e-04 eta 1:49:07
epoch [41/50] batch [160/796] time 0.809 (0.836) data 0.000 (0.004) loss 0.8687 (0.7772) lr 4.1221e-04 eta 1:48:43
epoch [41/50] batch [180/796] time 0.810 (0.836) data 0.000 (0.004) loss 0.2688 (0.7700) lr 4.1221e-04 eta 1:48:22
epoch [41/50] batch [200/796] time 0.831 (0.836) data 0.000 (0.003) loss 0.8318 (0.7662) lr 4.1221e-04 eta 1:48:04
epoch [41/50] batch [220/796] time 0.821 (0.835) data 0.000 (0.003) loss 1.6909 (0.7768) lr 4.1221e-04 eta 1:47:45
epoch [41/50] batch [240/796] time 0.841 (0.835) data 0.000 (0.003) loss 2.3406 (0.7809) lr 4.1221e-04 eta 1:47:27
epoch [41/50] batch [260/796] time 0.833 (0.835) data 0.000 (0.003) loss 0.6909 (0.7678) lr 4.1221e-04 eta 1:47:09
epoch [41/50] batch [280/796] time 0.809 (0.835) data 0.000 (0.002) loss 0.1466 (0.7805) lr 4.1221e-04 eta 1:46:51
epoch [41/50] batch [300/796] time 0.841 (0.835) data 0.000 (0.002) loss 0.4327 (0.7801) lr 4.1221e-04 eta 1:46:34
epoch [41/50] batch [320/796] time 0.841 (0.835) data 0.000 (0.002) loss 0.6810 (0.7967) lr 4.1221e-04 eta 1:46:16
epoch [41/50] batch [340/796] time 0.841 (0.835) data 0.000 (0.002) loss 0.0149 (0.7804) lr 4.1221e-04 eta 1:46:00
epoch [41/50] batch [360/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.8551 (0.7789) lr 4.1221e-04 eta 1:45:42
epoch [41/50] batch [380/796] time 0.841 (0.834) data 0.000 (0.002) loss 0.3093 (0.7673) lr 4.1221e-04 eta 1:45:25
epoch [41/50] batch [400/796] time 0.841 (0.834) data 0.000 (0.002) loss 1.8211 (0.7710) lr 4.1221e-04 eta 1:45:07
epoch [41/50] batch [420/796] time 0.818 (0.834) data 0.000 (0.002) loss 0.1489 (0.7732) lr 4.1221e-04 eta 1:44:50
epoch [41/50] batch [440/796] time 0.817 (0.834) data 0.000 (0.002) loss 0.6146 (0.7613) lr 4.1221e-04 eta 1:44:33
epoch [41/50] batch [460/796] time 0.819 (0.834) data 0.000 (0.002) loss 1.2777 (0.7711) lr 4.1221e-04 eta 1:44:16
epoch [41/50] batch [480/796] time 0.820 (0.834) data 0.000 (0.002) loss 0.2798 (0.7741) lr 4.1221e-04 eta 1:43:59
epoch [41/50] batch [500/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.8264 (0.7769) lr 4.1221e-04 eta 1:43:42
epoch [41/50] batch [520/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.1162 (0.7699) lr 4.1221e-04 eta 1:43:25
epoch [41/50] batch [540/796] time 0.845 (0.834) data 0.000 (0.001) loss 0.0918 (0.7652) lr 4.1221e-04 eta 1:43:08
epoch [41/50] batch [560/796] time 0.818 (0.834) data 0.000 (0.001) loss 0.1550 (0.7651) lr 4.1221e-04 eta 1:42:51
epoch [41/50] batch [580/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.0422 (0.7583) lr 4.1221e-04 eta 1:42:34
epoch [41/50] batch [600/796] time 0.840 (0.834) data 0.000 (0.001) loss 1.3198 (0.7713) lr 4.1221e-04 eta 1:42:17
epoch [41/50] batch [620/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.3190 (0.7695) lr 4.1221e-04 eta 1:42:00
epoch [41/50] batch [640/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.4493 (0.7705) lr 4.1221e-04 eta 1:41:43
epoch [41/50] batch [660/796] time 0.857 (0.834) data 0.000 (0.001) loss 0.2082 (0.7656) lr 4.1221e-04 eta 1:41:27
epoch [41/50] batch [680/796] time 1.000 (0.838) data 0.000 (0.001) loss 0.4379 (0.7651) lr 4.1221e-04 eta 1:41:39
epoch [41/50] batch [700/796] time 0.907 (0.841) data 0.000 (0.001) loss 0.1372 (0.7607) lr 4.1221e-04 eta 1:41:48
epoch [41/50] batch [720/796] time 0.844 (0.845) data 0.000 (0.001) loss 1.3973 (0.7626) lr 4.1221e-04 eta 1:41:57
epoch [41/50] batch [740/796] time 0.857 (0.847) data 0.000 (0.001) loss 0.4845 (0.7601) lr 4.1221e-04 eta 1:41:52
epoch [41/50] batch [760/796] time 0.843 (0.851) data 0.000 (0.001) loss 0.6195 (0.7540) lr 4.1221e-04 eta 1:42:09
epoch [41/50] batch [780/796] time 0.921 (0.852) data 0.000 (0.001) loss 0.4595 (0.7536) lr 4.1221e-04 eta 1:41:55
epoch [42/50] batch [20/796] time 1.093 (0.999) data 0.000 (0.029) loss 2.0736 (0.9549) lr 3.6258e-04 eta 1:58:58
epoch [42/50] batch [40/796] time 0.921 (0.976) data 0.000 (0.015) loss 0.2235 (0.9421) lr 3.6258e-04 eta 1:55:54
epoch [42/50] batch [60/796] time 0.824 (0.974) data 0.000 (0.010) loss 0.3121 (0.8858) lr 3.6258e-04 eta 1:55:22
epoch [42/50] batch [80/796] time 0.819 (0.938) data 0.000 (0.007) loss 0.7205 (0.8030) lr 3.6258e-04 eta 1:50:47
epoch [42/50] batch [100/796] time 0.834 (0.917) data 0.000 (0.006) loss 0.7031 (0.7596) lr 3.6258e-04 eta 1:47:58
epoch [42/50] batch [120/796] time 0.842 (0.903) data 0.000 (0.005) loss 0.6382 (0.7404) lr 3.6258e-04 eta 1:46:01
epoch [42/50] batch [140/796] time 0.841 (0.893) data 0.000 (0.004) loss 1.5268 (0.7700) lr 3.6258e-04 eta 1:44:32
epoch [42/50] batch [160/796] time 0.895 (0.887) data 0.000 (0.004) loss 0.1804 (0.7876) lr 3.6258e-04 eta 1:43:35
epoch [42/50] batch [180/796] time 0.872 (0.897) data 0.000 (0.003) loss 0.3048 (0.7868) lr 3.6258e-04 eta 1:44:27
epoch [42/50] batch [200/796] time 0.833 (0.908) data 0.000 (0.003) loss 2.2016 (0.7839) lr 3.6258e-04 eta 1:45:20
epoch [42/50] batch [220/796] time 0.821 (0.914) data 0.000 (0.003) loss 1.0023 (0.7912) lr 3.6258e-04 eta 1:45:45
epoch [42/50] batch [240/796] time 0.970 (0.911) data 0.000 (0.003) loss 0.1551 (0.7975) lr 3.6258e-04 eta 1:45:05
epoch [42/50] batch [260/796] time 0.863 (0.915) data 0.000 (0.002) loss 0.8945 (0.8123) lr 3.6258e-04 eta 1:45:18
epoch [42/50] batch [280/796] time 0.944 (0.918) data 0.000 (0.002) loss 0.4416 (0.8013) lr 3.6258e-04 eta 1:45:17
epoch [42/50] batch [300/796] time 0.842 (0.921) data 0.000 (0.002) loss 0.3753 (0.8087) lr 3.6258e-04 eta 1:45:23
epoch [42/50] batch [320/796] time 0.842 (0.921) data 0.000 (0.002) loss 0.0964 (0.8156) lr 3.6258e-04 eta 1:45:00
epoch [42/50] batch [340/796] time 0.820 (0.927) data 0.000 (0.002) loss 0.1274 (0.8062) lr 3.6258e-04 eta 1:45:26
epoch [42/50] batch [360/796] time 0.938 (0.925) data 0.000 (0.002) loss 1.0032 (0.8103) lr 3.6258e-04 eta 1:44:51
epoch [42/50] batch [380/796] time 0.843 (0.925) data 0.000 (0.002) loss 0.4814 (0.7979) lr 3.6258e-04 eta 1:44:35
epoch [42/50] batch [400/796] time 0.840 (0.920) data 0.000 (0.002) loss 2.1009 (0.8031) lr 3.6258e-04 eta 1:43:45
epoch [42/50] batch [420/796] time 0.832 (0.916) data 0.000 (0.002) loss 0.6600 (0.7913) lr 3.6258e-04 eta 1:42:58
epoch [42/50] batch [440/796] time 0.842 (0.912) data 0.000 (0.002) loss 0.5505 (0.7935) lr 3.6258e-04 eta 1:42:14
epoch [42/50] batch [460/796] time 0.842 (0.909) data 0.000 (0.001) loss 0.1260 (0.7879) lr 3.6258e-04 eta 1:41:32
epoch [42/50] batch [480/796] time 0.824 (0.906) data 0.000 (0.001) loss 0.0459 (0.7820) lr 3.6258e-04 eta 1:40:54
epoch [42/50] batch [500/796] time 0.840 (0.903) data 0.000 (0.001) loss 0.7233 (0.7874) lr 3.6258e-04 eta 1:40:16
epoch [42/50] batch [520/796] time 0.818 (0.900) data 0.000 (0.001) loss 1.3979 (0.7899) lr 3.6258e-04 eta 1:39:40
epoch [42/50] batch [540/796] time 0.843 (0.898) data 0.000 (0.001) loss 0.7722 (0.7827) lr 3.6258e-04 eta 1:39:05
epoch [42/50] batch [560/796] time 0.819 (0.895) data 0.000 (0.001) loss 0.2484 (0.7833) lr 3.6258e-04 eta 1:38:32
epoch [42/50] batch [580/796] time 0.842 (0.893) data 0.000 (0.001) loss 2.0167 (0.7813) lr 3.6258e-04 eta 1:38:00
epoch [42/50] batch [600/796] time 0.843 (0.891) data 0.000 (0.001) loss 0.1403 (0.7809) lr 3.6258e-04 eta 1:37:29
epoch [42/50] batch [620/796] time 0.843 (0.889) data 0.000 (0.001) loss 0.3578 (0.7719) lr 3.6258e-04 eta 1:36:59
epoch [42/50] batch [640/796] time 0.844 (0.887) data 0.000 (0.001) loss 0.5056 (0.7717) lr 3.6258e-04 eta 1:36:29
epoch [42/50] batch [660/796] time 0.820 (0.886) data 0.000 (0.001) loss 0.0891 (0.7644) lr 3.6258e-04 eta 1:36:00
epoch [42/50] batch [680/796] time 0.834 (0.884) data 0.000 (0.001) loss 0.5773 (0.7631) lr 3.6258e-04 eta 1:35:32
epoch [42/50] batch [700/796] time 0.832 (0.883) data 0.000 (0.001) loss 0.2821 (0.7654) lr 3.6258e-04 eta 1:35:05
epoch [42/50] batch [720/796] time 0.843 (0.881) data 0.000 (0.001) loss 0.2282 (0.7668) lr 3.6258e-04 eta 1:34:39
epoch [42/50] batch [740/796] time 0.921 (0.881) data 0.000 (0.001) loss 0.0455 (0.7639) lr 3.6258e-04 eta 1:34:18
epoch [42/50] batch [760/796] time 0.929 (0.883) data 0.000 (0.001) loss 0.0879 (0.7591) lr 3.6258e-04 eta 1:34:17
epoch [42/50] batch [780/796] time 0.958 (0.885) data 0.000 (0.001) loss 1.1865 (0.7623) lr 3.6258e-04 eta 1:34:11
epoch [43/50] batch [20/796] time 1.107 (0.984) data 0.000 (0.037) loss 0.6515 (1.0627) lr 3.1545e-04 eta 1:44:07
epoch [43/50] batch [40/796] time 0.837 (0.983) data 0.000 (0.019) loss 0.4794 (0.8422) lr 3.1545e-04 eta 1:43:41
epoch [43/50] batch [60/796] time 0.817 (0.981) data 0.001 (0.013) loss 1.1475 (0.8080) lr 3.1545e-04 eta 1:43:10
epoch [43/50] batch [80/796] time 1.024 (0.961) data 0.000 (0.010) loss 1.6906 (0.8250) lr 3.1545e-04 eta 1:40:43
epoch [43/50] batch [100/796] time 0.848 (0.981) data 0.000 (0.008) loss 0.8164 (0.7734) lr 3.1545e-04 eta 1:42:27
epoch [43/50] batch [120/796] time 0.929 (0.962) data 0.000 (0.007) loss 0.2720 (0.7518) lr 3.1545e-04 eta 1:40:12
epoch [43/50] batch [140/796] time 0.833 (0.959) data 0.000 (0.006) loss 0.9327 (0.7874) lr 3.1545e-04 eta 1:39:34
epoch [43/50] batch [160/796] time 0.820 (0.944) data 0.000 (0.005) loss 0.5278 (0.7764) lr 3.1545e-04 eta 1:37:37
epoch [43/50] batch [180/796] time 0.843 (0.931) data 0.000 (0.004) loss 0.8549 (0.7885) lr 3.1545e-04 eta 1:36:01
epoch [43/50] batch [200/796] time 0.832 (0.921) data 0.000 (0.004) loss 1.2027 (0.7755) lr 3.1545e-04 eta 1:34:42
epoch [43/50] batch [220/796] time 0.837 (0.913) data 0.000 (0.004) loss 0.2978 (0.7716) lr 3.1545e-04 eta 1:33:34
epoch [43/50] batch [240/796] time 0.831 (0.907) data 0.000 (0.003) loss 0.2125 (0.7691) lr 3.1545e-04 eta 1:32:35
epoch [43/50] batch [260/796] time 0.843 (0.901) data 0.000 (0.003) loss 0.4340 (0.7672) lr 3.1545e-04 eta 1:31:42
epoch [43/50] batch [280/796] time 0.833 (0.896) data 0.000 (0.003) loss 0.1401 (0.7706) lr 3.1545e-04 eta 1:30:53
epoch [43/50] batch [300/796] time 0.821 (0.892) data 0.000 (0.003) loss 1.1368 (0.7665) lr 3.1545e-04 eta 1:30:10
epoch [43/50] batch [320/796] time 0.841 (0.888) data 0.000 (0.003) loss 1.6970 (0.7734) lr 3.1545e-04 eta 1:29:29
epoch [43/50] batch [340/796] time 0.833 (0.885) data 0.000 (0.002) loss 0.6357 (0.7573) lr 3.1545e-04 eta 1:28:53
epoch [43/50] batch [360/796] time 0.820 (0.882) data 0.000 (0.002) loss 0.0509 (0.7578) lr 3.1545e-04 eta 1:28:18
epoch [43/50] batch [380/796] time 0.821 (0.879) data 0.000 (0.002) loss 0.5246 (0.7531) lr 3.1545e-04 eta 1:27:44
epoch [43/50] batch [400/796] time 0.849 (0.877) data 0.000 (0.002) loss 0.9644 (0.7551) lr 3.1545e-04 eta 1:27:14
epoch [43/50] batch [420/796] time 0.893 (0.881) data 0.000 (0.002) loss 0.2661 (0.7632) lr 3.1545e-04 eta 1:27:18
epoch [43/50] batch [440/796] time 0.813 (0.885) data 0.000 (0.002) loss 0.8625 (0.7674) lr 3.1545e-04 eta 1:27:25
epoch [43/50] batch [460/796] time 0.822 (0.889) data 0.000 (0.002) loss 0.1682 (0.7701) lr 3.1545e-04 eta 1:27:31
epoch [43/50] batch [480/796] time 0.840 (0.890) data 0.000 (0.002) loss 0.6729 (0.7700) lr 3.1545e-04 eta 1:27:21
epoch [43/50] batch [500/796] time 0.843 (0.895) data 0.000 (0.002) loss 0.0789 (0.7672) lr 3.1545e-04 eta 1:27:31
epoch [43/50] batch [520/796] time 1.718 (0.897) data 0.000 (0.002) loss 0.3589 (0.7686) lr 3.1545e-04 eta 1:27:24
epoch [43/50] batch [540/796] time 0.925 (0.897) data 0.000 (0.002) loss 0.3558 (0.7769) lr 3.1545e-04 eta 1:27:06
epoch [43/50] batch [560/796] time 0.876 (0.899) data 0.000 (0.002) loss 0.6782 (0.7765) lr 3.1545e-04 eta 1:27:01
epoch [43/50] batch [580/796] time 0.951 (0.901) data 0.000 (0.002) loss 0.2610 (0.7723) lr 3.1545e-04 eta 1:26:54
epoch [43/50] batch [600/796] time 0.846 (0.903) data 0.001 (0.002) loss 1.0823 (0.7646) lr 3.1545e-04 eta 1:26:47
epoch [43/50] batch [620/796] time 0.842 (0.901) data 0.000 (0.001) loss 1.1932 (0.7603) lr 3.1545e-04 eta 1:26:16
epoch [43/50] batch [640/796] time 0.843 (0.898) data 0.000 (0.001) loss 0.4234 (0.7662) lr 3.1545e-04 eta 1:25:46
epoch [43/50] batch [660/796] time 0.843 (0.897) data 0.000 (0.001) loss 0.7366 (0.7654) lr 3.1545e-04 eta 1:25:17
epoch [43/50] batch [680/796] time 0.842 (0.895) data 0.000 (0.001) loss 0.2897 (0.7647) lr 3.1545e-04 eta 1:24:49
epoch [43/50] batch [700/796] time 0.843 (0.893) data 0.000 (0.001) loss 0.8493 (0.7648) lr 3.1545e-04 eta 1:24:21
epoch [43/50] batch [720/796] time 0.843 (0.891) data 0.000 (0.001) loss 0.2461 (0.7649) lr 3.1545e-04 eta 1:23:54
epoch [43/50] batch [740/796] time 0.833 (0.890) data 0.000 (0.001) loss 0.2598 (0.7665) lr 3.1545e-04 eta 1:23:27
epoch [43/50] batch [760/796] time 0.834 (0.888) data 0.000 (0.001) loss 1.0531 (0.7653) lr 3.1545e-04 eta 1:23:00
epoch [43/50] batch [780/796] time 0.842 (0.887) data 0.000 (0.001) loss 0.2364 (0.7704) lr 3.1545e-04 eta 1:22:35
epoch [44/50] batch [20/796] time 0.819 (0.864) data 0.000 (0.033) loss 0.3740 (0.6145) lr 2.7103e-04 eta 1:19:58
epoch [44/50] batch [40/796] time 0.833 (0.850) data 0.000 (0.017) loss 0.9495 (0.7006) lr 2.7103e-04 eta 1:18:20
epoch [44/50] batch [60/796] time 0.842 (0.844) data 0.000 (0.011) loss 0.9653 (0.7305) lr 2.7103e-04 eta 1:17:34
epoch [44/50] batch [80/796] time 0.831 (0.842) data 0.000 (0.008) loss 1.8776 (0.7425) lr 2.7103e-04 eta 1:17:01
epoch [44/50] batch [100/796] time 0.820 (0.840) data 0.000 (0.007) loss 0.1361 (0.7383) lr 2.7103e-04 eta 1:16:33
epoch [44/50] batch [120/796] time 0.842 (0.838) data 0.000 (0.006) loss 0.1556 (0.7476) lr 2.7103e-04 eta 1:16:08
epoch [44/50] batch [140/796] time 0.819 (0.837) data 0.000 (0.005) loss 0.6529 (0.7920) lr 2.7103e-04 eta 1:15:47
epoch [44/50] batch [160/796] time 0.821 (0.837) data 0.000 (0.004) loss 0.4047 (0.8075) lr 2.7103e-04 eta 1:15:27
epoch [44/50] batch [180/796] time 0.819 (0.836) data 0.000 (0.004) loss 0.8720 (0.8125) lr 2.7103e-04 eta 1:15:09
epoch [44/50] batch [200/796] time 0.819 (0.836) data 0.000 (0.003) loss 1.4523 (0.7991) lr 2.7103e-04 eta 1:14:50
epoch [44/50] batch [220/796] time 0.841 (0.836) data 0.000 (0.003) loss 0.5983 (0.8014) lr 2.7103e-04 eta 1:14:32
epoch [44/50] batch [240/796] time 0.833 (0.835) data 0.000 (0.003) loss 0.5760 (0.7855) lr 2.7103e-04 eta 1:14:13
epoch [44/50] batch [260/796] time 0.842 (0.835) data 0.000 (0.003) loss 0.1280 (0.7896) lr 2.7103e-04 eta 1:13:54
epoch [44/50] batch [280/796] time 0.831 (0.835) data 0.000 (0.003) loss 1.4303 (0.7983) lr 2.7103e-04 eta 1:13:37
epoch [44/50] batch [300/796] time 0.841 (0.834) data 0.000 (0.002) loss 2.0866 (0.8108) lr 2.7103e-04 eta 1:13:19
epoch [44/50] batch [320/796] time 0.831 (0.834) data 0.000 (0.002) loss 0.5177 (0.8095) lr 2.7103e-04 eta 1:13:01
epoch [44/50] batch [340/796] time 0.823 (0.834) data 0.000 (0.002) loss 0.3949 (0.8014) lr 2.7103e-04 eta 1:12:44
epoch [44/50] batch [360/796] time 0.833 (0.834) data 0.000 (0.002) loss 0.0573 (0.7921) lr 2.7103e-04 eta 1:12:27
epoch [44/50] batch [380/796] time 0.818 (0.834) data 0.000 (0.002) loss 0.6280 (0.7824) lr 2.7103e-04 eta 1:12:09
epoch [44/50] batch [400/796] time 0.834 (0.834) data 0.000 (0.002) loss 3.1594 (0.7822) lr 2.7103e-04 eta 1:11:52
epoch [44/50] batch [420/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.4902 (0.7773) lr 2.7103e-04 eta 1:11:35
epoch [44/50] batch [440/796] time 0.819 (0.834) data 0.000 (0.002) loss 1.6782 (0.7879) lr 2.7103e-04 eta 1:11:18
epoch [44/50] batch [460/796] time 0.842 (0.834) data 0.000 (0.002) loss 1.8903 (0.7936) lr 2.7103e-04 eta 1:11:01
epoch [44/50] batch [480/796] time 0.833 (0.834) data 0.000 (0.002) loss 0.8084 (0.8009) lr 2.7103e-04 eta 1:10:44
epoch [44/50] batch [500/796] time 0.841 (0.834) data 0.000 (0.002) loss 0.0331 (0.8063) lr 2.7103e-04 eta 1:10:27
epoch [44/50] batch [520/796] time 0.834 (0.833) data 0.000 (0.001) loss 2.9379 (0.8084) lr 2.7103e-04 eta 1:10:10
epoch [44/50] batch [540/796] time 0.832 (0.833) data 0.000 (0.001) loss 0.2889 (0.8096) lr 2.7103e-04 eta 1:09:53
epoch [44/50] batch [560/796] time 0.820 (0.833) data 0.000 (0.001) loss 0.5645 (0.8061) lr 2.7103e-04 eta 1:09:36
epoch [44/50] batch [580/796] time 0.834 (0.833) data 0.000 (0.001) loss 0.1597 (0.8001) lr 2.7103e-04 eta 1:09:19
epoch [44/50] batch [600/796] time 0.847 (0.833) data 0.000 (0.001) loss 0.0760 (0.7905) lr 2.7103e-04 eta 1:09:03
epoch [44/50] batch [620/796] time 0.825 (0.833) data 0.000 (0.001) loss 0.2508 (0.7887) lr 2.7103e-04 eta 1:08:46
epoch [44/50] batch [640/796] time 0.809 (0.833) data 0.000 (0.001) loss 0.1751 (0.7894) lr 2.7103e-04 eta 1:08:29
epoch [44/50] batch [660/796] time 0.819 (0.833) data 0.000 (0.001) loss 0.2503 (0.7912) lr 2.7103e-04 eta 1:08:12
epoch [44/50] batch [680/796] time 0.842 (0.833) data 0.000 (0.001) loss 0.1745 (0.7942) lr 2.7103e-04 eta 1:07:55
epoch [44/50] batch [700/796] time 0.842 (0.833) data 0.000 (0.001) loss 0.4152 (0.7848) lr 2.7103e-04 eta 1:07:38
epoch [44/50] batch [720/796] time 0.834 (0.833) data 0.000 (0.001) loss 0.1512 (0.7826) lr 2.7103e-04 eta 1:07:22
epoch [44/50] batch [740/796] time 0.843 (0.833) data 0.000 (0.001) loss 1.3794 (0.7807) lr 2.7103e-04 eta 1:07:05
epoch [44/50] batch [760/796] time 0.820 (0.833) data 0.000 (0.001) loss 0.4755 (0.7814) lr 2.7103e-04 eta 1:06:48
epoch [44/50] batch [780/796] time 0.844 (0.833) data 0.000 (0.001) loss 0.0742 (0.7759) lr 2.7103e-04 eta 1:06:31
epoch [45/50] batch [20/796] time 0.810 (0.865) data 0.000 (0.032) loss 1.1695 (0.5470) lr 2.2949e-04 eta 1:08:32
epoch [45/50] batch [40/796] time 0.833 (0.851) data 0.000 (0.016) loss 0.0903 (0.6342) lr 2.2949e-04 eta 1:07:08
epoch [45/50] batch [60/796] time 0.819 (0.845) data 0.000 (0.011) loss 0.4284 (0.7907) lr 2.2949e-04 eta 1:06:23
epoch [45/50] batch [80/796] time 0.832 (0.841) data 0.000 (0.008) loss 0.9028 (0.7905) lr 2.2949e-04 eta 1:05:51
epoch [45/50] batch [100/796] time 0.833 (0.840) data 0.000 (0.007) loss 0.4170 (0.7990) lr 2.2949e-04 eta 1:05:28
epoch [45/50] batch [120/796] time 0.841 (0.839) data 0.000 (0.006) loss 0.3030 (0.7600) lr 2.2949e-04 eta 1:05:06
epoch [45/50] batch [140/796] time 0.841 (0.838) data 0.000 (0.005) loss 0.6679 (0.7581) lr 2.2949e-04 eta 1:04:43
epoch [45/50] batch [160/796] time 0.810 (0.837) data 0.000 (0.004) loss 1.0328 (0.7300) lr 2.2949e-04 eta 1:04:23
epoch [45/50] batch [180/796] time 0.818 (0.836) data 0.000 (0.004) loss 0.8749 (0.7556) lr 2.2949e-04 eta 1:04:04
epoch [45/50] batch [200/796] time 0.832 (0.836) data 0.000 (0.003) loss 0.5949 (0.7652) lr 2.2949e-04 eta 1:03:45
epoch [45/50] batch [220/796] time 0.843 (0.836) data 0.000 (0.003) loss 2.3090 (0.7763) lr 2.2949e-04 eta 1:03:27
epoch [45/50] batch [240/796] time 0.843 (0.836) data 0.000 (0.003) loss 0.6590 (0.7682) lr 2.2949e-04 eta 1:03:09
epoch [45/50] batch [260/796] time 0.831 (0.835) data 0.000 (0.003) loss 0.5422 (0.7804) lr 2.2949e-04 eta 1:02:52
epoch [45/50] batch [280/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.3562 (0.7796) lr 2.2949e-04 eta 1:02:33
epoch [45/50] batch [300/796] time 0.831 (0.834) data 0.000 (0.002) loss 1.0996 (0.7950) lr 2.2949e-04 eta 1:02:14
epoch [45/50] batch [320/796] time 0.843 (0.834) data 0.000 (0.002) loss 0.2377 (0.7841) lr 2.2949e-04 eta 1:01:58
epoch [45/50] batch [340/796] time 0.818 (0.834) data 0.000 (0.002) loss 0.5134 (0.7763) lr 2.2949e-04 eta 1:01:40
epoch [45/50] batch [360/796] time 0.819 (0.834) data 0.000 (0.002) loss 0.6531 (0.7958) lr 2.2949e-04 eta 1:01:23
epoch [45/50] batch [380/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.2967 (0.7947) lr 2.2949e-04 eta 1:01:06
epoch [45/50] batch [400/796] time 0.810 (0.834) data 0.000 (0.002) loss 0.0265 (0.7881) lr 2.2949e-04 eta 1:00:49
epoch [45/50] batch [420/796] time 0.820 (0.834) data 0.000 (0.002) loss 1.3416 (0.7905) lr 2.2949e-04 eta 1:00:32
epoch [45/50] batch [440/796] time 0.842 (0.834) data 0.000 (0.002) loss 1.3947 (0.7910) lr 2.2949e-04 eta 1:00:15
epoch [45/50] batch [460/796] time 0.819 (0.834) data 0.000 (0.002) loss 1.0356 (0.7855) lr 2.2949e-04 eta 0:59:58
epoch [45/50] batch [480/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.7099 (0.7817) lr 2.2949e-04 eta 0:59:42
epoch [45/50] batch [500/796] time 0.831 (0.834) data 0.000 (0.001) loss 0.5430 (0.7815) lr 2.2949e-04 eta 0:59:25
epoch [45/50] batch [520/796] time 0.843 (0.834) data 0.000 (0.001) loss 1.9918 (0.7818) lr 2.2949e-04 eta 0:59:08
epoch [45/50] batch [540/796] time 0.818 (0.833) data 0.000 (0.001) loss 0.1063 (0.7729) lr 2.2949e-04 eta 0:58:50
epoch [45/50] batch [560/796] time 0.832 (0.833) data 0.000 (0.001) loss 0.8694 (0.7809) lr 2.2949e-04 eta 0:58:33
epoch [45/50] batch [580/796] time 0.843 (0.833) data 0.000 (0.001) loss 0.1935 (0.7763) lr 2.2949e-04 eta 0:58:16
epoch [45/50] batch [600/796] time 0.819 (0.833) data 0.000 (0.001) loss 0.4876 (0.7703) lr 2.2949e-04 eta 0:58:00
epoch [45/50] batch [620/796] time 0.820 (0.833) data 0.000 (0.001) loss 2.1932 (0.7721) lr 2.2949e-04 eta 0:57:43
epoch [45/50] batch [640/796] time 0.809 (0.833) data 0.000 (0.001) loss 0.5325 (0.7704) lr 2.2949e-04 eta 0:57:27
epoch [45/50] batch [660/796] time 0.832 (0.833) data 0.000 (0.001) loss 2.0365 (0.7684) lr 2.2949e-04 eta 0:57:10
epoch [45/50] batch [680/796] time 0.833 (0.833) data 0.000 (0.001) loss 0.0950 (0.7650) lr 2.2949e-04 eta 0:56:53
epoch [45/50] batch [700/796] time 0.842 (0.833) data 0.000 (0.001) loss 0.4553 (0.7626) lr 2.2949e-04 eta 0:56:37
epoch [45/50] batch [720/796] time 0.841 (0.833) data 0.000 (0.001) loss 1.4871 (0.7583) lr 2.2949e-04 eta 0:56:20
epoch [45/50] batch [740/796] time 0.820 (0.833) data 0.000 (0.001) loss 0.4516 (0.7597) lr 2.2949e-04 eta 0:56:03
epoch [45/50] batch [760/796] time 0.844 (0.833) data 0.000 (0.001) loss 0.2658 (0.7608) lr 2.2949e-04 eta 0:55:47
epoch [45/50] batch [780/796] time 0.841 (0.833) data 0.000 (0.001) loss 0.0910 (0.7526) lr 2.2949e-04 eta 0:55:30
epoch [46/50] batch [20/796] time 0.842 (0.868) data 0.000 (0.032) loss 0.8792 (0.5783) lr 1.9098e-04 eta 0:57:16
epoch [46/50] batch [40/796] time 0.842 (0.851) data 0.000 (0.016) loss 0.4736 (0.5950) lr 1.9098e-04 eta 0:55:51
epoch [46/50] batch [60/796] time 0.841 (0.845) data 0.000 (0.011) loss 1.6500 (0.7389) lr 1.9098e-04 eta 0:55:13
epoch [46/50] batch [80/796] time 0.845 (0.842) data 0.000 (0.008) loss 0.2609 (0.7131) lr 1.9098e-04 eta 0:54:45
epoch [46/50] batch [100/796] time 0.844 (0.840) data 0.000 (0.006) loss 1.0593 (0.7124) lr 1.9098e-04 eta 0:54:19
epoch [46/50] batch [120/796] time 0.842 (0.839) data 0.000 (0.005) loss 0.3253 (0.6913) lr 1.9098e-04 eta 0:53:57
epoch [46/50] batch [140/796] time 0.831 (0.838) data 0.000 (0.005) loss 0.3799 (0.6708) lr 1.9098e-04 eta 0:53:38
epoch [46/50] batch [160/796] time 0.820 (0.837) data 0.000 (0.004) loss 1.7050 (0.7038) lr 1.9098e-04 eta 0:53:17
epoch [46/50] batch [180/796] time 0.819 (0.837) data 0.000 (0.004) loss 0.2765 (0.7039) lr 1.9098e-04 eta 0:52:59
epoch [46/50] batch [200/796] time 0.810 (0.836) data 0.000 (0.003) loss 0.4678 (0.7022) lr 1.9098e-04 eta 0:52:41
epoch [46/50] batch [220/796] time 0.841 (0.836) data 0.000 (0.003) loss 3.5832 (0.7346) lr 1.9098e-04 eta 0:52:24
epoch [46/50] batch [240/796] time 0.842 (0.836) data 0.000 (0.003) loss 0.2382 (0.7125) lr 1.9098e-04 eta 0:52:06
epoch [46/50] batch [260/796] time 0.820 (0.836) data 0.000 (0.003) loss 1.4016 (0.7105) lr 1.9098e-04 eta 0:51:48
epoch [46/50] batch [280/796] time 0.834 (0.836) data 0.000 (0.002) loss 0.6122 (0.7150) lr 1.9098e-04 eta 0:51:31
epoch [46/50] batch [300/796] time 0.818 (0.836) data 0.000 (0.002) loss 0.2581 (0.7137) lr 1.9098e-04 eta 0:51:14
epoch [46/50] batch [320/796] time 0.832 (0.836) data 0.000 (0.002) loss 1.8627 (0.7197) lr 1.9098e-04 eta 0:50:58
epoch [46/50] batch [340/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.7463 (0.7354) lr 1.9098e-04 eta 0:50:40
epoch [46/50] batch [360/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.1376 (0.7308) lr 1.9098e-04 eta 0:50:23
epoch [46/50] batch [380/796] time 0.840 (0.835) data 0.000 (0.002) loss 0.5604 (0.7275) lr 1.9098e-04 eta 0:50:05
epoch [46/50] batch [400/796] time 0.832 (0.835) data 0.000 (0.002) loss 1.3073 (0.7247) lr 1.9098e-04 eta 0:49:48
epoch [46/50] batch [420/796] time 0.819 (0.835) data 0.000 (0.002) loss 1.4713 (0.7421) lr 1.9098e-04 eta 0:49:30
epoch [46/50] batch [440/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.0529 (0.7543) lr 1.9098e-04 eta 0:49:14
epoch [46/50] batch [460/796] time 0.842 (0.834) data 0.000 (0.002) loss 3.9116 (0.7637) lr 1.9098e-04 eta 0:48:57
epoch [46/50] batch [480/796] time 0.825 (0.834) data 0.000 (0.001) loss 0.4181 (0.7752) lr 1.9098e-04 eta 0:48:40
epoch [46/50] batch [500/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.8129 (0.7680) lr 1.9098e-04 eta 0:48:23
epoch [46/50] batch [520/796] time 0.841 (0.834) data 0.000 (0.001) loss 1.8122 (0.7702) lr 1.9098e-04 eta 0:48:06
epoch [46/50] batch [540/796] time 0.841 (0.834) data 0.000 (0.001) loss 1.3234 (0.7680) lr 1.9098e-04 eta 0:47:49
epoch [46/50] batch [560/796] time 0.818 (0.834) data 0.000 (0.001) loss 0.4177 (0.7686) lr 1.9098e-04 eta 0:47:32
epoch [46/50] batch [580/796] time 0.844 (0.834) data 0.000 (0.001) loss 1.7359 (0.7609) lr 1.9098e-04 eta 0:47:15
epoch [46/50] batch [600/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.3697 (0.7659) lr 1.9098e-04 eta 0:46:58
epoch [46/50] batch [620/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.0830 (0.7648) lr 1.9098e-04 eta 0:46:41
epoch [46/50] batch [640/796] time 0.831 (0.834) data 0.000 (0.001) loss 1.3532 (0.7593) lr 1.9098e-04 eta 0:46:24
epoch [46/50] batch [660/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.2368 (0.7625) lr 1.9098e-04 eta 0:46:07
epoch [46/50] batch [680/796] time 0.848 (0.833) data 0.000 (0.001) loss 0.4505 (0.7641) lr 1.9098e-04 eta 0:45:50
epoch [46/50] batch [700/796] time 0.845 (0.833) data 0.000 (0.001) loss 0.0503 (0.7631) lr 1.9098e-04 eta 0:45:33
epoch [46/50] batch [720/796] time 0.832 (0.833) data 0.000 (0.001) loss 0.8722 (0.7574) lr 1.9098e-04 eta 0:45:17
epoch [46/50] batch [740/796] time 0.811 (0.833) data 0.000 (0.001) loss 1.5355 (0.7641) lr 1.9098e-04 eta 0:45:00
epoch [46/50] batch [760/796] time 0.834 (0.833) data 0.000 (0.001) loss 0.7874 (0.7687) lr 1.9098e-04 eta 0:44:43
epoch [46/50] batch [780/796] time 0.821 (0.833) data 0.003 (0.001) loss 0.0572 (0.7629) lr 1.9098e-04 eta 0:44:27
epoch [47/50] batch [20/796] time 0.843 (0.863) data 0.000 (0.031) loss 0.7326 (0.8497) lr 1.5567e-04 eta 0:45:29
epoch [47/50] batch [40/796] time 0.819 (0.846) data 0.000 (0.016) loss 0.5147 (0.7463) lr 1.5567e-04 eta 0:44:20
epoch [47/50] batch [60/796] time 0.842 (0.843) data 0.000 (0.010) loss 0.3075 (0.7323) lr 1.5567e-04 eta 0:43:53
epoch [47/50] batch [80/796] time 0.819 (0.840) data 0.000 (0.008) loss 0.4662 (0.7980) lr 1.5567e-04 eta 0:43:27
epoch [47/50] batch [100/796] time 0.832 (0.838) data 0.000 (0.006) loss 1.3529 (0.7881) lr 1.5567e-04 eta 0:43:05
epoch [47/50] batch [120/796] time 0.835 (0.837) data 0.000 (0.005) loss 1.2774 (0.7833) lr 1.5567e-04 eta 0:42:44
epoch [47/50] batch [140/796] time 0.818 (0.836) data 0.000 (0.005) loss 0.1078 (0.7926) lr 1.5567e-04 eta 0:42:24
epoch [47/50] batch [160/796] time 0.842 (0.835) data 0.000 (0.004) loss 0.4192 (0.7435) lr 1.5567e-04 eta 0:42:06
epoch [47/50] batch [180/796] time 0.842 (0.835) data 0.000 (0.004) loss 3.1452 (0.7589) lr 1.5567e-04 eta 0:41:48
epoch [47/50] batch [200/796] time 0.810 (0.835) data 0.000 (0.003) loss 0.3967 (0.7585) lr 1.5567e-04 eta 0:41:31
epoch [47/50] batch [220/796] time 0.819 (0.834) data 0.000 (0.003) loss 0.1615 (0.7673) lr 1.5567e-04 eta 0:41:13
epoch [47/50] batch [240/796] time 0.836 (0.835) data 0.000 (0.003) loss 0.4918 (0.7444) lr 1.5567e-04 eta 0:40:57
epoch [47/50] batch [260/796] time 0.832 (0.835) data 0.000 (0.003) loss 0.7346 (0.7230) lr 1.5567e-04 eta 0:40:40
epoch [47/50] batch [280/796] time 0.838 (0.834) data 0.000 (0.002) loss 1.3645 (0.7484) lr 1.5567e-04 eta 0:40:23
epoch [47/50] batch [300/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.4937 (0.7547) lr 1.5567e-04 eta 0:40:06
epoch [47/50] batch [320/796] time 0.809 (0.834) data 0.000 (0.002) loss 0.3115 (0.7399) lr 1.5567e-04 eta 0:39:48
epoch [47/50] batch [340/796] time 0.833 (0.834) data 0.000 (0.002) loss 0.3414 (0.7731) lr 1.5567e-04 eta 0:39:31
epoch [47/50] batch [360/796] time 0.845 (0.834) data 0.000 (0.002) loss 0.5548 (0.7731) lr 1.5567e-04 eta 0:39:14
epoch [47/50] batch [380/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.7609 (0.7733) lr 1.5567e-04 eta 0:38:57
epoch [47/50] batch [400/796] time 0.833 (0.834) data 0.000 (0.002) loss 0.5440 (0.7777) lr 1.5567e-04 eta 0:38:40
epoch [47/50] batch [420/796] time 0.820 (0.834) data 0.000 (0.002) loss 0.8530 (0.7785) lr 1.5567e-04 eta 0:38:23
epoch [47/50] batch [440/796] time 0.842 (0.833) data 0.000 (0.002) loss 0.2417 (0.7826) lr 1.5567e-04 eta 0:38:06
epoch [47/50] batch [460/796] time 0.852 (0.833) data 0.000 (0.002) loss 1.0621 (0.7826) lr 1.5567e-04 eta 0:37:50
epoch [47/50] batch [480/796] time 0.834 (0.833) data 0.000 (0.001) loss 0.6070 (0.7833) lr 1.5567e-04 eta 0:37:33
epoch [47/50] batch [500/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.0788 (0.7859) lr 1.5567e-04 eta 0:37:17
epoch [47/50] batch [520/796] time 0.832 (0.833) data 0.000 (0.001) loss 0.9175 (0.7938) lr 1.5567e-04 eta 0:36:59
epoch [47/50] batch [540/796] time 0.842 (0.833) data 0.000 (0.001) loss 1.5939 (0.7867) lr 1.5567e-04 eta 0:36:43
epoch [47/50] batch [560/796] time 0.841 (0.833) data 0.000 (0.001) loss 0.7803 (0.7843) lr 1.5567e-04 eta 0:36:26
epoch [47/50] batch [580/796] time 0.832 (0.833) data 0.000 (0.001) loss 0.2483 (0.7851) lr 1.5567e-04 eta 0:36:09
epoch [47/50] batch [600/796] time 0.819 (0.833) data 0.000 (0.001) loss 0.3294 (0.7810) lr 1.5567e-04 eta 0:35:52
epoch [47/50] batch [620/796] time 0.833 (0.833) data 0.000 (0.001) loss 1.8189 (0.7755) lr 1.5567e-04 eta 0:35:36
epoch [47/50] batch [640/796] time 0.843 (0.833) data 0.000 (0.001) loss 0.7630 (0.7688) lr 1.5567e-04 eta 0:35:19
epoch [47/50] batch [660/796] time 0.832 (0.833) data 0.000 (0.001) loss 3.7743 (0.7789) lr 1.5567e-04 eta 0:35:02
epoch [47/50] batch [680/796] time 0.818 (0.833) data 0.000 (0.001) loss 0.3886 (0.7851) lr 1.5567e-04 eta 0:34:46
epoch [47/50] batch [700/796] time 0.841 (0.833) data 0.000 (0.001) loss 0.9447 (0.7858) lr 1.5567e-04 eta 0:34:29
epoch [47/50] batch [720/796] time 0.842 (0.833) data 0.000 (0.001) loss 1.1017 (0.7826) lr 1.5567e-04 eta 0:34:12
epoch [47/50] batch [740/796] time 0.841 (0.833) data 0.000 (0.001) loss 0.0205 (0.7784) lr 1.5567e-04 eta 0:33:56
epoch [47/50] batch [760/796] time 0.819 (0.833) data 0.000 (0.001) loss 0.5299 (0.7720) lr 1.5567e-04 eta 0:33:39
epoch [47/50] batch [780/796] time 0.841 (0.833) data 0.000 (0.001) loss 1.3156 (0.7817) lr 1.5567e-04 eta 0:33:22
epoch [48/50] batch [20/796] time 0.838 (0.867) data 0.000 (0.034) loss 0.6961 (0.7705) lr 1.2369e-04 eta 0:34:12
epoch [48/50] batch [40/796] time 0.811 (0.848) data 0.000 (0.017) loss 1.9280 (0.7837) lr 1.2369e-04 eta 0:33:12
epoch [48/50] batch [60/796] time 0.832 (0.842) data 0.000 (0.012) loss 1.3327 (0.7345) lr 1.2369e-04 eta 0:32:39
epoch [48/50] batch [80/796] time 0.843 (0.840) data 0.000 (0.009) loss 0.1966 (0.6919) lr 1.2369e-04 eta 0:32:18
epoch [48/50] batch [100/796] time 0.842 (0.838) data 0.000 (0.007) loss 1.3083 (0.7103) lr 1.2369e-04 eta 0:31:58
epoch [48/50] batch [120/796] time 0.842 (0.837) data 0.000 (0.006) loss 0.7868 (0.7173) lr 1.2369e-04 eta 0:31:38
epoch [48/50] batch [140/796] time 0.842 (0.836) data 0.000 (0.005) loss 1.3102 (0.7390) lr 1.2369e-04 eta 0:31:20
epoch [48/50] batch [160/796] time 0.832 (0.836) data 0.000 (0.004) loss 1.0175 (0.7291) lr 1.2369e-04 eta 0:31:03
epoch [48/50] batch [180/796] time 0.832 (0.836) data 0.000 (0.004) loss 0.2095 (0.7614) lr 1.2369e-04 eta 0:30:46
epoch [48/50] batch [200/796] time 0.842 (0.835) data 0.000 (0.004) loss 2.0152 (0.7792) lr 1.2369e-04 eta 0:30:28
epoch [48/50] batch [220/796] time 0.842 (0.835) data 0.000 (0.003) loss 0.0875 (0.7863) lr 1.2369e-04 eta 0:30:10
epoch [48/50] batch [240/796] time 0.832 (0.835) data 0.000 (0.003) loss 0.7921 (0.7809) lr 1.2369e-04 eta 0:29:53
epoch [48/50] batch [260/796] time 0.834 (0.835) data 0.000 (0.003) loss 0.3008 (0.7671) lr 1.2369e-04 eta 0:29:36
epoch [48/50] batch [280/796] time 0.844 (0.835) data 0.000 (0.003) loss 3.1958 (0.7874) lr 1.2369e-04 eta 0:29:19
epoch [48/50] batch [300/796] time 0.809 (0.835) data 0.000 (0.002) loss 0.2425 (0.7746) lr 1.2369e-04 eta 0:29:02
epoch [48/50] batch [320/796] time 0.819 (0.835) data 0.000 (0.002) loss 0.1946 (0.7589) lr 1.2369e-04 eta 0:28:45
epoch [48/50] batch [340/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.0594 (0.7493) lr 1.2369e-04 eta 0:28:28
epoch [48/50] batch [360/796] time 0.841 (0.834) data 0.000 (0.002) loss 0.0979 (0.7417) lr 1.2369e-04 eta 0:28:12
epoch [48/50] batch [380/796] time 0.841 (0.834) data 0.000 (0.002) loss 0.5145 (0.7456) lr 1.2369e-04 eta 0:27:55
epoch [48/50] batch [400/796] time 0.833 (0.834) data 0.000 (0.002) loss 0.7081 (0.7466) lr 1.2369e-04 eta 0:27:38
epoch [48/50] batch [420/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.6171 (0.7460) lr 1.2369e-04 eta 0:27:21
epoch [48/50] batch [440/796] time 0.831 (0.834) data 0.000 (0.002) loss 0.6091 (0.7481) lr 1.2369e-04 eta 0:27:04
epoch [48/50] batch [460/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.0355 (0.7518) lr 1.2369e-04 eta 0:26:47
epoch [48/50] batch [480/796] time 0.831 (0.834) data 0.000 (0.002) loss 0.9852 (0.7524) lr 1.2369e-04 eta 0:26:30
epoch [48/50] batch [500/796] time 0.819 (0.834) data 0.000 (0.002) loss 0.0379 (0.7414) lr 1.2369e-04 eta 0:26:14
epoch [48/50] batch [520/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.2792 (0.7536) lr 1.2369e-04 eta 0:25:57
epoch [48/50] batch [540/796] time 0.831 (0.834) data 0.000 (0.001) loss 0.1885 (0.7514) lr 1.2369e-04 eta 0:25:40
epoch [48/50] batch [560/796] time 0.831 (0.834) data 0.000 (0.001) loss 2.0049 (0.7488) lr 1.2369e-04 eta 0:25:24
epoch [48/50] batch [580/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.1655 (0.7489) lr 1.2369e-04 eta 0:25:07
epoch [48/50] batch [600/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.7786 (0.7476) lr 1.2369e-04 eta 0:24:50
epoch [48/50] batch [620/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.2335 (0.7433) lr 1.2369e-04 eta 0:24:33
epoch [48/50] batch [640/796] time 0.843 (0.833) data 0.000 (0.001) loss 0.0786 (0.7390) lr 1.2369e-04 eta 0:24:16
epoch [48/50] batch [660/796] time 0.819 (0.833) data 0.000 (0.001) loss 0.1173 (0.7355) lr 1.2369e-04 eta 0:24:00
epoch [48/50] batch [680/796] time 0.843 (0.833) data 0.000 (0.001) loss 0.2067 (0.7311) lr 1.2369e-04 eta 0:23:43
epoch [48/50] batch [700/796] time 0.818 (0.833) data 0.000 (0.001) loss 0.4805 (0.7285) lr 1.2369e-04 eta 0:23:26
epoch [48/50] batch [720/796] time 0.831 (0.833) data 0.000 (0.001) loss 0.8658 (0.7268) lr 1.2369e-04 eta 0:23:09
epoch [48/50] batch [740/796] time 0.818 (0.833) data 0.000 (0.001) loss 1.0626 (0.7281) lr 1.2369e-04 eta 0:22:53
epoch [48/50] batch [760/796] time 0.809 (0.833) data 0.000 (0.001) loss 0.4761 (0.7293) lr 1.2369e-04 eta 0:22:36
epoch [48/50] batch [780/796] time 0.841 (0.833) data 0.000 (0.001) loss 0.6382 (0.7267) lr 1.2369e-04 eta 0:22:19
epoch [49/50] batch [20/796] time 0.832 (0.860) data 0.000 (0.028) loss 1.1631 (0.6365) lr 9.5173e-05 eta 0:22:32
epoch [49/50] batch [40/796] time 0.818 (0.845) data 0.000 (0.014) loss 0.5286 (0.7051) lr 9.5173e-05 eta 0:21:50
epoch [49/50] batch [60/796] time 0.831 (0.840) data 0.000 (0.010) loss 0.2776 (0.6188) lr 9.5173e-05 eta 0:21:27
epoch [49/50] batch [80/796] time 0.818 (0.838) data 0.000 (0.007) loss 0.1129 (0.6795) lr 9.5173e-05 eta 0:21:06
epoch [49/50] batch [100/796] time 0.842 (0.837) data 0.000 (0.006) loss 0.2007 (0.6739) lr 9.5173e-05 eta 0:20:48
epoch [49/50] batch [120/796] time 0.820 (0.836) data 0.000 (0.005) loss 0.9531 (0.6994) lr 9.5173e-05 eta 0:20:30
epoch [49/50] batch [140/796] time 0.843 (0.835) data 0.000 (0.004) loss 0.2236 (0.6905) lr 9.5173e-05 eta 0:20:13
epoch [49/50] batch [160/796] time 0.841 (0.835) data 0.000 (0.004) loss 0.7412 (0.7131) lr 9.5173e-05 eta 0:19:55
epoch [49/50] batch [180/796] time 0.833 (0.835) data 0.000 (0.003) loss 0.9354 (0.7331) lr 9.5173e-05 eta 0:19:38
epoch [49/50] batch [200/796] time 0.825 (0.834) data 0.000 (0.003) loss 0.2515 (0.7365) lr 9.5173e-05 eta 0:19:21
epoch [49/50] batch [220/796] time 0.842 (0.834) data 0.000 (0.003) loss 0.8796 (0.7308) lr 9.5173e-05 eta 0:19:04
epoch [49/50] batch [240/796] time 0.842 (0.834) data 0.000 (0.003) loss 0.4022 (0.7452) lr 9.5173e-05 eta 0:18:47
epoch [49/50] batch [260/796] time 0.833 (0.834) data 0.000 (0.002) loss 1.0084 (0.7350) lr 9.5173e-05 eta 0:18:30
epoch [49/50] batch [280/796] time 0.840 (0.833) data 0.000 (0.002) loss 1.2016 (0.7371) lr 9.5173e-05 eta 0:18:13
epoch [49/50] batch [300/796] time 0.832 (0.833) data 0.000 (0.002) loss 0.1850 (0.7478) lr 9.5173e-05 eta 0:17:56
epoch [49/50] batch [320/796] time 0.841 (0.833) data 0.000 (0.002) loss 0.9362 (0.7389) lr 9.5173e-05 eta 0:17:40
epoch [49/50] batch [340/796] time 0.849 (0.833) data 0.000 (0.002) loss 2.0935 (0.7360) lr 9.5173e-05 eta 0:17:23
epoch [49/50] batch [360/796] time 0.822 (0.833) data 0.000 (0.002) loss 1.0457 (0.7328) lr 9.5173e-05 eta 0:17:06
epoch [49/50] batch [380/796] time 0.842 (0.833) data 0.000 (0.002) loss 1.8879 (0.7495) lr 9.5173e-05 eta 0:16:49
epoch [49/50] batch [400/796] time 0.843 (0.833) data 0.000 (0.002) loss 0.0456 (0.7474) lr 9.5173e-05 eta 0:16:32
epoch [49/50] batch [420/796] time 0.819 (0.833) data 0.000 (0.002) loss 1.2801 (0.7518) lr 9.5173e-05 eta 0:16:16
epoch [49/50] batch [440/796] time 0.842 (0.833) data 0.000 (0.001) loss 0.7866 (0.7573) lr 9.5173e-05 eta 0:15:59
epoch [49/50] batch [460/796] time 0.833 (0.833) data 0.000 (0.001) loss 0.4821 (0.7662) lr 9.5173e-05 eta 0:15:43
epoch [49/50] batch [480/796] time 0.819 (0.833) data 0.000 (0.001) loss 0.0879 (0.7586) lr 9.5173e-05 eta 0:15:26
epoch [49/50] batch [500/796] time 0.842 (0.833) data 0.000 (0.001) loss 0.0656 (0.7691) lr 9.5173e-05 eta 0:15:09
epoch [49/50] batch [520/796] time 0.841 (0.833) data 0.000 (0.001) loss 2.1677 (0.7746) lr 9.5173e-05 eta 0:14:53
epoch [49/50] batch [540/796] time 0.842 (0.833) data 0.000 (0.001) loss 0.4355 (0.7728) lr 9.5173e-05 eta 0:14:36
epoch [49/50] batch [560/796] time 0.819 (0.833) data 0.000 (0.001) loss 0.3624 (0.7732) lr 9.5173e-05 eta 0:14:19
epoch [49/50] batch [580/796] time 0.833 (0.833) data 0.000 (0.001) loss 0.2557 (0.7713) lr 9.5173e-05 eta 0:14:03
epoch [49/50] batch [600/796] time 0.841 (0.833) data 0.000 (0.001) loss 0.3548 (0.7653) lr 9.5173e-05 eta 0:13:46
epoch [49/50] batch [620/796] time 0.845 (0.833) data 0.000 (0.001) loss 0.3162 (0.7614) lr 9.5173e-05 eta 0:13:29
epoch [49/50] batch [640/796] time 0.842 (0.833) data 0.000 (0.001) loss 0.3333 (0.7606) lr 9.5173e-05 eta 0:13:13
epoch [49/50] batch [660/796] time 0.832 (0.833) data 0.000 (0.001) loss 0.3560 (0.7618) lr 9.5173e-05 eta 0:12:56
epoch [49/50] batch [680/796] time 0.831 (0.833) data 0.000 (0.001) loss 1.5400 (0.7615) lr 9.5173e-05 eta 0:12:39
epoch [49/50] batch [700/796] time 0.841 (0.833) data 0.000 (0.001) loss 0.4187 (0.7593) lr 9.5173e-05 eta 0:12:22
epoch [49/50] batch [720/796] time 0.843 (0.833) data 0.000 (0.001) loss 1.0864 (0.7541) lr 9.5173e-05 eta 0:12:06
epoch [49/50] batch [740/796] time 0.833 (0.833) data 0.000 (0.001) loss 2.5352 (0.7584) lr 9.5173e-05 eta 0:11:49
epoch [49/50] batch [760/796] time 0.843 (0.833) data 0.000 (0.001) loss 0.8717 (0.7544) lr 9.5173e-05 eta 0:11:32
epoch [49/50] batch [780/796] time 0.842 (0.833) data 0.000 (0.001) loss 1.8016 (0.7514) lr 9.5173e-05 eta 0:11:16
epoch [50/50] batch [20/796] time 0.831 (0.865) data 0.000 (0.029) loss 0.1436 (0.7383) lr 7.0224e-05 eta 0:11:11
epoch [50/50] batch [40/796] time 0.842 (0.848) data 0.000 (0.015) loss 0.4005 (0.7177) lr 7.0224e-05 eta 0:10:40
epoch [50/50] batch [60/796] time 0.841 (0.843) data 0.000 (0.010) loss 1.7953 (0.7369) lr 7.0224e-05 eta 0:10:20
epoch [50/50] batch [80/796] time 0.818 (0.840) data 0.000 (0.007) loss 0.0154 (0.7218) lr 7.0224e-05 eta 0:10:01
epoch [50/50] batch [100/796] time 0.833 (0.838) data 0.000 (0.006) loss 0.5180 (0.7147) lr 7.0224e-05 eta 0:09:43
epoch [50/50] batch [120/796] time 0.818 (0.837) data 0.000 (0.005) loss 0.2573 (0.6888) lr 7.0224e-05 eta 0:09:26
epoch [50/50] batch [140/796] time 0.841 (0.837) data 0.000 (0.004) loss 0.5471 (0.7288) lr 7.0224e-05 eta 0:09:08
epoch [50/50] batch [160/796] time 0.842 (0.836) data 0.000 (0.004) loss 1.7856 (0.7447) lr 7.0224e-05 eta 0:08:51
epoch [50/50] batch [180/796] time 0.818 (0.836) data 0.000 (0.003) loss 0.5841 (0.7637) lr 7.0224e-05 eta 0:08:34
epoch [50/50] batch [200/796] time 0.851 (0.836) data 0.000 (0.003) loss 0.7796 (0.7505) lr 7.0224e-05 eta 0:08:18
epoch [50/50] batch [220/796] time 0.842 (0.836) data 0.000 (0.003) loss 0.2651 (0.7254) lr 7.0224e-05 eta 0:08:01
epoch [50/50] batch [240/796] time 0.840 (0.835) data 0.000 (0.003) loss 0.3790 (0.7230) lr 7.0224e-05 eta 0:07:44
epoch [50/50] batch [260/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.9780 (0.7102) lr 7.0224e-05 eta 0:07:27
epoch [50/50] batch [280/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.6984 (0.7202) lr 7.0224e-05 eta 0:07:10
epoch [50/50] batch [300/796] time 0.833 (0.835) data 0.000 (0.002) loss 0.0943 (0.7227) lr 7.0224e-05 eta 0:06:54
epoch [50/50] batch [320/796] time 0.818 (0.835) data 0.000 (0.002) loss 1.5278 (0.7235) lr 7.0224e-05 eta 0:06:37
epoch [50/50] batch [340/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.7610 (0.7308) lr 7.0224e-05 eta 0:06:20
epoch [50/50] batch [360/796] time 0.835 (0.834) data 0.000 (0.002) loss 0.9060 (0.7384) lr 7.0224e-05 eta 0:06:03
epoch [50/50] batch [380/796] time 0.844 (0.834) data 0.000 (0.002) loss 0.3819 (0.7460) lr 7.0224e-05 eta 0:05:46
epoch [50/50] batch [400/796] time 0.818 (0.834) data 0.000 (0.002) loss 0.4264 (0.7592) lr 7.0224e-05 eta 0:05:30
epoch [50/50] batch [420/796] time 0.841 (0.834) data 0.000 (0.002) loss 2.4102 (0.7717) lr 7.0224e-05 eta 0:05:13
epoch [50/50] batch [440/796] time 0.843 (0.834) data 0.000 (0.002) loss 1.2772 (0.7666) lr 7.0224e-05 eta 0:04:56
epoch [50/50] batch [460/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.3114 (0.7679) lr 7.0224e-05 eta 0:04:40
epoch [50/50] batch [480/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.1262 (0.7586) lr 7.0224e-05 eta 0:04:23
epoch [50/50] batch [500/796] time 0.818 (0.834) data 0.000 (0.001) loss 0.3787 (0.7569) lr 7.0224e-05 eta 0:04:06
epoch [50/50] batch [520/796] time 0.809 (0.834) data 0.000 (0.001) loss 1.3680 (0.7598) lr 7.0224e-05 eta 0:03:50
epoch [50/50] batch [540/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.1441 (0.7541) lr 7.0224e-05 eta 0:03:33
epoch [50/50] batch [560/796] time 0.840 (0.834) data 0.000 (0.001) loss 2.5371 (0.7650) lr 7.0224e-05 eta 0:03:16
epoch [50/50] batch [580/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.0435 (0.7696) lr 7.0224e-05 eta 0:03:00
epoch [50/50] batch [600/796] time 0.843 (0.834) data 0.000 (0.001) loss 1.9347 (0.7735) lr 7.0224e-05 eta 0:02:43
epoch [50/50] batch [620/796] time 0.824 (0.834) data 0.000 (0.001) loss 0.0420 (0.7737) lr 7.0224e-05 eta 0:02:26
epoch [50/50] batch [640/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.4854 (0.7787) lr 7.0224e-05 eta 0:02:10
epoch [50/50] batch [660/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.6725 (0.7782) lr 7.0224e-05 eta 0:01:53
epoch [50/50] batch [680/796] time 0.818 (0.834) data 0.000 (0.001) loss 1.4912 (0.7725) lr 7.0224e-05 eta 0:01:36
epoch [50/50] batch [700/796] time 0.818 (0.834) data 0.000 (0.001) loss 2.4779 (0.7692) lr 7.0224e-05 eta 0:01:20
epoch [50/50] batch [720/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.8681 (0.7645) lr 7.0224e-05 eta 0:01:03
epoch [50/50] batch [740/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.1350 (0.7698) lr 7.0224e-05 eta 0:00:46
epoch [50/50] batch [760/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.1111 (0.7682) lr 7.0224e-05 eta 0:00:30
epoch [50/50] batch [780/796] time 0.818 (0.834) data 0.000 (0.001) loss 0.4227 (0.7688) lr 7.0224e-05 eta 0:00:13
Checkpoint saved to output/base2new/train_base/sun397/vit_b16_ep50_c4_BZ4_ProDA/seed2/prompt_learner/model.pth.tar-50
Finish training
Deploy the last-epoch model
Evaluate on the *test* set
=> result
* total: 9,950
* correct: 8,402
* accuracy: 84.44%
* error: 15.56%
* macro_f1: 84.30%
Elapsed: 9:17:47
