***************
** Arguments **
***************
backbone: 
config_file: configs/trainers/ProDA/vit_b16_ep50_c4_BZ4_ProDA.yaml
dataset_config_file: configs/datasets/sun397.yaml
eval_only: False
head: 
load_epoch: None
model_dir: 
no_train: False
opts: ['DATASET.NUM_SHOTS', '16', 'DATASET.SUBSAMPLE_CLASSES', 'base']
output_dir: output/base2new/train_base/sun397/vit_b16_ep50_c4_BZ4_ProDA/seed3
resume: 
root: /mnt/hdd/DATA
seed: 3
source_domains: None
target_domains: None
trainer: ProDA
transforms: None
************
** Config **
************
DATALOADER:
  K_TRANSFORMS: 1
  NUM_WORKERS: 8
  RETURN_IMG0: False
  TEST:
    BATCH_SIZE: 100
    SAMPLER: SequentialSampler
  TRAIN_U:
    BATCH_SIZE: 32
    N_DOMAIN: 0
    N_INS: 16
    SAME_AS_X: True
    SAMPLER: RandomSampler
  TRAIN_X:
    BATCH_SIZE: 4
    N_DOMAIN: 0
    N_INS: 16
    SAMPLER: RandomSampler
DATASET:
  ALL_AS_UNLABELED: False
  CIFAR_C_LEVEL: 1
  CIFAR_C_TYPE: 
  NAME: SUN397
  NUM_LABELED: -1
  NUM_SHOTS: 16
  ROOT: /mnt/hdd/DATA
  SOURCE_DOMAINS: ()
  STL10_FOLD: -1
  SUBSAMPLE_CLASSES: base
  TARGET_DOMAINS: ()
  VAL_PERCENT: 0.1
INPUT:
  COLORJITTER_B: 0.4
  COLORJITTER_C: 0.4
  COLORJITTER_H: 0.1
  COLORJITTER_S: 0.4
  CROP_PADDING: 4
  CUTOUT_LEN: 16
  CUTOUT_N: 1
  GB_K: 21
  GB_P: 0.5
  GN_MEAN: 0.0
  GN_STD: 0.15
  INTERPOLATION: bicubic
  NO_TRANSFORM: False
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  RANDAUGMENT_M: 10
  RANDAUGMENT_N: 2
  RGS_P: 0.2
  RRCROP_SCALE: (0.08, 1.0)
  SIZE: (224, 224)
  TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize')
MODEL:
  BACKBONE:
    NAME: ViT-B/16
    PRETRAINED: True
  HEAD:
    ACTIVATION: relu
    BN: True
    DROPOUT: 0.0
    HIDDEN_LAYERS: ()
    NAME: 
  INIT_WEIGHTS: 
OPTIM:
  ADAM_BETA1: 0.9
  ADAM_BETA2: 0.999
  BASE_LR_MULT: 0.1
  GAMMA: 0.1
  LR: 0.002
  LR_SCHEDULER: cosine
  MAX_EPOCH: 50
  MOMENTUM: 0.9
  NAME: sgd
  NEW_LAYERS: ()
  RMSPROP_ALPHA: 0.99
  SGD_DAMPNING: 0
  SGD_NESTEROV: False
  STAGED_LR: False
  STEPSIZE: (-1,)
  WARMUP_CONS_LR: 1e-05
  WARMUP_EPOCH: 5
  WARMUP_MIN_LR: 1e-05
  WARMUP_RECOUNT: True
  WARMUP_TYPE: constant
  WEIGHT_DECAY: 0.0005
OUTPUT_DIR: output/base2new/train_base/sun397/vit_b16_ep50_c4_BZ4_ProDA/seed3
RESUME: 
SEED: 3
TEST:
  COMPUTE_CMAT: False
  EVALUATOR: Classification
  FINAL_MODEL: last_step
  NO_TEST: False
  PER_CLASS_RESULT: False
  SPLIT: test
TRAIN:
  CHECKPOINT_FREQ: 0
  COUNT_ITER: train_x
  PRINT_FREQ: 20
TRAINER:
  CDAC:
    CLASS_LR_MULTI: 10
    P_THRESH: 0.95
    RAMPUP_COEF: 30
    RAMPUP_ITRS: 1000
    STRONG_TRANSFORMS: ()
    TOPK_MATCH: 5
  COCOOP:
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  COOP:
    CLASS_TOKEN_POSITION: end
    CSC: False
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  CROSSGRAD:
    ALPHA_D: 0.5
    ALPHA_F: 0.5
    EPS_D: 1.0
    EPS_F: 1.0
  DAEL:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DAELDG:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DDAIG:
    ALPHA: 0.5
    CLAMP: False
    CLAMP_MAX: 1.0
    CLAMP_MIN: -1.0
    G_ARCH: 
    LMDA: 0.3
    WARMUP: 0
  DOMAINMIX:
    ALPHA: 1.0
    BETA: 1.0
    TYPE: crossdomain
  ENTMIN:
    LMDA: 0.001
  FIXMATCH:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 1.0
  IVLP:
    CTX_INIT: a photo of a
    N_CTX_TEXT: 2
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_TEXT: 9
    PROMPT_DEPTH_VISION: 9
  M3SDA:
    LMDA: 0.5
    N_STEP_F: 4
  MAPLE:
    CTX_INIT: a photo of a
    N_CTX: 4
    PREC: fp16
    PROMPT_DEPTH: 9
  MCD:
    N_STEP_F: 4
  MEANTEACHER:
    EMA_ALPHA: 0.999
    RAMPUP: 5
    WEIGHT_U: 1.0
  MIXMATCH:
    MIXUP_BETA: 0.75
    RAMPUP: 20000
    TEMP: 2.0
    WEIGHT_U: 100.0
  MME:
    LMDA: 0.1
  NAME: ProDA
  ProDA:
    N_CTX: 4
    N_PROMPT: 32
    PREC: fp16
  SE:
    CONF_THRE: 0.95
    EMA_ALPHA: 0.999
    RAMPUP: 300
  VPT:
    CTX_INIT: a photo of a
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_VISION: 1
USE_CUDA: True
VERBOSE: True
VERSION: 1
Collecting env info ...
** System info **
PyTorch version: 2.2.1+cu121
Is debug build: False
CUDA used to build PyTorch: 12.1
ROCM used to build PyTorch: N/A

OS: Debian GNU/Linux 12 (bookworm) (x86_64)
GCC version: (Debian 12.2.0-14) 12.2.0
Clang version: Could not collect
CMake version: Could not collect
Libc version: glibc-2.36

Python version: 3.11.2 (main, Mar 13 2023, 12:18:29) [GCC 12.2.0] (64-bit runtime)
Python platform: Linux-6.5.13-3-pve-x86_64-with-glibc2.36
Is CUDA available: True
CUDA runtime version: 11.8.89
CUDA_MODULE_LOADING set to: LAZY
GPU models and configuration: 
GPU 0: NVIDIA A800 80GB PCIe
GPU 1: NVIDIA A800 80GB PCIe

Nvidia driver version: 525.147.05
cuDNN version: Could not collect
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True

CPU:
Architecture:                       x86_64
CPU op-mode(s):                     32-bit, 64-bit
Address sizes:                      46 bits physical, 57 bits virtual
Byte Order:                         Little Endian
CPU(s):                             64
On-line CPU(s) list:                18,20,22,23,25-27,29,31,32,34,37,46-49
Off-line CPU(s) list:               0-17,19,21,24,28,30,33,35,36,38-45,50-63
Vendor ID:                          GenuineIntel
Model name:                         Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz
CPU family:                         6
Model:                              106
Thread(s) per core:                 2
Core(s) per socket:                 16
Socket(s):                          2
Stepping:                           6
CPU(s) scaling MHz:                 98%
CPU max MHz:                        3500.0000
CPU min MHz:                        800.0000
BogoMIPS:                           5800.00
Flags:                              fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect wbnoinvd dtherm ida arat pln pts vnmi avx512vbmi umip pku ospke avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid fsrm md_clear pconfig flush_l1d arch_capabilities
Virtualization:                     VT-x
L1d cache:                          1.5 MiB (32 instances)
L1i cache:                          1 MiB (32 instances)
L2 cache:                           40 MiB (32 instances)
L3 cache:                           48 MiB (2 instances)
NUMA node(s):                       2
NUMA node0 CPU(s):                  0-15,32-47
NUMA node1 CPU(s):                  16-31,48-63
Vulnerability Gather data sampling: Vulnerable: No microcode
Vulnerability Itlb multihit:        Not affected
Vulnerability L1tf:                 Not affected
Vulnerability Mds:                  Not affected
Vulnerability Meltdown:             Not affected
Vulnerability Mmio stale data:      Mitigation; Clear CPU buffers; SMT vulnerable
Vulnerability Retbleed:             Not affected
Vulnerability Spec rstack overflow: Not affected
Vulnerability Spec store bypass:    Mitigation; Speculative Store Bypass disabled via prctl
Vulnerability Spectre v1:           Mitigation; usercopy/swapgs barriers and __user pointer sanitization
Vulnerability Spectre v2:           Mitigation; Enhanced / Automatic IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
Vulnerability Srbds:                Not affected
Vulnerability Tsx async abort:      Not affected

Versions of relevant libraries:
[pip3] flake8==3.7.9
[pip3] flake8==3.7.9
[pip3] numpy==1.26.4
[pip3] torch==2.2.1
[pip3] torchaudio==2.2.1
[pip3] torchvision==0.17.1
[pip3] triton==2.2.0
[conda] Could not collect
        Pillow (10.2.0)

Loading trainer: ProDA
Loading dataset: SUN397
Reading split from /mnt/hdd/DATA/sun397/split_zhou_SUN397.json
Loading preprocessed few-shot data from /mnt/hdd/DATA/sun397/split_fewshot/shot_16_shuffled-seed_3.pkl
SUBSAMPLE BASE CLASSES!
Building transform_train
+ random resized crop (size=(224, 224), scale=(0.08, 1.0))
+ random flip
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
Building transform_test
+ resize the smaller edge to 224
+ 224x224 center crop
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
---------  ------
Dataset    SUN397
# classes  199
# train_x  3,184
# val      796
# test     9,950
---------  ------
Loading CLIP (backbone: ViT-B/16)
Building custom CLIP
Turning off gradients in both the image and the text encoder
Parameters to be updated: {'prompt_learner.ctx'}
Loading evaluator: Classification
No checkpoint found, train from scratch
Initialize tensorboard (log_dir=output/base2new/train_base/sun397/vit_b16_ep50_c4_BZ4_ProDA/seed3/tensorboard)
epoch [1/50] batch [20/796] time 0.828 (0.960) data 0.000 (0.027) loss 3.8141 (2.4952) lr 1.0000e-05 eta 10:36:10
epoch [1/50] batch [40/796] time 0.838 (0.896) data 0.000 (0.014) loss 4.1925 (2.4174) lr 1.0000e-05 eta 9:53:27
epoch [1/50] batch [60/796] time 0.839 (0.873) data 0.000 (0.009) loss 2.2344 (2.4579) lr 1.0000e-05 eta 9:38:20
epoch [1/50] batch [80/796] time 0.818 (0.862) data 0.000 (0.007) loss 4.9297 (2.4298) lr 1.0000e-05 eta 9:30:34
epoch [1/50] batch [100/796] time 0.837 (0.856) data 0.000 (0.006) loss 1.6714 (2.3645) lr 1.0000e-05 eta 9:26:10
epoch [1/50] batch [120/796] time 0.820 (0.852) data 0.000 (0.005) loss 3.2677 (2.3868) lr 1.0000e-05 eta 9:23:10
epoch [1/50] batch [140/796] time 0.819 (0.848) data 0.000 (0.004) loss 2.4158 (2.3288) lr 1.0000e-05 eta 9:20:46
epoch [1/50] batch [160/796] time 0.818 (0.846) data 0.000 (0.004) loss 1.9911 (2.3450) lr 1.0000e-05 eta 9:19:00
epoch [1/50] batch [180/796] time 0.816 (0.844) data 0.000 (0.003) loss 3.0322 (2.3206) lr 1.0000e-05 eta 9:17:23
epoch [1/50] batch [200/796] time 0.837 (0.843) data 0.000 (0.003) loss 2.1137 (2.2900) lr 1.0000e-05 eta 9:16:14
epoch [1/50] batch [220/796] time 0.828 (0.841) data 0.000 (0.003) loss 3.1940 (2.2809) lr 1.0000e-05 eta 9:15:05
epoch [1/50] batch [240/796] time 0.828 (0.840) data 0.000 (0.002) loss 1.4803 (2.2773) lr 1.0000e-05 eta 9:14:03
epoch [1/50] batch [260/796] time 0.829 (0.839) data 0.000 (0.002) loss 1.8702 (2.2498) lr 1.0000e-05 eta 9:13:08
epoch [1/50] batch [280/796] time 0.817 (0.839) data 0.000 (0.002) loss 2.5301 (2.2484) lr 1.0000e-05 eta 9:12:17
epoch [1/50] batch [300/796] time 0.838 (0.838) data 0.000 (0.002) loss 1.3984 (2.2103) lr 1.0000e-05 eta 9:11:34
epoch [1/50] batch [320/796] time 0.837 (0.837) data 0.000 (0.002) loss 2.6178 (2.1881) lr 1.0000e-05 eta 9:11:03
epoch [1/50] batch [340/796] time 0.817 (0.837) data 0.000 (0.002) loss 3.7584 (2.1671) lr 1.0000e-05 eta 9:10:25
epoch [1/50] batch [360/796] time 0.838 (0.836) data 0.000 (0.002) loss 3.4119 (2.1645) lr 1.0000e-05 eta 9:09:47
epoch [1/50] batch [380/796] time 0.836 (0.836) data 0.000 (0.002) loss 0.6350 (2.1458) lr 1.0000e-05 eta 9:09:18
epoch [1/50] batch [400/796] time 0.837 (0.836) data 0.000 (0.002) loss 1.5437 (2.1394) lr 1.0000e-05 eta 9:08:44
epoch [1/50] batch [420/796] time 0.838 (0.835) data 0.000 (0.001) loss 1.6186 (2.1150) lr 1.0000e-05 eta 9:08:17
epoch [1/50] batch [440/796] time 0.837 (0.835) data 0.000 (0.001) loss 0.6836 (2.1041) lr 1.0000e-05 eta 9:07:51
epoch [1/50] batch [460/796] time 0.827 (0.835) data 0.000 (0.001) loss 3.0498 (2.0875) lr 1.0000e-05 eta 9:07:22
epoch [1/50] batch [480/796] time 0.837 (0.835) data 0.000 (0.001) loss 1.3549 (2.0923) lr 1.0000e-05 eta 9:06:56
epoch [1/50] batch [500/796] time 0.837 (0.834) data 0.000 (0.001) loss 2.2931 (2.0844) lr 1.0000e-05 eta 9:06:23
epoch [1/50] batch [520/796] time 0.838 (0.834) data 0.000 (0.001) loss 0.4476 (2.0746) lr 1.0000e-05 eta 9:05:58
epoch [1/50] batch [540/796] time 0.840 (0.834) data 0.000 (0.001) loss 0.7286 (2.0674) lr 1.0000e-05 eta 9:05:35
epoch [1/50] batch [560/796] time 0.839 (0.834) data 0.000 (0.001) loss 2.3206 (2.0524) lr 1.0000e-05 eta 9:05:14
epoch [1/50] batch [580/796] time 0.838 (0.834) data 0.000 (0.001) loss 2.7224 (2.0488) lr 1.0000e-05 eta 9:04:53
epoch [1/50] batch [600/796] time 0.817 (0.833) data 0.000 (0.001) loss 2.0479 (2.0410) lr 1.0000e-05 eta 9:04:32
epoch [1/50] batch [620/796] time 0.817 (0.833) data 0.000 (0.001) loss 1.8419 (2.0373) lr 1.0000e-05 eta 9:04:11
epoch [1/50] batch [640/796] time 0.830 (0.833) data 0.000 (0.001) loss 3.5811 (2.0345) lr 1.0000e-05 eta 9:03:51
epoch [1/50] batch [660/796] time 0.838 (0.833) data 0.000 (0.001) loss 0.7881 (2.0205) lr 1.0000e-05 eta 9:03:30
epoch [1/50] batch [680/796] time 0.828 (0.833) data 0.000 (0.001) loss 0.5305 (2.0145) lr 1.0000e-05 eta 9:03:08
epoch [1/50] batch [700/796] time 0.829 (0.833) data 0.000 (0.001) loss 1.3916 (2.0156) lr 1.0000e-05 eta 9:02:46
epoch [1/50] batch [720/796] time 0.829 (0.833) data 0.000 (0.001) loss 1.4293 (2.0063) lr 1.0000e-05 eta 9:02:22
epoch [1/50] batch [740/796] time 0.817 (0.833) data 0.000 (0.001) loss 2.2741 (2.0023) lr 1.0000e-05 eta 9:02:00
epoch [1/50] batch [760/796] time 0.817 (0.832) data 0.000 (0.001) loss 1.7401 (2.0007) lr 1.0000e-05 eta 9:01:39
epoch [1/50] batch [780/796] time 0.816 (0.832) data 0.000 (0.001) loss 1.6989 (1.9989) lr 1.0000e-05 eta 9:01:17
epoch [2/50] batch [20/796] time 0.838 (0.854) data 0.000 (0.023) loss 1.5064 (1.7274) lr 1.0000e-05 eta 9:14:40
epoch [2/50] batch [40/796] time 0.818 (0.842) data 0.000 (0.012) loss 1.8325 (1.5895) lr 1.0000e-05 eta 9:06:37
epoch [2/50] batch [60/796] time 0.838 (0.837) data 0.000 (0.008) loss 3.8651 (1.6617) lr 1.0000e-05 eta 9:03:26
epoch [2/50] batch [80/796] time 0.817 (0.835) data 0.000 (0.006) loss 1.5941 (1.6572) lr 1.0000e-05 eta 9:01:29
epoch [2/50] batch [100/796] time 0.837 (0.834) data 0.000 (0.005) loss 1.2384 (1.6578) lr 1.0000e-05 eta 9:00:49
epoch [2/50] batch [120/796] time 0.809 (0.833) data 0.000 (0.004) loss 1.3361 (1.6324) lr 1.0000e-05 eta 8:59:58
epoch [2/50] batch [140/796] time 0.819 (0.833) data 0.000 (0.003) loss 2.7921 (1.6390) lr 1.0000e-05 eta 8:59:27
epoch [2/50] batch [160/796] time 0.819 (0.832) data 0.000 (0.003) loss 1.5984 (1.6674) lr 1.0000e-05 eta 8:58:48
epoch [2/50] batch [180/796] time 0.829 (0.832) data 0.000 (0.003) loss 0.8133 (1.7134) lr 1.0000e-05 eta 8:58:26
epoch [2/50] batch [200/796] time 0.839 (0.832) data 0.000 (0.002) loss 2.3149 (1.7011) lr 1.0000e-05 eta 8:58:07
epoch [2/50] batch [220/796] time 0.829 (0.832) data 0.000 (0.002) loss 1.8772 (1.6895) lr 1.0000e-05 eta 8:57:39
epoch [2/50] batch [240/796] time 0.829 (0.832) data 0.000 (0.002) loss 1.9006 (1.6670) lr 1.0000e-05 eta 8:57:20
epoch [2/50] batch [260/796] time 0.818 (0.832) data 0.000 (0.002) loss 1.8447 (1.6871) lr 1.0000e-05 eta 8:56:56
epoch [2/50] batch [280/796] time 0.842 (0.831) data 0.000 (0.002) loss 3.2290 (1.6818) lr 1.0000e-05 eta 8:56:32
epoch [2/50] batch [300/796] time 0.819 (0.831) data 0.000 (0.002) loss 1.2985 (1.6784) lr 1.0000e-05 eta 8:56:12
epoch [2/50] batch [320/796] time 0.831 (0.831) data 0.000 (0.002) loss 0.4867 (1.6683) lr 1.0000e-05 eta 8:55:59
epoch [2/50] batch [340/796] time 0.837 (0.831) data 0.000 (0.002) loss 0.5023 (1.6446) lr 1.0000e-05 eta 8:55:31
epoch [2/50] batch [360/796] time 0.829 (0.831) data 0.000 (0.001) loss 3.3303 (1.6459) lr 1.0000e-05 eta 8:55:09
epoch [2/50] batch [380/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.3538 (1.6541) lr 1.0000e-05 eta 8:54:47
epoch [2/50] batch [400/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.4724 (1.6496) lr 1.0000e-05 eta 8:54:28
epoch [2/50] batch [420/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.1740 (1.6367) lr 1.0000e-05 eta 8:54:08
epoch [2/50] batch [440/796] time 0.809 (0.831) data 0.000 (0.001) loss 0.3001 (1.6359) lr 1.0000e-05 eta 8:53:47
epoch [2/50] batch [460/796] time 0.837 (0.830) data 0.000 (0.001) loss 0.8940 (1.6345) lr 1.0000e-05 eta 8:53:30
epoch [2/50] batch [480/796] time 0.809 (0.830) data 0.000 (0.001) loss 0.8776 (1.6358) lr 1.0000e-05 eta 8:53:13
epoch [2/50] batch [500/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.7102 (1.6366) lr 1.0000e-05 eta 8:53:01
epoch [2/50] batch [520/796] time 0.850 (0.831) data 0.000 (0.001) loss 2.9949 (1.6441) lr 1.0000e-05 eta 8:52:46
epoch [2/50] batch [540/796] time 0.820 (0.831) data 0.000 (0.001) loss 2.1582 (1.6482) lr 1.0000e-05 eta 8:52:34
epoch [2/50] batch [560/796] time 0.840 (0.831) data 0.000 (0.001) loss 2.0217 (1.6453) lr 1.0000e-05 eta 8:52:21
epoch [2/50] batch [580/796] time 0.839 (0.831) data 0.000 (0.001) loss 2.2820 (1.6462) lr 1.0000e-05 eta 8:52:07
epoch [2/50] batch [600/796] time 0.842 (0.831) data 0.000 (0.001) loss 2.0996 (1.6510) lr 1.0000e-05 eta 8:51:50
epoch [2/50] batch [620/796] time 0.841 (0.831) data 0.000 (0.001) loss 0.7768 (1.6488) lr 1.0000e-05 eta 8:51:35
epoch [2/50] batch [640/796] time 0.840 (0.831) data 0.000 (0.001) loss 3.0903 (1.6405) lr 1.0000e-05 eta 8:51:20
epoch [2/50] batch [660/796] time 0.818 (0.831) data 0.000 (0.001) loss 2.7433 (1.6418) lr 1.0000e-05 eta 8:51:01
epoch [2/50] batch [680/796] time 0.808 (0.831) data 0.000 (0.001) loss 3.0134 (1.6545) lr 1.0000e-05 eta 8:50:44
epoch [2/50] batch [700/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.6487 (1.6569) lr 1.0000e-05 eta 8:50:29
epoch [2/50] batch [720/796] time 0.838 (0.831) data 0.000 (0.001) loss 3.2240 (1.6594) lr 1.0000e-05 eta 8:50:11
epoch [2/50] batch [740/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.9757 (1.6607) lr 1.0000e-05 eta 8:49:53
epoch [2/50] batch [760/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.3358 (1.6589) lr 1.0000e-05 eta 8:49:37
epoch [2/50] batch [780/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.8450 (1.6540) lr 1.0000e-05 eta 8:49:19
epoch [3/50] batch [20/796] time 0.838 (0.857) data 0.000 (0.023) loss 1.3700 (1.4478) lr 1.0000e-05 eta 9:05:36
epoch [3/50] batch [40/796] time 0.838 (0.842) data 0.000 (0.012) loss 1.7925 (1.5194) lr 1.0000e-05 eta 8:55:47
epoch [3/50] batch [60/796] time 0.828 (0.838) data 0.000 (0.008) loss 2.7644 (1.6346) lr 1.0000e-05 eta 8:52:45
epoch [3/50] batch [80/796] time 0.808 (0.836) data 0.000 (0.006) loss 3.3256 (1.6121) lr 1.0000e-05 eta 8:50:57
epoch [3/50] batch [100/796] time 0.839 (0.835) data 0.000 (0.005) loss 2.6952 (1.6258) lr 1.0000e-05 eta 8:50:27
epoch [3/50] batch [120/796] time 0.829 (0.834) data 0.000 (0.004) loss 2.1133 (1.6238) lr 1.0000e-05 eta 8:49:26
epoch [3/50] batch [140/796] time 0.843 (0.834) data 0.000 (0.003) loss 2.8672 (1.5966) lr 1.0000e-05 eta 8:48:52
epoch [3/50] batch [160/796] time 0.829 (0.833) data 0.000 (0.003) loss 1.0653 (1.5678) lr 1.0000e-05 eta 8:48:28
epoch [3/50] batch [180/796] time 0.818 (0.833) data 0.000 (0.003) loss 1.2814 (1.5586) lr 1.0000e-05 eta 8:47:57
epoch [3/50] batch [200/796] time 0.839 (0.833) data 0.000 (0.002) loss 2.3008 (1.5691) lr 1.0000e-05 eta 8:47:27
epoch [3/50] batch [220/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.1889 (1.5981) lr 1.0000e-05 eta 8:47:03
epoch [3/50] batch [240/796] time 0.818 (0.832) data 0.000 (0.002) loss 2.4216 (1.6158) lr 1.0000e-05 eta 8:46:38
epoch [3/50] batch [260/796] time 0.829 (0.832) data 0.000 (0.002) loss 1.4547 (1.5935) lr 1.0000e-05 eta 8:46:13
epoch [3/50] batch [280/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.1288 (1.6090) lr 1.0000e-05 eta 8:45:46
epoch [3/50] batch [300/796] time 0.828 (0.831) data 0.000 (0.002) loss 1.4908 (1.6248) lr 1.0000e-05 eta 8:45:17
epoch [3/50] batch [320/796] time 0.817 (0.831) data 0.000 (0.002) loss 0.3786 (1.6119) lr 1.0000e-05 eta 8:44:55
epoch [3/50] batch [340/796] time 0.839 (0.831) data 0.000 (0.002) loss 2.1972 (1.6138) lr 1.0000e-05 eta 8:44:34
epoch [3/50] batch [360/796] time 0.818 (0.831) data 0.000 (0.001) loss 2.0040 (1.6059) lr 1.0000e-05 eta 8:44:10
epoch [3/50] batch [380/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.4625 (1.6072) lr 1.0000e-05 eta 8:43:48
epoch [3/50] batch [400/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.3143 (1.6043) lr 1.0000e-05 eta 8:43:29
epoch [3/50] batch [420/796] time 0.838 (0.831) data 0.000 (0.001) loss 2.2843 (1.6100) lr 1.0000e-05 eta 8:43:09
epoch [3/50] batch [440/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.2913 (1.5943) lr 1.0000e-05 eta 8:42:53
epoch [3/50] batch [460/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.8527 (1.5979) lr 1.0000e-05 eta 8:42:30
epoch [3/50] batch [480/796] time 0.833 (0.831) data 0.000 (0.001) loss 1.3449 (1.6007) lr 1.0000e-05 eta 8:42:14
epoch [3/50] batch [500/796] time 0.819 (0.830) data 0.000 (0.001) loss 0.8922 (1.5986) lr 1.0000e-05 eta 8:41:49
epoch [3/50] batch [520/796] time 0.837 (0.830) data 0.000 (0.001) loss 2.7073 (1.5896) lr 1.0000e-05 eta 8:41:32
epoch [3/50] batch [540/796] time 0.829 (0.830) data 0.000 (0.001) loss 1.3365 (1.5899) lr 1.0000e-05 eta 8:41:15
epoch [3/50] batch [560/796] time 0.817 (0.830) data 0.000 (0.001) loss 1.7182 (1.5850) lr 1.0000e-05 eta 8:41:00
epoch [3/50] batch [580/796] time 0.831 (0.830) data 0.000 (0.001) loss 0.2410 (1.5822) lr 1.0000e-05 eta 8:40:45
epoch [3/50] batch [600/796] time 0.838 (0.830) data 0.000 (0.001) loss 1.8192 (1.5840) lr 1.0000e-05 eta 8:40:29
epoch [3/50] batch [620/796] time 0.828 (0.830) data 0.000 (0.001) loss 1.1362 (1.5869) lr 1.0000e-05 eta 8:40:08
epoch [3/50] batch [640/796] time 0.829 (0.830) data 0.000 (0.001) loss 1.9436 (1.5808) lr 1.0000e-05 eta 8:39:50
epoch [3/50] batch [660/796] time 0.839 (0.830) data 0.000 (0.001) loss 3.0194 (1.5843) lr 1.0000e-05 eta 8:39:33
epoch [3/50] batch [680/796] time 0.837 (0.830) data 0.000 (0.001) loss 0.8390 (1.5814) lr 1.0000e-05 eta 8:39:15
epoch [3/50] batch [700/796] time 0.839 (0.830) data 0.000 (0.001) loss 2.4129 (1.5710) lr 1.0000e-05 eta 8:38:56
epoch [3/50] batch [720/796] time 0.838 (0.830) data 0.000 (0.001) loss 2.5820 (1.5653) lr 1.0000e-05 eta 8:38:41
epoch [3/50] batch [740/796] time 0.838 (0.830) data 0.000 (0.001) loss 1.6894 (1.5568) lr 1.0000e-05 eta 8:38:27
epoch [3/50] batch [760/796] time 0.817 (0.830) data 0.000 (0.001) loss 3.5889 (1.5542) lr 1.0000e-05 eta 8:38:06
epoch [3/50] batch [780/796] time 0.838 (0.830) data 0.000 (0.001) loss 1.2179 (1.5507) lr 1.0000e-05 eta 8:37:50
epoch [4/50] batch [20/796] time 0.819 (0.856) data 0.000 (0.025) loss 2.8863 (1.8381) lr 1.0000e-05 eta 8:53:28
epoch [4/50] batch [40/796] time 0.838 (0.841) data 0.000 (0.013) loss 0.4943 (1.7598) lr 1.0000e-05 eta 8:44:04
epoch [4/50] batch [60/796] time 0.817 (0.838) data 0.000 (0.009) loss 1.2479 (1.6694) lr 1.0000e-05 eta 8:41:30
epoch [4/50] batch [80/796] time 0.818 (0.836) data 0.000 (0.006) loss 1.8419 (1.6863) lr 1.0000e-05 eta 8:39:56
epoch [4/50] batch [100/796] time 0.818 (0.835) data 0.000 (0.005) loss 1.4896 (1.6146) lr 1.0000e-05 eta 8:39:02
epoch [4/50] batch [120/796] time 0.838 (0.834) data 0.000 (0.004) loss 2.4283 (1.5992) lr 1.0000e-05 eta 8:38:18
epoch [4/50] batch [140/796] time 0.817 (0.833) data 0.000 (0.004) loss 1.3328 (1.5696) lr 1.0000e-05 eta 8:37:32
epoch [4/50] batch [160/796] time 0.837 (0.833) data 0.000 (0.003) loss 1.5408 (1.5606) lr 1.0000e-05 eta 8:36:57
epoch [4/50] batch [180/796] time 0.830 (0.833) data 0.000 (0.003) loss 1.7409 (1.5580) lr 1.0000e-05 eta 8:36:38
epoch [4/50] batch [200/796] time 0.836 (0.832) data 0.000 (0.003) loss 2.9473 (1.5826) lr 1.0000e-05 eta 8:36:07
epoch [4/50] batch [220/796] time 0.828 (0.832) data 0.000 (0.002) loss 2.2834 (1.5630) lr 1.0000e-05 eta 8:35:37
epoch [4/50] batch [240/796] time 0.843 (0.832) data 0.000 (0.002) loss 1.6053 (1.5820) lr 1.0000e-05 eta 8:35:13
epoch [4/50] batch [260/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.1299 (1.5743) lr 1.0000e-05 eta 8:34:53
epoch [4/50] batch [280/796] time 0.838 (0.832) data 0.000 (0.002) loss 2.4146 (1.5587) lr 1.0000e-05 eta 8:34:40
epoch [4/50] batch [300/796] time 0.830 (0.831) data 0.000 (0.002) loss 1.4056 (1.5658) lr 1.0000e-05 eta 8:34:16
epoch [4/50] batch [320/796] time 0.840 (0.831) data 0.000 (0.002) loss 1.7270 (1.5605) lr 1.0000e-05 eta 8:34:01
epoch [4/50] batch [340/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.4936 (1.5602) lr 1.0000e-05 eta 8:33:48
epoch [4/50] batch [360/796] time 0.831 (0.832) data 0.000 (0.002) loss 1.5282 (1.5643) lr 1.0000e-05 eta 8:33:36
epoch [4/50] batch [380/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.4223 (1.5588) lr 1.0000e-05 eta 8:33:18
epoch [4/50] batch [400/796] time 0.838 (0.832) data 0.000 (0.001) loss 1.6846 (1.5649) lr 1.0000e-05 eta 8:33:04
epoch [4/50] batch [420/796] time 0.829 (0.832) data 0.000 (0.001) loss 0.3546 (1.5588) lr 1.0000e-05 eta 8:32:45
epoch [4/50] batch [440/796] time 0.818 (0.832) data 0.000 (0.001) loss 1.2341 (1.5531) lr 1.0000e-05 eta 8:32:26
epoch [4/50] batch [460/796] time 0.838 (0.832) data 0.000 (0.001) loss 1.3636 (1.5477) lr 1.0000e-05 eta 8:32:07
epoch [4/50] batch [480/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.4893 (1.5405) lr 1.0000e-05 eta 8:31:44
epoch [4/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.9420 (1.5437) lr 1.0000e-05 eta 8:31:29
epoch [4/50] batch [520/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2159 (1.5366) lr 1.0000e-05 eta 8:31:10
epoch [4/50] batch [540/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.3650 (1.5357) lr 1.0000e-05 eta 8:30:48
epoch [4/50] batch [560/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.5538 (1.5180) lr 1.0000e-05 eta 8:30:31
epoch [4/50] batch [580/796] time 0.840 (0.831) data 0.000 (0.001) loss 2.5621 (1.5210) lr 1.0000e-05 eta 8:30:16
epoch [4/50] batch [600/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.9878 (1.5166) lr 1.0000e-05 eta 8:30:00
epoch [4/50] batch [620/796] time 0.843 (0.831) data 0.000 (0.001) loss 1.8717 (1.5184) lr 1.0000e-05 eta 8:29:41
epoch [4/50] batch [640/796] time 0.832 (0.831) data 0.000 (0.001) loss 3.9861 (1.5158) lr 1.0000e-05 eta 8:29:23
epoch [4/50] batch [660/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.3182 (1.5154) lr 1.0000e-05 eta 8:29:02
epoch [4/50] batch [680/796] time 0.837 (0.831) data 0.000 (0.001) loss 1.4248 (1.5073) lr 1.0000e-05 eta 8:28:46
epoch [4/50] batch [700/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.8387 (1.5010) lr 1.0000e-05 eta 8:28:28
epoch [4/50] batch [720/796] time 0.854 (0.831) data 0.000 (0.001) loss 0.3207 (1.4844) lr 1.0000e-05 eta 8:28:11
epoch [4/50] batch [740/796] time 0.832 (0.831) data 0.000 (0.001) loss 0.8679 (1.4815) lr 1.0000e-05 eta 8:27:51
epoch [4/50] batch [760/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.5676 (1.4785) lr 1.0000e-05 eta 8:27:30
epoch [4/50] batch [780/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.7254 (1.4820) lr 1.0000e-05 eta 8:27:14
epoch [5/50] batch [20/796] time 0.822 (0.853) data 0.000 (0.024) loss 3.4872 (1.4770) lr 1.0000e-05 eta 8:40:03
epoch [5/50] batch [40/796] time 0.818 (0.840) data 0.000 (0.012) loss 1.2550 (1.5674) lr 1.0000e-05 eta 8:32:08
epoch [5/50] batch [60/796] time 0.839 (0.837) data 0.000 (0.008) loss 1.0431 (1.4455) lr 1.0000e-05 eta 8:29:58
epoch [5/50] batch [80/796] time 0.838 (0.834) data 0.000 (0.006) loss 1.3167 (1.5209) lr 1.0000e-05 eta 8:28:09
epoch [5/50] batch [100/796] time 0.819 (0.833) data 0.000 (0.005) loss 2.1656 (1.5391) lr 1.0000e-05 eta 8:27:09
epoch [5/50] batch [120/796] time 0.817 (0.833) data 0.000 (0.004) loss 2.7216 (1.5175) lr 1.0000e-05 eta 8:26:36
epoch [5/50] batch [140/796] time 0.829 (0.832) data 0.000 (0.004) loss 1.5448 (1.5314) lr 1.0000e-05 eta 8:25:50
epoch [5/50] batch [160/796] time 0.828 (0.832) data 0.000 (0.003) loss 1.8587 (1.5164) lr 1.0000e-05 eta 8:25:20
epoch [5/50] batch [180/796] time 0.830 (0.832) data 0.000 (0.003) loss 2.1619 (1.5098) lr 1.0000e-05 eta 8:25:00
epoch [5/50] batch [200/796] time 0.838 (0.831) data 0.000 (0.003) loss 1.5692 (1.4889) lr 1.0000e-05 eta 8:24:33
epoch [5/50] batch [220/796] time 0.838 (0.831) data 0.000 (0.002) loss 1.4687 (1.5062) lr 1.0000e-05 eta 8:24:14
epoch [5/50] batch [240/796] time 0.829 (0.831) data 0.000 (0.002) loss 0.0399 (1.5131) lr 1.0000e-05 eta 8:23:51
epoch [5/50] batch [260/796] time 0.808 (0.831) data 0.000 (0.002) loss 1.4034 (1.5182) lr 1.0000e-05 eta 8:23:18
epoch [5/50] batch [280/796] time 0.817 (0.831) data 0.000 (0.002) loss 2.0978 (1.5049) lr 1.0000e-05 eta 8:22:58
epoch [5/50] batch [300/796] time 0.830 (0.830) data 0.000 (0.002) loss 1.2917 (1.5229) lr 1.0000e-05 eta 8:22:40
epoch [5/50] batch [320/796] time 0.839 (0.830) data 0.000 (0.002) loss 2.2386 (1.5093) lr 1.0000e-05 eta 8:22:17
epoch [5/50] batch [340/796] time 0.829 (0.830) data 0.000 (0.002) loss 0.6344 (1.4907) lr 1.0000e-05 eta 8:22:05
epoch [5/50] batch [360/796] time 0.838 (0.830) data 0.000 (0.002) loss 2.5513 (1.4934) lr 1.0000e-05 eta 8:21:45
epoch [5/50] batch [380/796] time 0.839 (0.830) data 0.000 (0.001) loss 2.6516 (1.4896) lr 1.0000e-05 eta 8:21:33
epoch [5/50] batch [400/796] time 0.840 (0.830) data 0.000 (0.001) loss 1.3860 (1.4974) lr 1.0000e-05 eta 8:21:13
epoch [5/50] batch [420/796] time 0.818 (0.830) data 0.000 (0.001) loss 2.0394 (1.4956) lr 1.0000e-05 eta 8:20:49
epoch [5/50] batch [440/796] time 0.819 (0.830) data 0.000 (0.001) loss 4.5409 (1.4920) lr 1.0000e-05 eta 8:20:31
epoch [5/50] batch [460/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.7456 (1.4942) lr 1.0000e-05 eta 8:20:13
epoch [5/50] batch [480/796] time 0.818 (0.830) data 0.000 (0.001) loss 0.3817 (1.4906) lr 1.0000e-05 eta 8:19:56
epoch [5/50] batch [500/796] time 0.809 (0.830) data 0.000 (0.001) loss 1.0000 (1.4857) lr 1.0000e-05 eta 8:19:38
epoch [5/50] batch [520/796] time 0.838 (0.830) data 0.000 (0.001) loss 2.0429 (1.4833) lr 1.0000e-05 eta 8:19:19
epoch [5/50] batch [540/796] time 0.818 (0.830) data 0.000 (0.001) loss 1.7098 (1.4700) lr 1.0000e-05 eta 8:19:02
epoch [5/50] batch [560/796] time 0.829 (0.830) data 0.000 (0.001) loss 1.7933 (1.4812) lr 1.0000e-05 eta 8:18:45
epoch [5/50] batch [580/796] time 0.839 (0.830) data 0.000 (0.001) loss 0.5028 (1.4799) lr 1.0000e-05 eta 8:18:30
epoch [5/50] batch [600/796] time 0.829 (0.830) data 0.000 (0.001) loss 1.7506 (1.4771) lr 1.0000e-05 eta 8:18:12
epoch [5/50] batch [620/796] time 0.817 (0.830) data 0.000 (0.001) loss 1.2823 (1.4702) lr 1.0000e-05 eta 8:17:51
epoch [5/50] batch [640/796] time 0.837 (0.830) data 0.000 (0.001) loss 1.6644 (1.4649) lr 1.0000e-05 eta 8:17:34
epoch [5/50] batch [660/796] time 0.837 (0.830) data 0.000 (0.001) loss 0.7195 (1.4666) lr 1.0000e-05 eta 8:17:18
epoch [5/50] batch [680/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.7701 (1.4624) lr 1.0000e-05 eta 8:16:59
epoch [5/50] batch [700/796] time 0.809 (0.830) data 0.000 (0.001) loss 1.7596 (1.4489) lr 1.0000e-05 eta 8:16:41
epoch [5/50] batch [720/796] time 0.829 (0.830) data 0.000 (0.001) loss 0.6716 (1.4486) lr 1.0000e-05 eta 8:16:24
epoch [5/50] batch [740/796] time 0.829 (0.830) data 0.000 (0.001) loss 2.6218 (1.4457) lr 1.0000e-05 eta 8:16:08
epoch [5/50] batch [760/796] time 0.828 (0.830) data 0.000 (0.001) loss 0.9575 (1.4449) lr 1.0000e-05 eta 8:15:52
epoch [5/50] batch [780/796] time 0.839 (0.830) data 0.000 (0.001) loss 2.3327 (1.4412) lr 1.0000e-05 eta 8:15:34
epoch [6/50] batch [20/796] time 0.837 (0.853) data 0.000 (0.024) loss 0.7714 (1.4086) lr 2.0000e-03 eta 8:28:59
epoch [6/50] batch [40/796] time 0.828 (0.841) data 0.000 (0.012) loss 1.8186 (1.6401) lr 2.0000e-03 eta 8:21:41
epoch [6/50] batch [60/796] time 0.828 (0.838) data 0.001 (0.008) loss 3.5026 (1.6182) lr 2.0000e-03 eta 8:19:24
epoch [6/50] batch [80/796] time 0.829 (0.836) data 0.000 (0.006) loss 2.8685 (1.5771) lr 2.0000e-03 eta 8:17:56
epoch [6/50] batch [100/796] time 0.828 (0.834) data 0.000 (0.005) loss 1.5457 (1.6560) lr 2.0000e-03 eta 8:16:41
epoch [6/50] batch [120/796] time 0.837 (0.834) data 0.000 (0.004) loss 1.1885 (1.6614) lr 2.0000e-03 eta 8:16:07
epoch [6/50] batch [140/796] time 0.829 (0.833) data 0.000 (0.004) loss 1.3573 (1.6578) lr 2.0000e-03 eta 8:15:22
epoch [6/50] batch [160/796] time 0.831 (0.832) data 0.000 (0.003) loss 0.9939 (1.6285) lr 2.0000e-03 eta 8:14:38
epoch [6/50] batch [180/796] time 0.840 (0.832) data 0.000 (0.003) loss 1.8966 (1.5941) lr 2.0000e-03 eta 8:14:17
epoch [6/50] batch [200/796] time 0.819 (0.832) data 0.000 (0.003) loss 1.4280 (1.5923) lr 2.0000e-03 eta 8:13:54
epoch [6/50] batch [220/796] time 0.839 (0.832) data 0.000 (0.002) loss 2.5156 (1.5642) lr 2.0000e-03 eta 8:13:28
epoch [6/50] batch [240/796] time 0.837 (0.832) data 0.000 (0.002) loss 0.6525 (1.5479) lr 2.0000e-03 eta 8:13:11
epoch [6/50] batch [260/796] time 0.817 (0.831) data 0.000 (0.002) loss 1.7824 (1.5302) lr 2.0000e-03 eta 8:12:47
epoch [6/50] batch [280/796] time 0.831 (0.831) data 0.000 (0.002) loss 0.9819 (1.5054) lr 2.0000e-03 eta 8:12:23
epoch [6/50] batch [300/796] time 0.818 (0.831) data 0.000 (0.002) loss 1.9166 (1.4913) lr 2.0000e-03 eta 8:12:03
epoch [6/50] batch [320/796] time 0.833 (0.831) data 0.000 (0.002) loss 1.8059 (1.4809) lr 2.0000e-03 eta 8:11:36
epoch [6/50] batch [340/796] time 0.810 (0.831) data 0.000 (0.002) loss 3.3427 (1.4580) lr 2.0000e-03 eta 8:11:19
epoch [6/50] batch [360/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.0492 (1.4401) lr 2.0000e-03 eta 8:11:01
epoch [6/50] batch [380/796] time 0.808 (0.831) data 0.000 (0.001) loss 1.1150 (1.4543) lr 2.0000e-03 eta 8:10:45
epoch [6/50] batch [400/796] time 0.841 (0.831) data 0.000 (0.001) loss 3.2301 (1.4514) lr 2.0000e-03 eta 8:10:27
epoch [6/50] batch [420/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.8151 (1.4317) lr 2.0000e-03 eta 8:10:06
epoch [6/50] batch [440/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.3140 (1.4188) lr 2.0000e-03 eta 8:09:49
epoch [6/50] batch [460/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.7014 (1.4052) lr 2.0000e-03 eta 8:09:31
epoch [6/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.7576 (1.3881) lr 2.0000e-03 eta 8:09:15
epoch [6/50] batch [500/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.3086 (1.3798) lr 2.0000e-03 eta 8:08:54
epoch [6/50] batch [520/796] time 0.817 (0.830) data 0.000 (0.001) loss 0.8844 (1.3770) lr 2.0000e-03 eta 8:08:32
epoch [6/50] batch [540/796] time 0.809 (0.830) data 0.000 (0.001) loss 0.1817 (1.3644) lr 2.0000e-03 eta 8:08:14
epoch [6/50] batch [560/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.3873 (1.3570) lr 2.0000e-03 eta 8:07:55
epoch [6/50] batch [580/796] time 0.829 (0.830) data 0.000 (0.001) loss 1.5516 (1.3562) lr 2.0000e-03 eta 8:07:36
epoch [6/50] batch [600/796] time 0.828 (0.830) data 0.000 (0.001) loss 0.8288 (1.3654) lr 2.0000e-03 eta 8:07:16
epoch [6/50] batch [620/796] time 0.839 (0.830) data 0.000 (0.001) loss 0.2276 (1.3486) lr 2.0000e-03 eta 8:07:00
epoch [6/50] batch [640/796] time 0.830 (0.830) data 0.000 (0.001) loss 1.4654 (1.3511) lr 2.0000e-03 eta 8:06:43
epoch [6/50] batch [660/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.4469 (1.3445) lr 2.0000e-03 eta 8:06:30
epoch [6/50] batch [680/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.4662 (1.3374) lr 2.0000e-03 eta 8:06:14
epoch [6/50] batch [700/796] time 0.818 (0.830) data 0.000 (0.001) loss 0.2632 (1.3335) lr 2.0000e-03 eta 8:05:54
epoch [6/50] batch [720/796] time 0.817 (0.830) data 0.000 (0.001) loss 0.9172 (1.3323) lr 2.0000e-03 eta 8:05:39
epoch [6/50] batch [740/796] time 0.838 (0.830) data 0.000 (0.001) loss 2.2335 (1.3255) lr 2.0000e-03 eta 8:05:22
epoch [6/50] batch [760/796] time 0.817 (0.830) data 0.000 (0.001) loss 2.5499 (1.3203) lr 2.0000e-03 eta 8:05:04
epoch [6/50] batch [780/796] time 0.839 (0.830) data 0.000 (0.001) loss 0.8148 (1.3154) lr 2.0000e-03 eta 8:04:46
epoch [7/50] batch [20/796] time 0.839 (0.857) data 0.000 (0.024) loss 1.6772 (1.1872) lr 1.9980e-03 eta 8:19:42
epoch [7/50] batch [40/796] time 0.817 (0.842) data 0.000 (0.012) loss 1.2988 (1.2374) lr 1.9980e-03 eta 8:11:11
epoch [7/50] batch [60/796] time 0.829 (0.838) data 0.000 (0.008) loss 0.4980 (1.2780) lr 1.9980e-03 eta 8:08:09
epoch [7/50] batch [80/796] time 0.831 (0.836) data 0.000 (0.006) loss 0.0492 (1.2119) lr 1.9980e-03 eta 8:06:49
epoch [7/50] batch [100/796] time 0.830 (0.835) data 0.000 (0.005) loss 0.4443 (1.1917) lr 1.9980e-03 eta 8:06:11
epoch [7/50] batch [120/796] time 0.829 (0.834) data 0.000 (0.004) loss 1.3243 (1.1765) lr 1.9980e-03 eta 8:05:05
epoch [7/50] batch [140/796] time 0.829 (0.833) data 0.000 (0.004) loss 0.7527 (1.1474) lr 1.9980e-03 eta 8:04:25
epoch [7/50] batch [160/796] time 0.839 (0.833) data 0.000 (0.003) loss 0.5634 (1.1816) lr 1.9980e-03 eta 8:03:50
epoch [7/50] batch [180/796] time 0.838 (0.832) data 0.000 (0.003) loss 2.5553 (1.1729) lr 1.9980e-03 eta 8:03:17
epoch [7/50] batch [200/796] time 0.838 (0.832) data 0.000 (0.003) loss 1.1404 (1.1864) lr 1.9980e-03 eta 8:02:59
epoch [7/50] batch [220/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.4885 (1.1688) lr 1.9980e-03 eta 8:02:38
epoch [7/50] batch [240/796] time 0.817 (0.832) data 0.000 (0.002) loss 2.0764 (1.1747) lr 1.9980e-03 eta 8:02:18
epoch [7/50] batch [260/796] time 0.838 (0.832) data 0.000 (0.002) loss 2.5765 (1.1923) lr 1.9980e-03 eta 8:01:56
epoch [7/50] batch [280/796] time 0.820 (0.832) data 0.000 (0.002) loss 1.5609 (1.2193) lr 1.9980e-03 eta 8:01:43
epoch [7/50] batch [300/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.6321 (1.1977) lr 1.9980e-03 eta 8:01:20
epoch [7/50] batch [320/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.7154 (1.1983) lr 1.9980e-03 eta 8:00:58
epoch [7/50] batch [340/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.8226 (1.1985) lr 1.9980e-03 eta 8:00:41
epoch [7/50] batch [360/796] time 0.838 (0.831) data 0.000 (0.002) loss 0.7572 (1.2046) lr 1.9980e-03 eta 8:00:17
epoch [7/50] batch [380/796] time 0.831 (0.831) data 0.000 (0.001) loss 0.4393 (1.1972) lr 1.9980e-03 eta 8:00:01
epoch [7/50] batch [400/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.0843 (1.1876) lr 1.9980e-03 eta 7:59:43
epoch [7/50] batch [420/796] time 0.809 (0.831) data 0.000 (0.001) loss 3.1888 (1.1801) lr 1.9980e-03 eta 7:59:22
epoch [7/50] batch [440/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.4168 (1.1688) lr 1.9980e-03 eta 7:59:08
epoch [7/50] batch [460/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.2516 (1.1478) lr 1.9980e-03 eta 7:58:49
epoch [7/50] batch [480/796] time 0.836 (0.831) data 0.000 (0.001) loss 0.3409 (1.1326) lr 1.9980e-03 eta 7:58:35
epoch [7/50] batch [500/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.5158 (1.1322) lr 1.9980e-03 eta 7:58:15
epoch [7/50] batch [520/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.2519 (1.1386) lr 1.9980e-03 eta 7:57:53
epoch [7/50] batch [540/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.6412 (1.1351) lr 1.9980e-03 eta 7:57:34
epoch [7/50] batch [560/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.4256 (1.1415) lr 1.9980e-03 eta 7:57:18
epoch [7/50] batch [580/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.8196 (1.1555) lr 1.9980e-03 eta 7:57:00
epoch [7/50] batch [600/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.8234 (1.1407) lr 1.9980e-03 eta 7:56:41
epoch [7/50] batch [620/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.0187 (1.1466) lr 1.9980e-03 eta 7:56:25
epoch [7/50] batch [640/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.4272 (1.1465) lr 1.9980e-03 eta 7:56:06
epoch [7/50] batch [660/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.8788 (1.1446) lr 1.9980e-03 eta 7:55:46
epoch [7/50] batch [680/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.3404 (1.1398) lr 1.9980e-03 eta 7:55:31
epoch [7/50] batch [700/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.8416 (1.1414) lr 1.9980e-03 eta 7:55:15
epoch [7/50] batch [720/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.4917 (1.1341) lr 1.9980e-03 eta 7:54:57
epoch [7/50] batch [740/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.7405 (1.1407) lr 1.9980e-03 eta 7:54:41
epoch [7/50] batch [760/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.5227 (1.1349) lr 1.9980e-03 eta 7:54:23
epoch [7/50] batch [780/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.1400 (1.1366) lr 1.9980e-03 eta 7:54:07
epoch [8/50] batch [20/796] time 0.809 (0.858) data 0.000 (0.024) loss 0.2450 (0.9452) lr 1.9921e-03 eta 8:08:58
epoch [8/50] batch [40/796] time 0.821 (0.843) data 0.000 (0.012) loss 0.4968 (1.1868) lr 1.9921e-03 eta 8:00:19
epoch [8/50] batch [60/796] time 0.819 (0.838) data 0.000 (0.008) loss 2.5488 (1.1842) lr 1.9921e-03 eta 7:57:24
epoch [8/50] batch [80/796] time 0.838 (0.836) data 0.000 (0.006) loss 1.1090 (1.1714) lr 1.9921e-03 eta 7:55:50
epoch [8/50] batch [100/796] time 0.839 (0.835) data 0.000 (0.005) loss 2.1148 (1.1945) lr 1.9921e-03 eta 7:54:56
epoch [8/50] batch [120/796] time 0.818 (0.835) data 0.000 (0.004) loss 1.3192 (1.2177) lr 1.9921e-03 eta 7:54:23
epoch [8/50] batch [140/796] time 0.838 (0.834) data 0.000 (0.004) loss 1.8649 (1.2595) lr 1.9921e-03 eta 7:53:42
epoch [8/50] batch [160/796] time 0.845 (0.833) data 0.000 (0.003) loss 2.0987 (1.2421) lr 1.9921e-03 eta 7:53:11
epoch [8/50] batch [180/796] time 0.843 (0.833) data 0.000 (0.003) loss 1.5565 (1.2239) lr 1.9921e-03 eta 7:52:47
epoch [8/50] batch [200/796] time 0.829 (0.833) data 0.000 (0.003) loss 1.0668 (1.2240) lr 1.9921e-03 eta 7:52:25
epoch [8/50] batch [220/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.7587 (1.1931) lr 1.9921e-03 eta 7:51:53
epoch [8/50] batch [240/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.4485 (1.1845) lr 1.9921e-03 eta 7:51:30
epoch [8/50] batch [260/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.7230 (1.1574) lr 1.9921e-03 eta 7:51:14
epoch [8/50] batch [280/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.3653 (1.1546) lr 1.9921e-03 eta 7:50:48
epoch [8/50] batch [300/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.2108 (1.1562) lr 1.9921e-03 eta 7:50:25
epoch [8/50] batch [320/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.3658 (1.1412) lr 1.9921e-03 eta 7:50:06
epoch [8/50] batch [340/796] time 0.829 (0.832) data 0.000 (0.002) loss 1.5085 (1.1220) lr 1.9921e-03 eta 7:49:46
epoch [8/50] batch [360/796] time 0.836 (0.832) data 0.000 (0.002) loss 0.5440 (1.1144) lr 1.9921e-03 eta 7:49:26
epoch [8/50] batch [380/796] time 0.818 (0.832) data 0.000 (0.001) loss 1.3808 (1.1063) lr 1.9921e-03 eta 7:49:07
epoch [8/50] batch [400/796] time 0.838 (0.832) data 0.000 (0.001) loss 2.0543 (1.1115) lr 1.9921e-03 eta 7:48:51
epoch [8/50] batch [420/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.4709 (1.0999) lr 1.9921e-03 eta 7:48:36
epoch [8/50] batch [440/796] time 0.809 (0.831) data 0.000 (0.001) loss 0.3453 (1.1087) lr 1.9921e-03 eta 7:48:11
epoch [8/50] batch [460/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.1497 (1.1038) lr 1.9921e-03 eta 7:47:53
epoch [8/50] batch [480/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.5468 (1.1149) lr 1.9921e-03 eta 7:47:34
epoch [8/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.6518 (1.1107) lr 1.9921e-03 eta 7:47:16
epoch [8/50] batch [520/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.5233 (1.1156) lr 1.9921e-03 eta 7:46:56
epoch [8/50] batch [540/796] time 0.837 (0.831) data 0.000 (0.001) loss 2.3572 (1.1193) lr 1.9921e-03 eta 7:46:37
epoch [8/50] batch [560/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2637 (1.1205) lr 1.9921e-03 eta 7:46:21
epoch [8/50] batch [580/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.8142 (1.1186) lr 1.9921e-03 eta 7:46:04
epoch [8/50] batch [600/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.4149 (1.1084) lr 1.9921e-03 eta 7:45:47
epoch [8/50] batch [620/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.0910 (1.1071) lr 1.9921e-03 eta 7:45:30
epoch [8/50] batch [640/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.0223 (1.1053) lr 1.9921e-03 eta 7:45:12
epoch [8/50] batch [660/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.4298 (1.0944) lr 1.9921e-03 eta 7:44:54
epoch [8/50] batch [680/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.0595 (1.0830) lr 1.9921e-03 eta 7:44:40
epoch [8/50] batch [700/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.9337 (1.0833) lr 1.9921e-03 eta 7:44:24
epoch [8/50] batch [720/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.7179 (1.0828) lr 1.9921e-03 eta 7:44:07
epoch [8/50] batch [740/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.8605 (1.0838) lr 1.9921e-03 eta 7:43:50
epoch [8/50] batch [760/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.9223 (1.0897) lr 1.9921e-03 eta 7:43:32
epoch [8/50] batch [780/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.9632 (1.0900) lr 1.9921e-03 eta 7:43:14
epoch [9/50] batch [20/796] time 0.829 (0.852) data 0.000 (0.024) loss 1.6587 (0.9114) lr 1.9823e-03 eta 7:54:20
epoch [9/50] batch [40/796] time 0.838 (0.842) data 0.000 (0.012) loss 0.2597 (0.9315) lr 1.9823e-03 eta 7:48:43
epoch [9/50] batch [60/796] time 0.830 (0.837) data 0.000 (0.008) loss 0.7894 (1.0506) lr 1.9823e-03 eta 7:45:45
epoch [9/50] batch [80/796] time 0.817 (0.835) data 0.000 (0.006) loss 0.6998 (1.0247) lr 1.9823e-03 eta 7:44:21
epoch [9/50] batch [100/796] time 0.839 (0.835) data 0.000 (0.005) loss 1.8396 (1.0513) lr 1.9823e-03 eta 7:43:42
epoch [9/50] batch [120/796] time 0.841 (0.834) data 0.000 (0.004) loss 1.3195 (1.0411) lr 1.9823e-03 eta 7:43:08
epoch [9/50] batch [140/796] time 0.838 (0.834) data 0.000 (0.004) loss 0.7136 (1.0577) lr 1.9823e-03 eta 7:42:44
epoch [9/50] batch [160/796] time 0.819 (0.833) data 0.000 (0.003) loss 1.9143 (1.0591) lr 1.9823e-03 eta 7:42:05
epoch [9/50] batch [180/796] time 0.841 (0.833) data 0.000 (0.003) loss 1.0109 (1.0584) lr 1.9823e-03 eta 7:41:40
epoch [9/50] batch [200/796] time 0.839 (0.833) data 0.000 (0.003) loss 0.7782 (1.0450) lr 1.9823e-03 eta 7:41:12
epoch [9/50] batch [220/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.8232 (1.0560) lr 1.9823e-03 eta 7:40:41
epoch [9/50] batch [240/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.7324 (1.0616) lr 1.9823e-03 eta 7:40:11
epoch [9/50] batch [260/796] time 0.817 (0.832) data 0.000 (0.002) loss 2.2241 (1.0651) lr 1.9823e-03 eta 7:39:50
epoch [9/50] batch [280/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.0935 (1.0580) lr 1.9823e-03 eta 7:39:31
epoch [9/50] batch [300/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.6158 (1.0643) lr 1.9823e-03 eta 7:39:11
epoch [9/50] batch [320/796] time 0.841 (0.831) data 0.000 (0.002) loss 1.9331 (1.0863) lr 1.9823e-03 eta 7:38:46
epoch [9/50] batch [340/796] time 0.838 (0.831) data 0.000 (0.002) loss 1.8564 (1.0901) lr 1.9823e-03 eta 7:38:27
epoch [9/50] batch [360/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.2607 (1.0845) lr 1.9823e-03 eta 7:38:11
epoch [9/50] batch [380/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.2795 (1.0661) lr 1.9823e-03 eta 7:37:52
epoch [9/50] batch [400/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.5123 (1.0726) lr 1.9823e-03 eta 7:37:34
epoch [9/50] batch [420/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.1063 (1.0686) lr 1.9823e-03 eta 7:37:19
epoch [9/50] batch [440/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.8091 (1.0650) lr 1.9823e-03 eta 7:36:57
epoch [9/50] batch [460/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2489 (1.0685) lr 1.9823e-03 eta 7:36:40
epoch [9/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2512 (1.0690) lr 1.9823e-03 eta 7:36:25
epoch [9/50] batch [500/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.4596 (1.0622) lr 1.9823e-03 eta 7:36:09
epoch [9/50] batch [520/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.8782 (1.0528) lr 1.9823e-03 eta 7:35:51
epoch [9/50] batch [540/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.0486 (1.0627) lr 1.9823e-03 eta 7:35:34
epoch [9/50] batch [560/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.5531 (1.0674) lr 1.9823e-03 eta 7:35:18
epoch [9/50] batch [580/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.8385 (1.0688) lr 1.9823e-03 eta 7:34:58
epoch [9/50] batch [600/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.9759 (1.0621) lr 1.9823e-03 eta 7:34:41
epoch [9/50] batch [620/796] time 0.817 (0.831) data 0.000 (0.001) loss 2.3264 (1.0642) lr 1.9823e-03 eta 7:34:21
epoch [9/50] batch [640/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.1892 (1.0624) lr 1.9823e-03 eta 7:34:05
epoch [9/50] batch [660/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.4156 (1.0583) lr 1.9823e-03 eta 7:33:48
epoch [9/50] batch [680/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.4771 (1.0495) lr 1.9823e-03 eta 7:33:31
epoch [9/50] batch [700/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.4130 (1.0538) lr 1.9823e-03 eta 7:33:13
epoch [9/50] batch [720/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.6486 (1.0504) lr 1.9823e-03 eta 7:32:59
epoch [9/50] batch [740/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.4881 (1.0554) lr 1.9823e-03 eta 7:32:39
epoch [9/50] batch [760/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.3744 (1.0555) lr 1.9823e-03 eta 7:32:23
epoch [9/50] batch [780/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.9126 (1.0564) lr 1.9823e-03 eta 7:32:04
epoch [10/50] batch [20/796] time 0.831 (0.859) data 0.000 (0.024) loss 0.4937 (1.0739) lr 1.9686e-03 eta 7:46:51
epoch [10/50] batch [40/796] time 0.839 (0.845) data 0.000 (0.012) loss 2.8037 (1.1602) lr 1.9686e-03 eta 7:39:17
epoch [10/50] batch [60/796] time 0.819 (0.841) data 0.000 (0.008) loss 0.6719 (1.0096) lr 1.9686e-03 eta 7:36:32
epoch [10/50] batch [80/796] time 0.838 (0.838) data 0.000 (0.006) loss 0.3488 (1.0017) lr 1.9686e-03 eta 7:34:49
epoch [10/50] batch [100/796] time 0.818 (0.837) data 0.000 (0.005) loss 0.2812 (1.0433) lr 1.9686e-03 eta 7:33:50
epoch [10/50] batch [120/796] time 0.837 (0.836) data 0.000 (0.004) loss 1.2544 (1.0359) lr 1.9686e-03 eta 7:32:52
epoch [10/50] batch [140/796] time 0.818 (0.835) data 0.000 (0.004) loss 0.1478 (1.0011) lr 1.9686e-03 eta 7:32:08
epoch [10/50] batch [160/796] time 0.829 (0.834) data 0.000 (0.003) loss 0.4562 (0.9711) lr 1.9686e-03 eta 7:31:34
epoch [10/50] batch [180/796] time 0.847 (0.833) data 0.000 (0.003) loss 0.2043 (0.9708) lr 1.9686e-03 eta 7:30:51
epoch [10/50] batch [200/796] time 0.837 (0.833) data 0.000 (0.003) loss 0.6584 (0.9656) lr 1.9686e-03 eta 7:30:21
epoch [10/50] batch [220/796] time 0.840 (0.833) data 0.000 (0.002) loss 1.3779 (0.9790) lr 1.9686e-03 eta 7:29:48
epoch [10/50] batch [240/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.2799 (0.9807) lr 1.9686e-03 eta 7:29:23
epoch [10/50] batch [260/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.1170 (0.9844) lr 1.9686e-03 eta 7:29:04
epoch [10/50] batch [280/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.3366 (0.9792) lr 1.9686e-03 eta 7:28:35
epoch [10/50] batch [300/796] time 0.828 (0.832) data 0.000 (0.002) loss 1.2961 (0.9974) lr 1.9686e-03 eta 7:28:18
epoch [10/50] batch [320/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.4240 (1.0080) lr 1.9686e-03 eta 7:27:56
epoch [10/50] batch [340/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.0219 (1.0221) lr 1.9686e-03 eta 7:27:40
epoch [10/50] batch [360/796] time 0.817 (0.831) data 0.000 (0.002) loss 0.8815 (1.0214) lr 1.9686e-03 eta 7:27:14
epoch [10/50] batch [380/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.7970 (1.0140) lr 1.9686e-03 eta 7:26:56
epoch [10/50] batch [400/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.4074 (1.0188) lr 1.9686e-03 eta 7:26:35
epoch [10/50] batch [420/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.6622 (1.0041) lr 1.9686e-03 eta 7:26:14
epoch [10/50] batch [440/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.0721 (0.9994) lr 1.9686e-03 eta 7:25:58
epoch [10/50] batch [460/796] time 0.819 (0.831) data 0.000 (0.001) loss 2.4172 (1.0009) lr 1.9686e-03 eta 7:25:35
epoch [10/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.1391 (0.9924) lr 1.9686e-03 eta 7:25:19
epoch [10/50] batch [500/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.9017 (0.9960) lr 1.9686e-03 eta 7:25:01
epoch [10/50] batch [520/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.4375 (1.0025) lr 1.9686e-03 eta 7:24:45
epoch [10/50] batch [540/796] time 0.838 (0.831) data 0.000 (0.001) loss 2.2692 (1.0060) lr 1.9686e-03 eta 7:24:27
epoch [10/50] batch [560/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.6984 (1.0150) lr 1.9686e-03 eta 7:24:12
epoch [10/50] batch [580/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.5481 (1.0194) lr 1.9686e-03 eta 7:23:52
epoch [10/50] batch [600/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.3955 (1.0224) lr 1.9686e-03 eta 7:23:30
epoch [10/50] batch [620/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.0752 (1.0188) lr 1.9686e-03 eta 7:23:12
epoch [10/50] batch [640/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.0900 (1.0168) lr 1.9686e-03 eta 7:22:54
epoch [10/50] batch [660/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.9997 (1.0156) lr 1.9686e-03 eta 7:22:36
epoch [10/50] batch [680/796] time 0.810 (0.830) data 0.000 (0.001) loss 0.6584 (1.0103) lr 1.9686e-03 eta 7:22:18
epoch [10/50] batch [700/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.3827 (1.0216) lr 1.9686e-03 eta 7:22:03
epoch [10/50] batch [720/796] time 0.818 (0.830) data 0.000 (0.001) loss 0.2832 (1.0145) lr 1.9686e-03 eta 7:21:43
epoch [10/50] batch [740/796] time 0.818 (0.830) data 0.000 (0.001) loss 0.5607 (1.0168) lr 1.9686e-03 eta 7:21:23
epoch [10/50] batch [760/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.1037 (1.0156) lr 1.9686e-03 eta 7:21:06
epoch [10/50] batch [780/796] time 0.809 (0.830) data 0.000 (0.001) loss 2.2860 (1.0233) lr 1.9686e-03 eta 7:20:50
epoch [11/50] batch [20/796] time 0.838 (0.858) data 0.000 (0.028) loss 1.2416 (1.0830) lr 1.9511e-03 eta 7:35:01
epoch [11/50] batch [40/796] time 0.839 (0.845) data 0.000 (0.014) loss 0.6583 (1.1275) lr 1.9511e-03 eta 7:27:45
epoch [11/50] batch [60/796] time 0.837 (0.839) data 0.000 (0.009) loss 2.0308 (1.1382) lr 1.9511e-03 eta 7:24:31
epoch [11/50] batch [80/796] time 0.839 (0.837) data 0.000 (0.007) loss 0.3792 (1.1168) lr 1.9511e-03 eta 7:23:14
epoch [11/50] batch [100/796] time 0.832 (0.836) data 0.000 (0.006) loss 1.4251 (1.0733) lr 1.9511e-03 eta 7:22:09
epoch [11/50] batch [120/796] time 0.809 (0.835) data 0.000 (0.005) loss 0.1539 (1.0760) lr 1.9511e-03 eta 7:21:14
epoch [11/50] batch [140/796] time 0.817 (0.834) data 0.000 (0.004) loss 0.8647 (1.0585) lr 1.9511e-03 eta 7:20:27
epoch [11/50] batch [160/796] time 0.838 (0.833) data 0.000 (0.004) loss 0.8485 (1.0982) lr 1.9511e-03 eta 7:19:53
epoch [11/50] batch [180/796] time 0.816 (0.832) data 0.000 (0.003) loss 1.3602 (1.0899) lr 1.9511e-03 eta 7:19:15
epoch [11/50] batch [200/796] time 0.829 (0.832) data 0.000 (0.003) loss 0.4046 (1.0783) lr 1.9511e-03 eta 7:18:48
epoch [11/50] batch [220/796] time 0.817 (0.832) data 0.000 (0.003) loss 0.3453 (1.0565) lr 1.9511e-03 eta 7:18:23
epoch [11/50] batch [240/796] time 0.837 (0.832) data 0.000 (0.003) loss 0.7572 (1.0567) lr 1.9511e-03 eta 7:18:01
epoch [11/50] batch [260/796] time 0.829 (0.831) data 0.000 (0.002) loss 0.4051 (1.0608) lr 1.9511e-03 eta 7:17:28
epoch [11/50] batch [280/796] time 0.838 (0.831) data 0.000 (0.002) loss 1.5951 (1.0515) lr 1.9511e-03 eta 7:17:13
epoch [11/50] batch [300/796] time 0.828 (0.831) data 0.000 (0.002) loss 0.9558 (1.0422) lr 1.9511e-03 eta 7:16:50
epoch [11/50] batch [320/796] time 0.817 (0.831) data 0.000 (0.002) loss 0.3424 (1.0550) lr 1.9511e-03 eta 7:16:26
epoch [11/50] batch [340/796] time 0.828 (0.831) data 0.000 (0.002) loss 1.2487 (1.0478) lr 1.9511e-03 eta 7:16:06
epoch [11/50] batch [360/796] time 0.829 (0.830) data 0.000 (0.002) loss 0.6764 (1.0380) lr 1.9511e-03 eta 7:15:40
epoch [11/50] batch [380/796] time 0.830 (0.830) data 0.000 (0.002) loss 0.5581 (1.0357) lr 1.9511e-03 eta 7:15:19
epoch [11/50] batch [400/796] time 0.818 (0.830) data 0.000 (0.002) loss 2.6251 (1.0304) lr 1.9511e-03 eta 7:15:00
epoch [11/50] batch [420/796] time 0.818 (0.830) data 0.000 (0.002) loss 1.5041 (1.0318) lr 1.9511e-03 eta 7:14:41
epoch [11/50] batch [440/796] time 0.839 (0.830) data 0.000 (0.001) loss 2.5550 (1.0269) lr 1.9511e-03 eta 7:14:23
epoch [11/50] batch [460/796] time 0.839 (0.830) data 0.000 (0.001) loss 0.2603 (1.0156) lr 1.9511e-03 eta 7:14:05
epoch [11/50] batch [480/796] time 0.819 (0.830) data 0.000 (0.001) loss 0.4356 (1.0213) lr 1.9511e-03 eta 7:13:52
epoch [11/50] batch [500/796] time 0.818 (0.830) data 0.000 (0.001) loss 0.5491 (1.0254) lr 1.9511e-03 eta 7:13:38
epoch [11/50] batch [520/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.8199 (1.0280) lr 1.9511e-03 eta 7:13:20
epoch [11/50] batch [540/796] time 0.829 (0.830) data 0.000 (0.001) loss 0.7263 (1.0249) lr 1.9511e-03 eta 7:13:02
epoch [11/50] batch [560/796] time 0.839 (0.830) data 0.000 (0.001) loss 0.8501 (1.0160) lr 1.9511e-03 eta 7:12:47
epoch [11/50] batch [580/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.2158 (1.0148) lr 1.9511e-03 eta 7:12:29
epoch [11/50] batch [600/796] time 0.829 (0.830) data 0.000 (0.001) loss 0.3970 (1.0122) lr 1.9511e-03 eta 7:12:12
epoch [11/50] batch [620/796] time 0.829 (0.830) data 0.000 (0.001) loss 2.5611 (1.0148) lr 1.9511e-03 eta 7:11:55
epoch [11/50] batch [640/796] time 0.839 (0.830) data 0.000 (0.001) loss 1.3042 (1.0117) lr 1.9511e-03 eta 7:11:38
epoch [11/50] batch [660/796] time 0.837 (0.830) data 0.000 (0.001) loss 1.1107 (1.0151) lr 1.9511e-03 eta 7:11:22
epoch [11/50] batch [680/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.6138 (1.0119) lr 1.9511e-03 eta 7:11:05
epoch [11/50] batch [700/796] time 0.837 (0.830) data 0.000 (0.001) loss 2.0546 (1.0070) lr 1.9511e-03 eta 7:10:47
epoch [11/50] batch [720/796] time 0.839 (0.830) data 0.000 (0.001) loss 0.7889 (1.0078) lr 1.9511e-03 eta 7:10:31
epoch [11/50] batch [740/796] time 0.839 (0.830) data 0.000 (0.001) loss 1.1043 (1.0073) lr 1.9511e-03 eta 7:10:14
epoch [11/50] batch [760/796] time 0.810 (0.830) data 0.000 (0.001) loss 0.4556 (1.0084) lr 1.9511e-03 eta 7:09:57
epoch [11/50] batch [780/796] time 0.828 (0.830) data 0.000 (0.001) loss 1.4104 (1.0063) lr 1.9511e-03 eta 7:09:39
epoch [12/50] batch [20/796] time 0.838 (0.855) data 0.000 (0.023) loss 0.4098 (1.1441) lr 1.9298e-03 eta 7:22:15
epoch [12/50] batch [40/796] time 0.829 (0.842) data 0.000 (0.012) loss 0.2408 (0.9972) lr 1.9298e-03 eta 7:14:58
epoch [12/50] batch [60/796] time 0.830 (0.837) data 0.000 (0.008) loss 0.2383 (0.9434) lr 1.9298e-03 eta 7:12:08
epoch [12/50] batch [80/796] time 0.840 (0.835) data 0.000 (0.006) loss 1.4683 (0.9856) lr 1.9298e-03 eta 7:10:45
epoch [12/50] batch [100/796] time 0.818 (0.834) data 0.000 (0.005) loss 2.5694 (1.0580) lr 1.9298e-03 eta 7:09:52
epoch [12/50] batch [120/796] time 0.818 (0.832) data 0.000 (0.004) loss 0.9802 (1.0283) lr 1.9298e-03 eta 7:09:00
epoch [12/50] batch [140/796] time 0.829 (0.832) data 0.000 (0.004) loss 0.6043 (1.0060) lr 1.9298e-03 eta 7:08:32
epoch [12/50] batch [160/796] time 0.837 (0.832) data 0.000 (0.003) loss 1.8240 (1.0257) lr 1.9298e-03 eta 7:08:05
epoch [12/50] batch [180/796] time 0.828 (0.831) data 0.000 (0.003) loss 0.3062 (1.0092) lr 1.9298e-03 eta 7:07:40
epoch [12/50] batch [200/796] time 0.838 (0.831) data 0.000 (0.003) loss 3.6311 (1.0379) lr 1.9298e-03 eta 7:07:16
epoch [12/50] batch [220/796] time 0.829 (0.831) data 0.000 (0.002) loss 1.0649 (1.0360) lr 1.9298e-03 eta 7:07:03
epoch [12/50] batch [240/796] time 0.838 (0.831) data 0.000 (0.002) loss 0.2945 (1.0445) lr 1.9298e-03 eta 7:06:44
epoch [12/50] batch [260/796] time 0.838 (0.831) data 0.000 (0.002) loss 0.1619 (1.0288) lr 1.9298e-03 eta 7:06:25
epoch [12/50] batch [280/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.4216 (1.0145) lr 1.9298e-03 eta 7:06:05
epoch [12/50] batch [300/796] time 0.829 (0.831) data 0.000 (0.002) loss 1.2314 (1.0139) lr 1.9298e-03 eta 7:05:45
epoch [12/50] batch [320/796] time 0.817 (0.831) data 0.000 (0.002) loss 0.3873 (1.0019) lr 1.9298e-03 eta 7:05:25
epoch [12/50] batch [340/796] time 0.817 (0.831) data 0.000 (0.002) loss 0.5552 (0.9974) lr 1.9298e-03 eta 7:05:13
epoch [12/50] batch [360/796] time 0.820 (0.831) data 0.000 (0.001) loss 0.9793 (0.9933) lr 1.9298e-03 eta 7:04:57
epoch [12/50] batch [380/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.1865 (0.9912) lr 1.9298e-03 eta 7:04:39
epoch [12/50] batch [400/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.2393 (0.9955) lr 1.9298e-03 eta 7:04:26
epoch [12/50] batch [420/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.8908 (0.9886) lr 1.9298e-03 eta 7:04:06
epoch [12/50] batch [440/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.5053 (1.0022) lr 1.9298e-03 eta 7:03:47
epoch [12/50] batch [460/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.2650 (1.0024) lr 1.9298e-03 eta 7:03:25
epoch [12/50] batch [480/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.7928 (0.9979) lr 1.9298e-03 eta 7:03:07
epoch [12/50] batch [500/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.1498 (1.0067) lr 1.9298e-03 eta 7:02:48
epoch [12/50] batch [520/796] time 0.829 (0.830) data 0.000 (0.001) loss 1.6264 (1.0125) lr 1.9298e-03 eta 7:02:30
epoch [12/50] batch [540/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.4986 (1.0128) lr 1.9298e-03 eta 7:02:16
epoch [12/50] batch [560/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.5440 (1.0039) lr 1.9298e-03 eta 7:01:59
epoch [12/50] batch [580/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.7198 (1.0012) lr 1.9298e-03 eta 7:01:40
epoch [12/50] batch [600/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.4163 (1.0024) lr 1.9298e-03 eta 7:01:25
epoch [12/50] batch [620/796] time 0.809 (0.831) data 0.000 (0.001) loss 0.7872 (1.0081) lr 1.9298e-03 eta 7:01:08
epoch [12/50] batch [640/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.6512 (1.0110) lr 1.9298e-03 eta 7:00:51
epoch [12/50] batch [660/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.3610 (1.0121) lr 1.9298e-03 eta 7:00:34
epoch [12/50] batch [680/796] time 0.817 (0.830) data 0.000 (0.001) loss 0.6256 (1.0066) lr 1.9298e-03 eta 7:00:15
epoch [12/50] batch [700/796] time 0.829 (0.830) data 0.000 (0.001) loss 0.5493 (0.9979) lr 1.9298e-03 eta 6:59:59
epoch [12/50] batch [720/796] time 0.819 (0.830) data 0.000 (0.001) loss 0.2129 (0.9986) lr 1.9298e-03 eta 6:59:41
epoch [12/50] batch [740/796] time 0.833 (0.830) data 0.000 (0.001) loss 0.6984 (0.9986) lr 1.9298e-03 eta 6:59:26
epoch [12/50] batch [760/796] time 0.808 (0.830) data 0.000 (0.001) loss 0.2849 (0.9959) lr 1.9298e-03 eta 6:59:07
epoch [12/50] batch [780/796] time 0.839 (0.830) data 0.000 (0.001) loss 0.3514 (0.9997) lr 1.9298e-03 eta 6:58:50
epoch [13/50] batch [20/796] time 0.837 (0.859) data 0.000 (0.027) loss 0.1541 (0.8370) lr 1.9048e-03 eta 7:12:31
epoch [13/50] batch [40/796] time 0.839 (0.844) data 0.000 (0.014) loss 2.6206 (0.8799) lr 1.9048e-03 eta 7:04:47
epoch [13/50] batch [60/796] time 0.837 (0.838) data 0.000 (0.009) loss 1.7949 (0.9138) lr 1.9048e-03 eta 7:01:49
epoch [13/50] batch [80/796] time 0.837 (0.835) data 0.000 (0.007) loss 2.6308 (0.9724) lr 1.9048e-03 eta 6:59:48
epoch [13/50] batch [100/796] time 0.808 (0.834) data 0.000 (0.006) loss 1.1287 (0.9714) lr 1.9048e-03 eta 6:58:58
epoch [13/50] batch [120/796] time 0.829 (0.833) data 0.000 (0.005) loss 0.3444 (0.9824) lr 1.9048e-03 eta 6:58:12
epoch [13/50] batch [140/796] time 0.818 (0.833) data 0.000 (0.004) loss 0.2592 (0.9949) lr 1.9048e-03 eta 6:57:50
epoch [13/50] batch [160/796] time 0.838 (0.833) data 0.000 (0.004) loss 0.3755 (1.0251) lr 1.9048e-03 eta 6:57:40
epoch [13/50] batch [180/796] time 0.821 (0.833) data 0.000 (0.003) loss 0.6252 (1.0196) lr 1.9048e-03 eta 6:57:16
epoch [13/50] batch [200/796] time 0.839 (0.832) data 0.000 (0.003) loss 0.2024 (1.0165) lr 1.9048e-03 eta 6:56:52
epoch [13/50] batch [220/796] time 0.817 (0.832) data 0.000 (0.003) loss 0.0960 (1.0035) lr 1.9048e-03 eta 6:56:23
epoch [13/50] batch [240/796] time 0.817 (0.832) data 0.000 (0.002) loss 1.3253 (1.0027) lr 1.9048e-03 eta 6:56:02
epoch [13/50] batch [260/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.1123 (1.0086) lr 1.9048e-03 eta 6:55:40
epoch [13/50] batch [280/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.7968 (0.9903) lr 1.9048e-03 eta 6:55:23
epoch [13/50] batch [300/796] time 0.838 (0.831) data 0.000 (0.002) loss 1.6441 (0.9935) lr 1.9048e-03 eta 6:54:59
epoch [13/50] batch [320/796] time 0.829 (0.831) data 0.000 (0.002) loss 0.4227 (0.9781) lr 1.9048e-03 eta 6:54:42
epoch [13/50] batch [340/796] time 0.840 (0.831) data 0.000 (0.002) loss 0.3454 (0.9699) lr 1.9048e-03 eta 6:54:23
epoch [13/50] batch [360/796] time 0.817 (0.831) data 0.000 (0.002) loss 0.6554 (0.9655) lr 1.9048e-03 eta 6:54:03
epoch [13/50] batch [380/796] time 0.817 (0.831) data 0.000 (0.002) loss 0.6764 (0.9755) lr 1.9048e-03 eta 6:53:48
epoch [13/50] batch [400/796] time 0.837 (0.831) data 0.000 (0.002) loss 0.3904 (0.9745) lr 1.9048e-03 eta 6:53:30
epoch [13/50] batch [420/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.3554 (0.9874) lr 1.9048e-03 eta 6:53:07
epoch [13/50] batch [440/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.8137 (0.9923) lr 1.9048e-03 eta 6:52:52
epoch [13/50] batch [460/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.0351 (0.9789) lr 1.9048e-03 eta 6:52:31
epoch [13/50] batch [480/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.8836 (0.9882) lr 1.9048e-03 eta 6:52:14
epoch [13/50] batch [500/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.5719 (0.9908) lr 1.9048e-03 eta 6:51:56
epoch [13/50] batch [520/796] time 0.839 (0.831) data 0.000 (0.001) loss 2.1261 (0.9964) lr 1.9048e-03 eta 6:51:38
epoch [13/50] batch [540/796] time 0.831 (0.831) data 0.000 (0.001) loss 2.0914 (0.9943) lr 1.9048e-03 eta 6:51:21
epoch [13/50] batch [560/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.2883 (0.9975) lr 1.9048e-03 eta 6:51:03
epoch [13/50] batch [580/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.7299 (0.9947) lr 1.9048e-03 eta 6:50:47
epoch [13/50] batch [600/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.2761 (0.9928) lr 1.9048e-03 eta 6:50:28
epoch [13/50] batch [620/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.1379 (0.9973) lr 1.9048e-03 eta 6:50:11
epoch [13/50] batch [640/796] time 0.817 (0.831) data 0.000 (0.001) loss 2.2260 (1.0014) lr 1.9048e-03 eta 6:49:53
epoch [13/50] batch [660/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.5319 (1.0028) lr 1.9048e-03 eta 6:49:37
epoch [13/50] batch [680/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.6183 (0.9977) lr 1.9048e-03 eta 6:49:20
epoch [13/50] batch [700/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.1679 (0.9952) lr 1.9048e-03 eta 6:49:03
epoch [13/50] batch [720/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.0235 (1.0016) lr 1.9048e-03 eta 6:48:46
epoch [13/50] batch [740/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.6146 (0.9993) lr 1.9048e-03 eta 6:48:30
epoch [13/50] batch [760/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.7618 (0.9950) lr 1.9048e-03 eta 6:48:13
epoch [13/50] batch [780/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.6449 (0.9871) lr 1.9048e-03 eta 6:47:54
epoch [14/50] batch [20/796] time 0.838 (0.854) data 0.000 (0.025) loss 2.0674 (1.0991) lr 1.8763e-03 eta 6:58:48
epoch [14/50] batch [40/796] time 0.830 (0.843) data 0.000 (0.013) loss 1.7398 (1.1815) lr 1.8763e-03 eta 6:53:05
epoch [14/50] batch [60/796] time 0.840 (0.839) data 0.000 (0.009) loss 1.8258 (1.2057) lr 1.8763e-03 eta 6:50:50
epoch [14/50] batch [80/796] time 0.817 (0.836) data 0.000 (0.006) loss 0.2465 (1.0763) lr 1.8763e-03 eta 6:49:24
epoch [14/50] batch [100/796] time 0.830 (0.835) data 0.000 (0.005) loss 1.3310 (1.0501) lr 1.8763e-03 eta 6:48:31
epoch [14/50] batch [120/796] time 0.839 (0.834) data 0.000 (0.004) loss 0.4331 (1.0529) lr 1.8763e-03 eta 6:47:46
epoch [14/50] batch [140/796] time 0.828 (0.833) data 0.000 (0.004) loss 0.6243 (1.0347) lr 1.8763e-03 eta 6:47:06
epoch [14/50] batch [160/796] time 0.838 (0.833) data 0.000 (0.003) loss 0.8730 (1.0336) lr 1.8763e-03 eta 6:46:33
epoch [14/50] batch [180/796] time 0.818 (0.833) data 0.000 (0.003) loss 0.5400 (1.0267) lr 1.8763e-03 eta 6:46:12
epoch [14/50] batch [200/796] time 0.838 (0.833) data 0.000 (0.003) loss 3.8086 (1.0428) lr 1.8763e-03 eta 6:45:52
epoch [14/50] batch [220/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.7048 (1.0499) lr 1.8763e-03 eta 6:45:25
epoch [14/50] batch [240/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.4704 (1.0428) lr 1.8763e-03 eta 6:45:00
epoch [14/50] batch [260/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.4707 (1.0561) lr 1.8763e-03 eta 6:44:40
epoch [14/50] batch [280/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.3970 (1.0384) lr 1.8763e-03 eta 6:44:21
epoch [14/50] batch [300/796] time 0.837 (0.832) data 0.000 (0.002) loss 0.7486 (1.0274) lr 1.8763e-03 eta 6:44:05
epoch [14/50] batch [320/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.3228 (1.0255) lr 1.8763e-03 eta 6:43:43
epoch [14/50] batch [340/796] time 0.841 (0.832) data 0.000 (0.002) loss 1.3391 (1.0191) lr 1.8763e-03 eta 6:43:27
epoch [14/50] batch [360/796] time 0.837 (0.831) data 0.000 (0.002) loss 0.3692 (1.0164) lr 1.8763e-03 eta 6:43:09
epoch [14/50] batch [380/796] time 0.818 (0.831) data 0.000 (0.002) loss 0.3021 (1.0085) lr 1.8763e-03 eta 6:42:52
epoch [14/50] batch [400/796] time 0.836 (0.831) data 0.000 (0.001) loss 0.0580 (1.0016) lr 1.8763e-03 eta 6:42:32
epoch [14/50] batch [420/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.0346 (1.0006) lr 1.8763e-03 eta 6:42:14
epoch [14/50] batch [440/796] time 0.838 (0.831) data 0.000 (0.001) loss 2.2351 (1.0015) lr 1.8763e-03 eta 6:41:54
epoch [14/50] batch [460/796] time 0.828 (0.831) data 0.000 (0.001) loss 1.1239 (0.9840) lr 1.8763e-03 eta 6:41:35
epoch [14/50] batch [480/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.8314 (0.9847) lr 1.8763e-03 eta 6:41:16
epoch [14/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.1976 (0.9792) lr 1.8763e-03 eta 6:41:00
epoch [14/50] batch [520/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.9207 (0.9764) lr 1.8763e-03 eta 6:40:44
epoch [14/50] batch [540/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.5868 (0.9679) lr 1.8763e-03 eta 6:40:24
epoch [14/50] batch [560/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.3497 (0.9769) lr 1.8763e-03 eta 6:40:07
epoch [14/50] batch [580/796] time 0.818 (0.831) data 0.000 (0.001) loss 2.8707 (0.9815) lr 1.8763e-03 eta 6:39:47
epoch [14/50] batch [600/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.2923 (0.9874) lr 1.8763e-03 eta 6:39:30
epoch [14/50] batch [620/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.3429 (0.9902) lr 1.8763e-03 eta 6:39:16
epoch [14/50] batch [640/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.6540 (0.9970) lr 1.8763e-03 eta 6:39:00
epoch [14/50] batch [660/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.9654 (0.9945) lr 1.8763e-03 eta 6:38:42
epoch [14/50] batch [680/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.7761 (0.9919) lr 1.8763e-03 eta 6:38:25
epoch [14/50] batch [700/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.1670 (0.9954) lr 1.8763e-03 eta 6:38:07
epoch [14/50] batch [720/796] time 0.818 (0.831) data 0.000 (0.001) loss 2.8161 (0.9996) lr 1.8763e-03 eta 6:37:48
epoch [14/50] batch [740/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.9063 (1.0007) lr 1.8763e-03 eta 6:37:29
epoch [14/50] batch [760/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.0120 (1.0073) lr 1.8763e-03 eta 6:37:11
epoch [14/50] batch [780/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.4168 (0.9999) lr 1.8763e-03 eta 6:36:55
epoch [15/50] batch [20/796] time 0.817 (0.855) data 0.000 (0.024) loss 3.2706 (0.9449) lr 1.8443e-03 eta 6:48:10
epoch [15/50] batch [40/796] time 0.839 (0.844) data 0.000 (0.012) loss 0.4606 (1.0685) lr 1.8443e-03 eta 6:42:23
epoch [15/50] batch [60/796] time 0.829 (0.839) data 0.000 (0.008) loss 0.4267 (0.9393) lr 1.8443e-03 eta 6:39:49
epoch [15/50] batch [80/796] time 0.830 (0.836) data 0.000 (0.006) loss 0.7032 (0.9761) lr 1.8443e-03 eta 6:38:05
epoch [15/50] batch [100/796] time 0.817 (0.835) data 0.000 (0.005) loss 1.7895 (0.9907) lr 1.8443e-03 eta 6:37:26
epoch [15/50] batch [120/796] time 0.837 (0.834) data 0.000 (0.004) loss 2.9171 (0.9628) lr 1.8443e-03 eta 6:36:39
epoch [15/50] batch [140/796] time 0.838 (0.833) data 0.000 (0.004) loss 1.9847 (0.9826) lr 1.8443e-03 eta 6:36:04
epoch [15/50] batch [160/796] time 0.808 (0.833) data 0.000 (0.003) loss 2.2930 (1.0004) lr 1.8443e-03 eta 6:35:33
epoch [15/50] batch [180/796] time 0.837 (0.832) data 0.000 (0.003) loss 1.0064 (0.9716) lr 1.8443e-03 eta 6:35:02
epoch [15/50] batch [200/796] time 0.838 (0.832) data 0.000 (0.003) loss 0.2742 (0.9614) lr 1.8443e-03 eta 6:34:38
epoch [15/50] batch [220/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.1081 (0.9500) lr 1.8443e-03 eta 6:34:07
epoch [15/50] batch [240/796] time 0.837 (0.832) data 0.000 (0.002) loss 0.5937 (0.9479) lr 1.8443e-03 eta 6:33:49
epoch [15/50] batch [260/796] time 0.818 (0.831) data 0.000 (0.002) loss 0.4334 (0.9607) lr 1.8443e-03 eta 6:33:28
epoch [15/50] batch [280/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.7915 (0.9554) lr 1.8443e-03 eta 6:33:06
epoch [15/50] batch [300/796] time 0.839 (0.831) data 0.000 (0.002) loss 1.3529 (0.9576) lr 1.8443e-03 eta 6:32:49
epoch [15/50] batch [320/796] time 0.830 (0.831) data 0.000 (0.002) loss 0.4290 (0.9708) lr 1.8443e-03 eta 6:32:29
epoch [15/50] batch [340/796] time 0.838 (0.831) data 0.000 (0.002) loss 0.7744 (0.9615) lr 1.8443e-03 eta 6:32:11
epoch [15/50] batch [360/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.6318 (0.9660) lr 1.8443e-03 eta 6:31:51
epoch [15/50] batch [380/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.3276 (0.9576) lr 1.8443e-03 eta 6:31:33
epoch [15/50] batch [400/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.8416 (0.9554) lr 1.8443e-03 eta 6:31:16
epoch [15/50] batch [420/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.7807 (0.9656) lr 1.8443e-03 eta 6:30:58
epoch [15/50] batch [440/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.1311 (0.9618) lr 1.8443e-03 eta 6:30:38
epoch [15/50] batch [460/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.2836 (0.9660) lr 1.8443e-03 eta 6:30:22
epoch [15/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.3383 (0.9640) lr 1.8443e-03 eta 6:30:07
epoch [15/50] batch [500/796] time 0.843 (0.831) data 0.000 (0.001) loss 2.9546 (0.9735) lr 1.8443e-03 eta 6:29:52
epoch [15/50] batch [520/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2354 (0.9752) lr 1.8443e-03 eta 6:29:32
epoch [15/50] batch [540/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.2560 (0.9659) lr 1.8443e-03 eta 6:29:14
epoch [15/50] batch [560/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.5225 (0.9636) lr 1.8443e-03 eta 6:28:57
epoch [15/50] batch [580/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.8904 (0.9716) lr 1.8443e-03 eta 6:28:42
epoch [15/50] batch [600/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.3977 (0.9571) lr 1.8443e-03 eta 6:28:25
epoch [15/50] batch [620/796] time 0.821 (0.831) data 0.000 (0.001) loss 0.6052 (0.9502) lr 1.8443e-03 eta 6:28:09
epoch [15/50] batch [640/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.7304 (0.9540) lr 1.8443e-03 eta 6:27:53
epoch [15/50] batch [660/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.2151 (0.9524) lr 1.8443e-03 eta 6:27:37
epoch [15/50] batch [680/796] time 0.809 (0.831) data 0.000 (0.001) loss 0.7796 (0.9552) lr 1.8443e-03 eta 6:27:18
epoch [15/50] batch [700/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.4323 (0.9535) lr 1.8443e-03 eta 6:27:00
epoch [15/50] batch [720/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.0173 (0.9511) lr 1.8443e-03 eta 6:26:42
epoch [15/50] batch [740/796] time 0.829 (0.831) data 0.000 (0.001) loss 2.1018 (0.9471) lr 1.8443e-03 eta 6:26:25
epoch [15/50] batch [760/796] time 0.810 (0.831) data 0.000 (0.001) loss 0.2775 (0.9506) lr 1.8443e-03 eta 6:26:09
epoch [15/50] batch [780/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.0101 (0.9546) lr 1.8443e-03 eta 6:25:51
epoch [16/50] batch [20/796] time 0.830 (0.859) data 0.000 (0.026) loss 1.0248 (0.8801) lr 1.8090e-03 eta 6:38:29
epoch [16/50] batch [40/796] time 0.818 (0.845) data 0.000 (0.013) loss 0.4369 (0.8831) lr 1.8090e-03 eta 6:31:42
epoch [16/50] batch [60/796] time 0.841 (0.839) data 0.000 (0.009) loss 0.9572 (0.8502) lr 1.8090e-03 eta 6:28:52
epoch [16/50] batch [80/796] time 0.836 (0.837) data 0.000 (0.007) loss 0.8446 (0.8275) lr 1.8090e-03 eta 6:27:23
epoch [16/50] batch [100/796] time 0.820 (0.836) data 0.000 (0.005) loss 1.2356 (0.8695) lr 1.8090e-03 eta 6:26:42
epoch [16/50] batch [120/796] time 0.819 (0.835) data 0.000 (0.004) loss 1.1991 (0.8893) lr 1.8090e-03 eta 6:26:12
epoch [16/50] batch [140/796] time 0.840 (0.835) data 0.000 (0.004) loss 0.3607 (0.9148) lr 1.8090e-03 eta 6:25:37
epoch [16/50] batch [160/796] time 0.818 (0.834) data 0.000 (0.003) loss 0.0600 (0.9598) lr 1.8090e-03 eta 6:25:00
epoch [16/50] batch [180/796] time 0.817 (0.833) data 0.000 (0.003) loss 0.3522 (0.9413) lr 1.8090e-03 eta 6:24:30
epoch [16/50] batch [200/796] time 0.839 (0.833) data 0.000 (0.003) loss 2.3792 (0.9421) lr 1.8090e-03 eta 6:24:07
epoch [16/50] batch [220/796] time 0.830 (0.833) data 0.000 (0.003) loss 0.4493 (0.9600) lr 1.8090e-03 eta 6:23:37
epoch [16/50] batch [240/796] time 0.831 (0.833) data 0.000 (0.002) loss 0.2054 (0.9601) lr 1.8090e-03 eta 6:23:17
epoch [16/50] batch [260/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.5133 (0.9599) lr 1.8090e-03 eta 6:22:54
epoch [16/50] batch [280/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.5572 (0.9737) lr 1.8090e-03 eta 6:22:32
epoch [16/50] batch [300/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.4986 (0.9734) lr 1.8090e-03 eta 6:22:16
epoch [16/50] batch [320/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.3965 (0.9603) lr 1.8090e-03 eta 6:22:00
epoch [16/50] batch [340/796] time 0.818 (0.832) data 0.000 (0.002) loss 1.3983 (0.9730) lr 1.8090e-03 eta 6:21:42
epoch [16/50] batch [360/796] time 0.808 (0.832) data 0.000 (0.002) loss 1.8176 (0.9722) lr 1.8090e-03 eta 6:21:23
epoch [16/50] batch [380/796] time 0.840 (0.832) data 0.000 (0.002) loss 0.2712 (0.9584) lr 1.8090e-03 eta 6:21:05
epoch [16/50] batch [400/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.3611 (0.9483) lr 1.8090e-03 eta 6:20:47
epoch [16/50] batch [420/796] time 0.832 (0.832) data 0.000 (0.001) loss 0.8712 (0.9455) lr 1.8090e-03 eta 6:20:31
epoch [16/50] batch [440/796] time 0.809 (0.832) data 0.000 (0.001) loss 0.1305 (0.9470) lr 1.8090e-03 eta 6:20:12
epoch [16/50] batch [460/796] time 0.819 (0.832) data 0.000 (0.001) loss 1.0953 (0.9438) lr 1.8090e-03 eta 6:19:55
epoch [16/50] batch [480/796] time 0.829 (0.832) data 0.000 (0.001) loss 0.0376 (0.9453) lr 1.8090e-03 eta 6:19:36
epoch [16/50] batch [500/796] time 0.837 (0.832) data 0.000 (0.001) loss 0.1132 (0.9403) lr 1.8090e-03 eta 6:19:17
epoch [16/50] batch [520/796] time 0.830 (0.832) data 0.000 (0.001) loss 1.5813 (0.9374) lr 1.8090e-03 eta 6:18:58
epoch [16/50] batch [540/796] time 0.819 (0.832) data 0.000 (0.001) loss 0.6329 (0.9312) lr 1.8090e-03 eta 6:18:40
epoch [16/50] batch [560/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.4752 (0.9280) lr 1.8090e-03 eta 6:18:22
epoch [16/50] batch [580/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.7551 (0.9227) lr 1.8090e-03 eta 6:18:05
epoch [16/50] batch [600/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.5739 (0.9229) lr 1.8090e-03 eta 6:17:45
epoch [16/50] batch [620/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.4380 (0.9257) lr 1.8090e-03 eta 6:17:28
epoch [16/50] batch [640/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.6457 (0.9300) lr 1.8090e-03 eta 6:17:10
epoch [16/50] batch [660/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.5212 (0.9280) lr 1.8090e-03 eta 6:16:52
epoch [16/50] batch [680/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.3099 (0.9275) lr 1.8090e-03 eta 6:16:33
epoch [16/50] batch [700/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.5443 (0.9310) lr 1.8090e-03 eta 6:16:15
epoch [16/50] batch [720/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.4717 (0.9341) lr 1.8090e-03 eta 6:15:57
epoch [16/50] batch [740/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.8394 (0.9317) lr 1.8090e-03 eta 6:15:39
epoch [16/50] batch [760/796] time 0.822 (0.831) data 0.000 (0.001) loss 1.2217 (0.9324) lr 1.8090e-03 eta 6:15:22
epoch [16/50] batch [780/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.9131 (0.9343) lr 1.8090e-03 eta 6:15:04
epoch [17/50] batch [20/796] time 0.837 (0.858) data 0.000 (0.028) loss 1.4269 (1.1323) lr 1.7705e-03 eta 6:26:35
epoch [17/50] batch [40/796] time 0.830 (0.845) data 0.000 (0.014) loss 0.4133 (0.9288) lr 1.7705e-03 eta 6:20:24
epoch [17/50] batch [60/796] time 0.838 (0.840) data 0.000 (0.010) loss 0.7944 (0.9820) lr 1.7705e-03 eta 6:18:15
epoch [17/50] batch [80/796] time 0.829 (0.837) data 0.000 (0.007) loss 2.4485 (0.9741) lr 1.7705e-03 eta 6:16:26
epoch [17/50] batch [100/796] time 0.839 (0.836) data 0.000 (0.006) loss 0.3398 (1.0020) lr 1.7705e-03 eta 6:15:30
epoch [17/50] batch [120/796] time 0.820 (0.835) data 0.000 (0.005) loss 2.4155 (1.0371) lr 1.7705e-03 eta 6:14:47
epoch [17/50] batch [140/796] time 0.840 (0.835) data 0.000 (0.004) loss 1.2125 (1.0021) lr 1.7705e-03 eta 6:14:31
epoch [17/50] batch [160/796] time 0.837 (0.834) data 0.000 (0.004) loss 0.6051 (1.0104) lr 1.7705e-03 eta 6:13:56
epoch [17/50] batch [180/796] time 0.829 (0.834) data 0.000 (0.003) loss 0.4452 (0.9749) lr 1.7705e-03 eta 6:13:32
epoch [17/50] batch [200/796] time 0.838 (0.833) data 0.000 (0.003) loss 1.5266 (1.0240) lr 1.7705e-03 eta 6:13:05
epoch [17/50] batch [220/796] time 0.817 (0.833) data 0.000 (0.003) loss 0.4768 (0.9988) lr 1.7705e-03 eta 6:12:39
epoch [17/50] batch [240/796] time 0.830 (0.833) data 0.000 (0.003) loss 0.4714 (0.9872) lr 1.7705e-03 eta 6:12:16
epoch [17/50] batch [260/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.8645 (0.9804) lr 1.7705e-03 eta 6:11:56
epoch [17/50] batch [280/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.9109 (0.9756) lr 1.7705e-03 eta 6:11:33
epoch [17/50] batch [300/796] time 0.818 (0.832) data 0.000 (0.002) loss 1.2279 (0.9833) lr 1.7705e-03 eta 6:11:14
epoch [17/50] batch [320/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.4651 (0.9781) lr 1.7705e-03 eta 6:10:53
epoch [17/50] batch [340/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.8763 (0.9722) lr 1.7705e-03 eta 6:10:29
epoch [17/50] batch [360/796] time 0.818 (0.832) data 0.000 (0.002) loss 1.8627 (0.9677) lr 1.7705e-03 eta 6:10:09
epoch [17/50] batch [380/796] time 0.818 (0.832) data 0.000 (0.002) loss 1.6050 (0.9608) lr 1.7705e-03 eta 6:09:55
epoch [17/50] batch [400/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.0421 (0.9512) lr 1.7705e-03 eta 6:09:35
epoch [17/50] batch [420/796] time 0.829 (0.831) data 0.000 (0.002) loss 0.2100 (0.9511) lr 1.7705e-03 eta 6:09:13
epoch [17/50] batch [440/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.1737 (0.9448) lr 1.7705e-03 eta 6:08:54
epoch [17/50] batch [460/796] time 0.819 (0.831) data 0.000 (0.001) loss 2.6573 (0.9478) lr 1.7705e-03 eta 6:08:34
epoch [17/50] batch [480/796] time 0.819 (0.831) data 0.000 (0.001) loss 2.4456 (0.9622) lr 1.7705e-03 eta 6:08:18
epoch [17/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.1949 (0.9704) lr 1.7705e-03 eta 6:08:02
epoch [17/50] batch [520/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.1072 (0.9653) lr 1.7705e-03 eta 6:07:44
epoch [17/50] batch [540/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.0633 (0.9785) lr 1.7705e-03 eta 6:07:26
epoch [17/50] batch [560/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.7476 (0.9750) lr 1.7705e-03 eta 6:07:06
epoch [17/50] batch [580/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.5164 (0.9710) lr 1.7705e-03 eta 6:06:48
epoch [17/50] batch [600/796] time 0.818 (0.831) data 0.001 (0.001) loss 1.8876 (0.9729) lr 1.7705e-03 eta 6:06:31
epoch [17/50] batch [620/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.1902 (0.9710) lr 1.7705e-03 eta 6:06:11
epoch [17/50] batch [640/796] time 0.845 (0.831) data 0.000 (0.001) loss 0.2276 (0.9735) lr 1.7705e-03 eta 6:05:54
epoch [17/50] batch [660/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.4058 (0.9712) lr 1.7705e-03 eta 6:05:35
epoch [17/50] batch [680/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.1779 (0.9654) lr 1.7705e-03 eta 6:05:18
epoch [17/50] batch [700/796] time 0.817 (0.831) data 0.000 (0.001) loss 2.2118 (0.9630) lr 1.7705e-03 eta 6:05:01
epoch [17/50] batch [720/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.2743 (0.9628) lr 1.7705e-03 eta 6:04:43
epoch [17/50] batch [740/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.7573 (0.9593) lr 1.7705e-03 eta 6:04:28
epoch [17/50] batch [760/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.3926 (0.9528) lr 1.7705e-03 eta 6:04:11
epoch [17/50] batch [780/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2111 (0.9489) lr 1.7705e-03 eta 6:03:54
epoch [18/50] batch [20/796] time 0.839 (0.862) data 0.000 (0.031) loss 0.9811 (0.8330) lr 1.7290e-03 eta 6:16:52
epoch [18/50] batch [40/796] time 0.818 (0.845) data 0.000 (0.015) loss 3.3805 (0.7853) lr 1.7290e-03 eta 6:09:10
epoch [18/50] batch [60/796] time 0.818 (0.840) data 0.000 (0.010) loss 0.6431 (0.7943) lr 1.7290e-03 eta 6:07:03
epoch [18/50] batch [80/796] time 0.818 (0.838) data 0.000 (0.008) loss 0.3573 (0.8305) lr 1.7290e-03 eta 6:05:36
epoch [18/50] batch [100/796] time 0.838 (0.836) data 0.000 (0.006) loss 1.7117 (0.8520) lr 1.7290e-03 eta 6:04:26
epoch [18/50] batch [120/796] time 0.809 (0.835) data 0.000 (0.005) loss 2.3009 (0.8631) lr 1.7290e-03 eta 6:03:45
epoch [18/50] batch [140/796] time 0.818 (0.834) data 0.000 (0.005) loss 1.7038 (0.8647) lr 1.7290e-03 eta 6:03:17
epoch [18/50] batch [160/796] time 0.809 (0.834) data 0.000 (0.004) loss 0.1471 (0.8641) lr 1.7290e-03 eta 6:02:53
epoch [18/50] batch [180/796] time 0.838 (0.834) data 0.000 (0.004) loss 1.7633 (0.8751) lr 1.7290e-03 eta 6:02:28
epoch [18/50] batch [200/796] time 0.839 (0.833) data 0.000 (0.003) loss 0.3003 (0.8722) lr 1.7290e-03 eta 6:01:56
epoch [18/50] batch [220/796] time 0.829 (0.833) data 0.000 (0.003) loss 0.3133 (0.8907) lr 1.7290e-03 eta 6:01:27
epoch [18/50] batch [240/796] time 0.838 (0.832) data 0.000 (0.003) loss 0.1879 (0.8876) lr 1.7290e-03 eta 6:01:05
epoch [18/50] batch [260/796] time 0.840 (0.832) data 0.000 (0.003) loss 2.1175 (0.9086) lr 1.7290e-03 eta 6:00:40
epoch [18/50] batch [280/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.3221 (0.9311) lr 1.7290e-03 eta 6:00:19
epoch [18/50] batch [300/796] time 0.843 (0.832) data 0.000 (0.002) loss 0.0924 (0.9254) lr 1.7290e-03 eta 6:00:03
epoch [18/50] batch [320/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.3582 (0.9261) lr 1.7290e-03 eta 5:59:44
epoch [18/50] batch [340/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.6279 (0.9408) lr 1.7290e-03 eta 5:59:22
epoch [18/50] batch [360/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.9756 (0.9273) lr 1.7290e-03 eta 5:59:03
epoch [18/50] batch [380/796] time 0.819 (0.832) data 0.000 (0.002) loss 1.3409 (0.9432) lr 1.7290e-03 eta 5:58:45
epoch [18/50] batch [400/796] time 0.818 (0.831) data 0.000 (0.002) loss 2.3485 (0.9556) lr 1.7290e-03 eta 5:58:27
epoch [18/50] batch [420/796] time 0.839 (0.831) data 0.000 (0.002) loss 1.7136 (0.9446) lr 1.7290e-03 eta 5:58:12
epoch [18/50] batch [440/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.5420 (0.9375) lr 1.7290e-03 eta 5:57:56
epoch [18/50] batch [460/796] time 0.838 (0.831) data 0.000 (0.002) loss 1.2215 (0.9283) lr 1.7290e-03 eta 5:57:35
epoch [18/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2618 (0.9255) lr 1.7290e-03 eta 5:57:15
epoch [18/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.8026 (0.9380) lr 1.7290e-03 eta 5:56:58
epoch [18/50] batch [520/796] time 0.820 (0.831) data 0.000 (0.001) loss 0.8291 (0.9362) lr 1.7290e-03 eta 5:56:40
epoch [18/50] batch [540/796] time 0.831 (0.831) data 0.000 (0.001) loss 0.6118 (0.9354) lr 1.7290e-03 eta 5:56:20
epoch [18/50] batch [560/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.8948 (0.9342) lr 1.7290e-03 eta 5:56:05
epoch [18/50] batch [580/796] time 0.820 (0.831) data 0.000 (0.001) loss 0.8659 (0.9367) lr 1.7290e-03 eta 5:55:47
epoch [18/50] batch [600/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.3307 (0.9345) lr 1.7290e-03 eta 5:55:30
epoch [18/50] batch [620/796] time 0.841 (0.831) data 0.000 (0.001) loss 0.6718 (0.9329) lr 1.7290e-03 eta 5:55:11
epoch [18/50] batch [640/796] time 0.841 (0.831) data 0.000 (0.001) loss 1.4908 (0.9350) lr 1.7290e-03 eta 5:54:54
epoch [18/50] batch [660/796] time 0.841 (0.831) data 0.000 (0.001) loss 0.6808 (0.9352) lr 1.7290e-03 eta 5:54:38
epoch [18/50] batch [680/796] time 0.821 (0.831) data 0.000 (0.001) loss 0.4817 (0.9296) lr 1.7290e-03 eta 5:54:23
epoch [18/50] batch [700/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.3373 (0.9230) lr 1.7290e-03 eta 5:54:08
epoch [18/50] batch [720/796] time 0.811 (0.831) data 0.000 (0.001) loss 1.3617 (0.9211) lr 1.7290e-03 eta 5:53:50
epoch [18/50] batch [740/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.0480 (0.9248) lr 1.7290e-03 eta 5:53:33
epoch [18/50] batch [760/796] time 0.824 (0.831) data 0.000 (0.001) loss 0.5777 (0.9160) lr 1.7290e-03 eta 5:53:16
epoch [18/50] batch [780/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.6822 (0.9212) lr 1.7290e-03 eta 5:52:59
epoch [19/50] batch [20/796] time 0.831 (0.856) data 0.000 (0.027) loss 0.5453 (0.5659) lr 1.6845e-03 eta 6:02:57
epoch [19/50] batch [40/796] time 0.838 (0.844) data 0.000 (0.014) loss 2.8194 (0.9138) lr 1.6845e-03 eta 5:57:36
epoch [19/50] batch [60/796] time 0.838 (0.840) data 0.000 (0.009) loss 0.4227 (0.8983) lr 1.6845e-03 eta 5:55:55
epoch [19/50] batch [80/796] time 0.832 (0.837) data 0.000 (0.007) loss 0.2401 (0.9290) lr 1.6845e-03 eta 5:54:11
epoch [19/50] batch [100/796] time 0.829 (0.835) data 0.000 (0.006) loss 1.1311 (0.9334) lr 1.6845e-03 eta 5:53:16
epoch [19/50] batch [120/796] time 0.819 (0.834) data 0.000 (0.005) loss 0.0676 (0.9624) lr 1.6845e-03 eta 5:52:32
epoch [19/50] batch [140/796] time 0.838 (0.834) data 0.000 (0.004) loss 2.0231 (0.9586) lr 1.6845e-03 eta 5:52:03
epoch [19/50] batch [160/796] time 0.838 (0.834) data 0.000 (0.004) loss 1.0086 (0.9654) lr 1.6845e-03 eta 5:51:38
epoch [19/50] batch [180/796] time 0.839 (0.833) data 0.000 (0.003) loss 0.2331 (0.9397) lr 1.6845e-03 eta 5:51:15
epoch [19/50] batch [200/796] time 0.828 (0.833) data 0.000 (0.003) loss 0.0838 (0.9353) lr 1.6845e-03 eta 5:50:52
epoch [19/50] batch [220/796] time 0.837 (0.833) data 0.000 (0.003) loss 0.5003 (0.9330) lr 1.6845e-03 eta 5:50:26
epoch [19/50] batch [240/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.9878 (0.9319) lr 1.6845e-03 eta 5:50:00
epoch [19/50] batch [260/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.8164 (0.9274) lr 1.6845e-03 eta 5:49:34
epoch [19/50] batch [280/796] time 0.840 (0.832) data 0.000 (0.002) loss 1.0733 (0.9398) lr 1.6845e-03 eta 5:49:12
epoch [19/50] batch [300/796] time 0.819 (0.832) data 0.000 (0.002) loss 0.3824 (0.9236) lr 1.6845e-03 eta 5:48:50
epoch [19/50] batch [320/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.4527 (0.9048) lr 1.6845e-03 eta 5:48:33
epoch [19/50] batch [340/796] time 0.829 (0.831) data 0.000 (0.002) loss 0.8385 (0.9083) lr 1.6845e-03 eta 5:48:14
epoch [19/50] batch [360/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.8476 (0.9048) lr 1.6845e-03 eta 5:47:59
epoch [19/50] batch [380/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.2824 (0.8988) lr 1.6845e-03 eta 5:47:42
epoch [19/50] batch [400/796] time 0.844 (0.832) data 0.000 (0.002) loss 0.4183 (0.9056) lr 1.6845e-03 eta 5:47:27
epoch [19/50] batch [420/796] time 0.838 (0.831) data 0.000 (0.002) loss 0.2326 (0.9053) lr 1.6845e-03 eta 5:47:08
epoch [19/50] batch [440/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.4542 (0.9253) lr 1.6845e-03 eta 5:46:51
epoch [19/50] batch [460/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.4322 (0.9234) lr 1.6845e-03 eta 5:46:34
epoch [19/50] batch [480/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.8677 (0.9219) lr 1.6845e-03 eta 5:46:15
epoch [19/50] batch [500/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.8940 (0.9247) lr 1.6845e-03 eta 5:46:00
epoch [19/50] batch [520/796] time 0.811 (0.831) data 0.003 (0.001) loss 0.2326 (0.9262) lr 1.6845e-03 eta 5:45:44
epoch [19/50] batch [540/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.4237 (0.9188) lr 1.6845e-03 eta 5:45:24
epoch [19/50] batch [560/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.1632 (0.9148) lr 1.6845e-03 eta 5:45:07
epoch [19/50] batch [580/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.1319 (0.9233) lr 1.6845e-03 eta 5:44:51
epoch [19/50] batch [600/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.7558 (0.9305) lr 1.6845e-03 eta 5:44:34
epoch [19/50] batch [620/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.4925 (0.9383) lr 1.6845e-03 eta 5:44:15
epoch [19/50] batch [640/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.1154 (0.9368) lr 1.6845e-03 eta 5:43:58
epoch [19/50] batch [660/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.9298 (0.9421) lr 1.6845e-03 eta 5:43:38
epoch [19/50] batch [680/796] time 0.809 (0.831) data 0.000 (0.001) loss 0.9592 (0.9552) lr 1.6845e-03 eta 5:43:21
epoch [19/50] batch [700/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.6155 (0.9547) lr 1.6845e-03 eta 5:43:05
epoch [19/50] batch [720/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.5210 (0.9455) lr 1.6845e-03 eta 5:42:48
epoch [19/50] batch [740/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.4120 (0.9454) lr 1.6845e-03 eta 5:42:30
epoch [19/50] batch [760/796] time 0.820 (0.831) data 0.000 (0.001) loss 1.1158 (0.9502) lr 1.6845e-03 eta 5:42:12
epoch [19/50] batch [780/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2273 (0.9460) lr 1.6845e-03 eta 5:41:55
epoch [20/50] batch [20/796] time 0.830 (0.858) data 0.000 (0.027) loss 0.1852 (0.7901) lr 1.6374e-03 eta 5:52:24
epoch [20/50] batch [40/796] time 0.838 (0.845) data 0.000 (0.014) loss 0.3669 (0.9361) lr 1.6374e-03 eta 5:46:59
epoch [20/50] batch [60/796] time 0.841 (0.841) data 0.000 (0.009) loss 0.9750 (0.9288) lr 1.6374e-03 eta 5:45:10
epoch [20/50] batch [80/796] time 0.838 (0.839) data 0.000 (0.007) loss 0.3920 (0.9294) lr 1.6374e-03 eta 5:44:00
epoch [20/50] batch [100/796] time 0.830 (0.838) data 0.000 (0.006) loss 1.2911 (1.0258) lr 1.6374e-03 eta 5:43:06
epoch [20/50] batch [120/796] time 0.818 (0.836) data 0.000 (0.005) loss 0.2470 (0.9415) lr 1.6374e-03 eta 5:42:09
epoch [20/50] batch [140/796] time 0.855 (0.836) data 0.000 (0.004) loss 0.6163 (0.9472) lr 1.6374e-03 eta 5:41:59
epoch [20/50] batch [160/796] time 0.829 (0.836) data 0.000 (0.004) loss 1.6906 (0.9134) lr 1.6374e-03 eta 5:41:43
epoch [20/50] batch [180/796] time 0.818 (0.836) data 0.000 (0.003) loss 0.2294 (0.9538) lr 1.6374e-03 eta 5:41:12
epoch [20/50] batch [200/796] time 0.838 (0.835) data 0.000 (0.003) loss 0.2955 (0.9519) lr 1.6374e-03 eta 5:40:40
epoch [20/50] batch [220/796] time 0.837 (0.835) data 0.000 (0.003) loss 0.6992 (0.9337) lr 1.6374e-03 eta 5:40:12
epoch [20/50] batch [240/796] time 0.829 (0.834) data 0.000 (0.002) loss 1.2787 (0.9514) lr 1.6374e-03 eta 5:39:46
epoch [20/50] batch [260/796] time 0.839 (0.834) data 0.000 (0.002) loss 0.7756 (0.9491) lr 1.6374e-03 eta 5:39:25
epoch [20/50] batch [280/796] time 0.840 (0.834) data 0.000 (0.002) loss 0.2225 (0.9426) lr 1.6374e-03 eta 5:39:05
epoch [20/50] batch [300/796] time 0.819 (0.834) data 0.000 (0.002) loss 0.2916 (0.9416) lr 1.6374e-03 eta 5:38:39
epoch [20/50] batch [320/796] time 0.839 (0.834) data 0.000 (0.002) loss 1.5805 (0.9554) lr 1.6374e-03 eta 5:38:21
epoch [20/50] batch [340/796] time 0.839 (0.833) data 0.000 (0.002) loss 0.7598 (0.9528) lr 1.6374e-03 eta 5:38:01
epoch [20/50] batch [360/796] time 0.839 (0.833) data 0.000 (0.002) loss 0.1857 (0.9533) lr 1.6374e-03 eta 5:37:43
epoch [20/50] batch [380/796] time 0.848 (0.833) data 0.000 (0.002) loss 1.2060 (0.9436) lr 1.6374e-03 eta 5:37:24
epoch [20/50] batch [400/796] time 0.830 (0.833) data 0.000 (0.002) loss 0.4110 (0.9327) lr 1.6374e-03 eta 5:37:02
epoch [20/50] batch [420/796] time 0.809 (0.833) data 0.000 (0.002) loss 0.6628 (0.9198) lr 1.6374e-03 eta 5:36:43
epoch [20/50] batch [440/796] time 0.838 (0.833) data 0.000 (0.001) loss 0.7388 (0.9195) lr 1.6374e-03 eta 5:36:21
epoch [20/50] batch [460/796] time 0.830 (0.833) data 0.000 (0.001) loss 0.1059 (0.9117) lr 1.6374e-03 eta 5:36:05
epoch [20/50] batch [480/796] time 0.828 (0.833) data 0.000 (0.001) loss 0.1655 (0.9145) lr 1.6374e-03 eta 5:35:44
epoch [20/50] batch [500/796] time 0.808 (0.832) data 0.000 (0.001) loss 0.3795 (0.9266) lr 1.6374e-03 eta 5:35:23
epoch [20/50] batch [520/796] time 0.817 (0.832) data 0.000 (0.001) loss 0.0118 (0.9241) lr 1.6374e-03 eta 5:35:05
epoch [20/50] batch [540/796] time 0.817 (0.832) data 0.000 (0.001) loss 1.0058 (0.9173) lr 1.6374e-03 eta 5:34:45
epoch [20/50] batch [560/796] time 0.828 (0.832) data 0.000 (0.001) loss 0.5386 (0.9098) lr 1.6374e-03 eta 5:34:26
epoch [20/50] batch [580/796] time 0.817 (0.832) data 0.000 (0.001) loss 0.1917 (0.9025) lr 1.6374e-03 eta 5:34:05
epoch [20/50] batch [600/796] time 0.817 (0.832) data 0.000 (0.001) loss 1.0792 (0.9034) lr 1.6374e-03 eta 5:33:46
epoch [20/50] batch [620/796] time 0.818 (0.832) data 0.000 (0.001) loss 1.2911 (0.9078) lr 1.6374e-03 eta 5:33:29
epoch [20/50] batch [640/796] time 0.843 (0.832) data 0.000 (0.001) loss 0.1436 (0.9126) lr 1.6374e-03 eta 5:33:10
epoch [20/50] batch [660/796] time 0.829 (0.832) data 0.000 (0.001) loss 1.1401 (0.9135) lr 1.6374e-03 eta 5:32:51
epoch [20/50] batch [680/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.5051 (0.9230) lr 1.6374e-03 eta 5:32:30
epoch [20/50] batch [700/796] time 0.833 (0.831) data 0.000 (0.001) loss 1.3465 (0.9240) lr 1.6374e-03 eta 5:32:13
epoch [20/50] batch [720/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.3295 (0.9204) lr 1.6374e-03 eta 5:31:56
epoch [20/50] batch [740/796] time 0.839 (0.831) data 0.000 (0.001) loss 2.0075 (0.9252) lr 1.6374e-03 eta 5:31:37
epoch [20/50] batch [760/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.0076 (0.9299) lr 1.6374e-03 eta 5:31:19
epoch [20/50] batch [780/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.4040 (0.9301) lr 1.6374e-03 eta 5:31:02
epoch [21/50] batch [20/796] time 0.839 (0.860) data 0.000 (0.028) loss 0.6560 (0.9131) lr 1.5878e-03 eta 5:41:56
epoch [21/50] batch [40/796] time 0.816 (0.844) data 0.000 (0.014) loss 1.4358 (0.9305) lr 1.5878e-03 eta 5:35:24
epoch [21/50] batch [60/796] time 0.829 (0.839) data 0.000 (0.009) loss 0.7881 (0.9062) lr 1.5878e-03 eta 5:33:13
epoch [21/50] batch [80/796] time 0.839 (0.838) data 0.000 (0.007) loss 0.7887 (0.8277) lr 1.5878e-03 eta 5:32:14
epoch [21/50] batch [100/796] time 0.838 (0.836) data 0.000 (0.006) loss 1.5416 (0.8679) lr 1.5878e-03 eta 5:31:22
epoch [21/50] batch [120/796] time 0.828 (0.835) data 0.000 (0.005) loss 1.5189 (0.9078) lr 1.5878e-03 eta 5:30:39
epoch [21/50] batch [140/796] time 0.837 (0.834) data 0.000 (0.004) loss 0.4735 (0.9139) lr 1.5878e-03 eta 5:30:08
epoch [21/50] batch [160/796] time 0.817 (0.834) data 0.000 (0.004) loss 1.6385 (0.9263) lr 1.5878e-03 eta 5:29:38
epoch [21/50] batch [180/796] time 0.839 (0.833) data 0.000 (0.003) loss 0.4326 (0.9220) lr 1.5878e-03 eta 5:29:12
epoch [21/50] batch [200/796] time 0.838 (0.833) data 0.000 (0.003) loss 0.3801 (0.9081) lr 1.5878e-03 eta 5:28:44
epoch [21/50] batch [220/796] time 0.817 (0.832) data 0.000 (0.003) loss 1.7249 (0.8882) lr 1.5878e-03 eta 5:28:15
epoch [21/50] batch [240/796] time 0.839 (0.832) data 0.000 (0.003) loss 0.9528 (0.9116) lr 1.5878e-03 eta 5:27:54
epoch [21/50] batch [260/796] time 0.831 (0.832) data 0.000 (0.002) loss 0.0682 (0.9187) lr 1.5878e-03 eta 5:27:30
epoch [21/50] batch [280/796] time 0.817 (0.832) data 0.000 (0.002) loss 3.4188 (0.9272) lr 1.5878e-03 eta 5:27:12
epoch [21/50] batch [300/796] time 0.828 (0.832) data 0.000 (0.002) loss 1.2383 (0.9366) lr 1.5878e-03 eta 5:26:56
epoch [21/50] batch [320/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.5389 (0.9373) lr 1.5878e-03 eta 5:26:32
epoch [21/50] batch [340/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.5375 (0.9274) lr 1.5878e-03 eta 5:26:15
epoch [21/50] batch [360/796] time 0.840 (0.831) data 0.000 (0.002) loss 0.4743 (0.9305) lr 1.5878e-03 eta 5:25:56
epoch [21/50] batch [380/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.4150 (0.9356) lr 1.5878e-03 eta 5:25:41
epoch [21/50] batch [400/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.4404 (0.9374) lr 1.5878e-03 eta 5:25:23
epoch [21/50] batch [420/796] time 0.838 (0.831) data 0.000 (0.002) loss 0.6147 (0.9276) lr 1.5878e-03 eta 5:25:04
epoch [21/50] batch [440/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.7559 (0.9307) lr 1.5878e-03 eta 5:24:47
epoch [21/50] batch [460/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.9910 (0.9302) lr 1.5878e-03 eta 5:24:30
epoch [21/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.001) loss 2.2857 (0.9405) lr 1.5878e-03 eta 5:24:13
epoch [21/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.6816 (0.9396) lr 1.5878e-03 eta 5:23:55
epoch [21/50] batch [520/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.2755 (0.9382) lr 1.5878e-03 eta 5:23:38
epoch [21/50] batch [540/796] time 0.838 (0.831) data 0.000 (0.001) loss 2.6811 (0.9292) lr 1.5878e-03 eta 5:23:20
epoch [21/50] batch [560/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.5161 (0.9282) lr 1.5878e-03 eta 5:23:01
epoch [21/50] batch [580/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.2116 (0.9204) lr 1.5878e-03 eta 5:22:45
epoch [21/50] batch [600/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.6394 (0.9169) lr 1.5878e-03 eta 5:22:26
epoch [21/50] batch [620/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.7496 (0.9240) lr 1.5878e-03 eta 5:22:08
epoch [21/50] batch [640/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.1262 (0.9203) lr 1.5878e-03 eta 5:21:50
epoch [21/50] batch [660/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.1097 (0.9193) lr 1.5878e-03 eta 5:21:32
epoch [21/50] batch [680/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.4031 (0.9191) lr 1.5878e-03 eta 5:21:15
epoch [21/50] batch [700/796] time 0.839 (0.831) data 0.000 (0.001) loss 2.6108 (0.9180) lr 1.5878e-03 eta 5:20:59
epoch [21/50] batch [720/796] time 0.841 (0.831) data 0.000 (0.001) loss 1.4442 (0.9274) lr 1.5878e-03 eta 5:20:40
epoch [21/50] batch [740/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.1078 (0.9284) lr 1.5878e-03 eta 5:20:25
epoch [21/50] batch [760/796] time 0.833 (0.831) data 0.000 (0.001) loss 1.5830 (0.9274) lr 1.5878e-03 eta 5:20:09
epoch [21/50] batch [780/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.6575 (0.9266) lr 1.5878e-03 eta 5:19:53
epoch [22/50] batch [20/796] time 0.838 (0.857) data 0.000 (0.027) loss 3.1033 (0.9553) lr 1.5358e-03 eta 5:29:26
epoch [22/50] batch [40/796] time 0.838 (0.846) data 0.000 (0.014) loss 1.1376 (0.8442) lr 1.5358e-03 eta 5:24:44
epoch [22/50] batch [60/796] time 0.839 (0.840) data 0.000 (0.009) loss 0.7797 (0.8281) lr 1.5358e-03 eta 5:22:30
epoch [22/50] batch [80/796] time 0.839 (0.838) data 0.000 (0.007) loss 0.8339 (0.7771) lr 1.5358e-03 eta 5:21:23
epoch [22/50] batch [100/796] time 0.810 (0.836) data 0.000 (0.006) loss 1.0541 (0.8018) lr 1.5358e-03 eta 5:20:13
epoch [22/50] batch [120/796] time 0.837 (0.835) data 0.000 (0.005) loss 0.4536 (0.8113) lr 1.5358e-03 eta 5:19:31
epoch [22/50] batch [140/796] time 0.820 (0.834) data 0.000 (0.004) loss 0.4295 (0.8206) lr 1.5358e-03 eta 5:18:50
epoch [22/50] batch [160/796] time 0.829 (0.833) data 0.000 (0.004) loss 1.5703 (0.8849) lr 1.5358e-03 eta 5:18:23
epoch [22/50] batch [180/796] time 0.840 (0.833) data 0.000 (0.003) loss 0.9279 (0.8943) lr 1.5358e-03 eta 5:18:07
epoch [22/50] batch [200/796] time 0.840 (0.833) data 0.000 (0.003) loss 1.1331 (0.8801) lr 1.5358e-03 eta 5:17:45
epoch [22/50] batch [220/796] time 0.838 (0.833) data 0.000 (0.003) loss 1.0055 (0.8687) lr 1.5358e-03 eta 5:17:20
epoch [22/50] batch [240/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.7360 (0.8631) lr 1.5358e-03 eta 5:16:57
epoch [22/50] batch [260/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.4698 (0.8548) lr 1.5358e-03 eta 5:16:38
epoch [22/50] batch [280/796] time 0.828 (0.832) data 0.000 (0.002) loss 0.6691 (0.8631) lr 1.5358e-03 eta 5:16:15
epoch [22/50] batch [300/796] time 0.819 (0.832) data 0.000 (0.002) loss 0.5864 (0.8808) lr 1.5358e-03 eta 5:15:56
epoch [22/50] batch [320/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.9498 (0.8745) lr 1.5358e-03 eta 5:15:37
epoch [22/50] batch [340/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.2205 (0.8750) lr 1.5358e-03 eta 5:15:20
epoch [22/50] batch [360/796] time 0.843 (0.832) data 0.000 (0.002) loss 1.0022 (0.8863) lr 1.5358e-03 eta 5:15:03
epoch [22/50] batch [380/796] time 0.840 (0.832) data 0.000 (0.002) loss 1.1945 (0.8843) lr 1.5358e-03 eta 5:14:45
epoch [22/50] batch [400/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.3758 (0.8869) lr 1.5358e-03 eta 5:14:24
epoch [22/50] batch [420/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.2093 (0.8873) lr 1.5358e-03 eta 5:14:06
epoch [22/50] batch [440/796] time 0.843 (0.832) data 0.000 (0.001) loss 0.3318 (0.8801) lr 1.5358e-03 eta 5:13:48
epoch [22/50] batch [460/796] time 0.838 (0.831) data 0.000 (0.001) loss 3.6175 (0.8823) lr 1.5358e-03 eta 5:13:28
epoch [22/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.7652 (0.8849) lr 1.5358e-03 eta 5:13:13
epoch [22/50] batch [500/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.0130 (0.8853) lr 1.5358e-03 eta 5:12:56
epoch [22/50] batch [520/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.5924 (0.8913) lr 1.5358e-03 eta 5:12:38
epoch [22/50] batch [540/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.1079 (0.8909) lr 1.5358e-03 eta 5:12:21
epoch [22/50] batch [560/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2882 (0.8996) lr 1.5358e-03 eta 5:12:04
epoch [22/50] batch [580/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.7589 (0.9102) lr 1.5358e-03 eta 5:11:46
epoch [22/50] batch [600/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.2001 (0.9087) lr 1.5358e-03 eta 5:11:25
epoch [22/50] batch [620/796] time 0.816 (0.831) data 0.000 (0.001) loss 0.4286 (0.9183) lr 1.5358e-03 eta 5:11:08
epoch [22/50] batch [640/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.6202 (0.9128) lr 1.5358e-03 eta 5:10:49
epoch [22/50] batch [660/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.9671 (0.9122) lr 1.5358e-03 eta 5:10:33
epoch [22/50] batch [680/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.3446 (0.9093) lr 1.5358e-03 eta 5:10:16
epoch [22/50] batch [700/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.1192 (0.9056) lr 1.5358e-03 eta 5:09:59
epoch [22/50] batch [720/796] time 0.842 (0.831) data 0.000 (0.001) loss 1.2084 (0.9099) lr 1.5358e-03 eta 5:09:42
epoch [22/50] batch [740/796] time 0.832 (0.831) data 0.000 (0.001) loss 1.1464 (0.9108) lr 1.5358e-03 eta 5:09:28
epoch [22/50] batch [760/796] time 0.825 (0.831) data 0.000 (0.001) loss 0.2798 (0.9124) lr 1.5358e-03 eta 5:09:13
epoch [22/50] batch [780/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.1778 (0.9089) lr 1.5358e-03 eta 5:09:01
epoch [23/50] batch [20/796] time 0.822 (0.871) data 0.000 (0.035) loss 0.9783 (1.1691) lr 1.4818e-03 eta 5:23:05
epoch [23/50] batch [40/796] time 0.818 (0.851) data 0.000 (0.017) loss 3.8645 (1.2383) lr 1.4818e-03 eta 5:15:27
epoch [23/50] batch [60/796] time 0.829 (0.843) data 0.000 (0.012) loss 1.7662 (1.1321) lr 1.4818e-03 eta 5:12:24
epoch [23/50] batch [80/796] time 0.829 (0.840) data 0.000 (0.009) loss 0.4986 (1.0273) lr 1.4818e-03 eta 5:10:47
epoch [23/50] batch [100/796] time 0.829 (0.837) data 0.000 (0.007) loss 2.2798 (0.9979) lr 1.4818e-03 eta 5:09:41
epoch [23/50] batch [120/796] time 0.838 (0.836) data 0.000 (0.006) loss 0.9461 (0.9922) lr 1.4818e-03 eta 5:08:52
epoch [23/50] batch [140/796] time 0.827 (0.835) data 0.000 (0.005) loss 0.5201 (0.9548) lr 1.4818e-03 eta 5:08:11
epoch [23/50] batch [160/796] time 0.829 (0.835) data 0.000 (0.004) loss 1.7337 (0.9353) lr 1.4818e-03 eta 5:07:45
epoch [23/50] batch [180/796] time 0.829 (0.834) data 0.000 (0.004) loss 1.6233 (0.9229) lr 1.4818e-03 eta 5:07:14
epoch [23/50] batch [200/796] time 0.822 (0.833) data 0.000 (0.004) loss 0.3438 (0.9164) lr 1.4818e-03 eta 5:06:49
epoch [23/50] batch [220/796] time 0.832 (0.833) data 0.000 (0.003) loss 1.2665 (0.9139) lr 1.4818e-03 eta 5:06:25
epoch [23/50] batch [240/796] time 0.838 (0.833) data 0.000 (0.003) loss 0.6504 (0.9226) lr 1.4818e-03 eta 5:06:04
epoch [23/50] batch [260/796] time 0.828 (0.833) data 0.000 (0.003) loss 1.5402 (0.9199) lr 1.4818e-03 eta 5:05:42
epoch [23/50] batch [280/796] time 0.830 (0.832) data 0.000 (0.003) loss 0.4156 (0.9033) lr 1.4818e-03 eta 5:05:18
epoch [23/50] batch [300/796] time 0.808 (0.832) data 0.000 (0.002) loss 0.6124 (0.8994) lr 1.4818e-03 eta 5:04:54
epoch [23/50] batch [320/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.3952 (0.9028) lr 1.4818e-03 eta 5:04:35
epoch [23/50] batch [340/796] time 0.816 (0.832) data 0.000 (0.002) loss 0.3880 (0.9202) lr 1.4818e-03 eta 5:04:12
epoch [23/50] batch [360/796] time 0.817 (0.831) data 0.000 (0.002) loss 1.7107 (0.9196) lr 1.4818e-03 eta 5:03:52
epoch [23/50] batch [380/796] time 0.829 (0.831) data 0.000 (0.002) loss 0.7921 (0.9281) lr 1.4818e-03 eta 5:03:32
epoch [23/50] batch [400/796] time 0.818 (0.831) data 0.000 (0.002) loss 0.6887 (0.9192) lr 1.4818e-03 eta 5:03:13
epoch [23/50] batch [420/796] time 0.829 (0.831) data 0.000 (0.002) loss 1.3424 (0.9196) lr 1.4818e-03 eta 5:02:53
epoch [23/50] batch [440/796] time 0.828 (0.831) data 0.000 (0.002) loss 1.2027 (0.9309) lr 1.4818e-03 eta 5:02:36
epoch [23/50] batch [460/796] time 0.818 (0.831) data 0.000 (0.002) loss 1.3673 (0.9351) lr 1.4818e-03 eta 5:02:18
epoch [23/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.002) loss 0.0713 (0.9226) lr 1.4818e-03 eta 5:02:01
epoch [23/50] batch [500/796] time 0.831 (0.831) data 0.000 (0.002) loss 0.6818 (0.9241) lr 1.4818e-03 eta 5:01:42
epoch [23/50] batch [520/796] time 0.839 (0.831) data 0.000 (0.002) loss 1.5086 (0.9300) lr 1.4818e-03 eta 5:01:23
epoch [23/50] batch [540/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.6457 (0.9341) lr 1.4818e-03 eta 5:01:03
epoch [23/50] batch [560/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.2212 (0.9338) lr 1.4818e-03 eta 5:00:47
epoch [23/50] batch [580/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.6469 (0.9359) lr 1.4818e-03 eta 5:00:32
epoch [23/50] batch [600/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.1428 (0.9388) lr 1.4818e-03 eta 5:00:16
epoch [23/50] batch [620/796] time 0.837 (0.831) data 0.000 (0.001) loss 1.0733 (0.9419) lr 1.4818e-03 eta 4:59:59
epoch [23/50] batch [640/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.5021 (0.9461) lr 1.4818e-03 eta 4:59:43
epoch [23/50] batch [660/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.6017 (0.9509) lr 1.4818e-03 eta 4:59:25
epoch [23/50] batch [680/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.1187 (0.9396) lr 1.4818e-03 eta 4:59:10
epoch [23/50] batch [700/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.6987 (0.9433) lr 1.4818e-03 eta 4:58:53
epoch [23/50] batch [720/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.6521 (0.9384) lr 1.4818e-03 eta 4:58:37
epoch [23/50] batch [740/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.9096 (0.9381) lr 1.4818e-03 eta 4:58:18
epoch [23/50] batch [760/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.9348 (0.9331) lr 1.4818e-03 eta 4:58:00
epoch [23/50] batch [780/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.5764 (0.9331) lr 1.4818e-03 eta 4:57:44
epoch [24/50] batch [20/796] time 0.828 (0.858) data 0.000 (0.028) loss 0.7996 (1.2968) lr 1.4258e-03 eta 5:07:05
epoch [24/50] batch [40/796] time 0.838 (0.844) data 0.000 (0.014) loss 0.7966 (1.1295) lr 1.4258e-03 eta 5:01:41
epoch [24/50] batch [60/796] time 0.838 (0.839) data 0.000 (0.010) loss 0.4013 (1.0004) lr 1.4258e-03 eta 4:59:40
epoch [24/50] batch [80/796] time 0.819 (0.837) data 0.000 (0.007) loss 1.1411 (0.9693) lr 1.4258e-03 eta 4:58:43
epoch [24/50] batch [100/796] time 0.829 (0.836) data 0.000 (0.006) loss 0.9447 (0.9942) lr 1.4258e-03 eta 4:57:55
epoch [24/50] batch [120/796] time 0.830 (0.834) data 0.000 (0.005) loss 0.4547 (0.9867) lr 1.4258e-03 eta 4:57:14
epoch [24/50] batch [140/796] time 0.811 (0.834) data 0.000 (0.004) loss 1.7541 (1.0295) lr 1.4258e-03 eta 4:56:44
epoch [24/50] batch [160/796] time 0.830 (0.834) data 0.000 (0.004) loss 1.7164 (1.0256) lr 1.4258e-03 eta 4:56:27
epoch [24/50] batch [180/796] time 0.816 (0.833) data 0.000 (0.003) loss 0.2065 (1.0106) lr 1.4258e-03 eta 4:55:57
epoch [24/50] batch [200/796] time 0.829 (0.832) data 0.000 (0.003) loss 1.6421 (1.0114) lr 1.4258e-03 eta 4:55:24
epoch [24/50] batch [220/796] time 0.837 (0.832) data 0.000 (0.003) loss 1.0927 (0.9929) lr 1.4258e-03 eta 4:54:59
epoch [24/50] batch [240/796] time 0.817 (0.832) data 0.000 (0.003) loss 0.5727 (0.9758) lr 1.4258e-03 eta 4:54:40
epoch [24/50] batch [260/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.2258 (0.9871) lr 1.4258e-03 eta 4:54:16
epoch [24/50] batch [280/796] time 0.829 (0.831) data 0.000 (0.002) loss 0.9518 (0.9794) lr 1.4258e-03 eta 4:53:53
epoch [24/50] batch [300/796] time 0.837 (0.831) data 0.000 (0.002) loss 0.1467 (0.9721) lr 1.4258e-03 eta 4:53:35
epoch [24/50] batch [320/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.8185 (0.9705) lr 1.4258e-03 eta 4:53:16
epoch [24/50] batch [340/796] time 0.838 (0.831) data 0.000 (0.002) loss 1.9936 (0.9753) lr 1.4258e-03 eta 4:53:00
epoch [24/50] batch [360/796] time 0.829 (0.831) data 0.000 (0.002) loss 1.7981 (0.9726) lr 1.4258e-03 eta 4:52:41
epoch [24/50] batch [380/796] time 0.828 (0.831) data 0.000 (0.002) loss 1.4960 (0.9704) lr 1.4258e-03 eta 4:52:21
epoch [24/50] batch [400/796] time 0.837 (0.831) data 0.000 (0.002) loss 0.1827 (0.9667) lr 1.4258e-03 eta 4:52:03
epoch [24/50] batch [420/796] time 0.838 (0.831) data 0.000 (0.002) loss 0.7641 (0.9589) lr 1.4258e-03 eta 4:51:42
epoch [24/50] batch [440/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.4707 (0.9481) lr 1.4258e-03 eta 4:51:25
epoch [24/50] batch [460/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.7989 (0.9430) lr 1.4258e-03 eta 4:51:07
epoch [24/50] batch [480/796] time 0.817 (0.830) data 0.000 (0.001) loss 0.0938 (0.9421) lr 1.4258e-03 eta 4:50:48
epoch [24/50] batch [500/796] time 0.817 (0.830) data 0.000 (0.001) loss 0.3210 (0.9313) lr 1.4258e-03 eta 4:50:29
epoch [24/50] batch [520/796] time 0.829 (0.830) data 0.000 (0.001) loss 0.2118 (0.9181) lr 1.4258e-03 eta 4:50:10
epoch [24/50] batch [540/796] time 0.839 (0.830) data 0.000 (0.001) loss 1.3829 (0.9160) lr 1.4258e-03 eta 4:49:53
epoch [24/50] batch [560/796] time 0.839 (0.830) data 0.000 (0.001) loss 2.3421 (0.9251) lr 1.4258e-03 eta 4:49:36
epoch [24/50] batch [580/796] time 0.838 (0.830) data 0.000 (0.001) loss 1.0778 (0.9271) lr 1.4258e-03 eta 4:49:18
epoch [24/50] batch [600/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.9157 (0.9280) lr 1.4258e-03 eta 4:49:02
epoch [24/50] batch [620/796] time 0.816 (0.830) data 0.000 (0.001) loss 0.1715 (0.9305) lr 1.4258e-03 eta 4:48:45
epoch [24/50] batch [640/796] time 0.830 (0.830) data 0.000 (0.001) loss 0.2693 (0.9281) lr 1.4258e-03 eta 4:48:27
epoch [24/50] batch [660/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.6002 (0.9248) lr 1.4258e-03 eta 4:48:09
epoch [24/50] batch [680/796] time 0.817 (0.830) data 0.000 (0.001) loss 0.8178 (0.9370) lr 1.4258e-03 eta 4:47:52
epoch [24/50] batch [700/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.8909 (0.9378) lr 1.4258e-03 eta 4:47:36
epoch [24/50] batch [720/796] time 0.830 (0.830) data 0.000 (0.001) loss 0.3829 (0.9322) lr 1.4258e-03 eta 4:47:19
epoch [24/50] batch [740/796] time 0.817 (0.830) data 0.000 (0.001) loss 2.1428 (0.9260) lr 1.4258e-03 eta 4:47:00
epoch [24/50] batch [760/796] time 0.809 (0.830) data 0.000 (0.001) loss 1.3638 (0.9269) lr 1.4258e-03 eta 4:46:42
epoch [24/50] batch [780/796] time 0.839 (0.830) data 0.000 (0.001) loss 0.4998 (0.9345) lr 1.4258e-03 eta 4:46:26
epoch [25/50] batch [20/796] time 0.838 (0.859) data 0.000 (0.027) loss 0.8679 (0.7287) lr 1.3681e-03 eta 4:55:51
epoch [25/50] batch [40/796] time 0.832 (0.843) data 0.000 (0.014) loss 0.6412 (0.7457) lr 1.3681e-03 eta 4:50:19
epoch [25/50] batch [60/796] time 0.838 (0.839) data 0.000 (0.009) loss 1.3854 (0.8227) lr 1.3681e-03 eta 4:48:28
epoch [25/50] batch [80/796] time 0.807 (0.837) data 0.000 (0.007) loss 0.4967 (0.8055) lr 1.3681e-03 eta 4:47:29
epoch [25/50] batch [100/796] time 0.839 (0.835) data 0.000 (0.006) loss 0.4046 (0.8513) lr 1.3681e-03 eta 4:46:38
epoch [25/50] batch [120/796] time 0.818 (0.834) data 0.000 (0.005) loss 1.1830 (0.8398) lr 1.3681e-03 eta 4:46:04
epoch [25/50] batch [140/796] time 0.818 (0.833) data 0.000 (0.004) loss 0.2157 (0.8481) lr 1.3681e-03 eta 4:45:32
epoch [25/50] batch [160/796] time 0.838 (0.833) data 0.000 (0.004) loss 0.2095 (0.8309) lr 1.3681e-03 eta 4:45:00
epoch [25/50] batch [180/796] time 0.817 (0.832) data 0.000 (0.003) loss 1.5873 (0.8367) lr 1.3681e-03 eta 4:44:38
epoch [25/50] batch [200/796] time 0.829 (0.832) data 0.000 (0.003) loss 1.0758 (0.8301) lr 1.3681e-03 eta 4:44:11
epoch [25/50] batch [220/796] time 0.830 (0.832) data 0.000 (0.003) loss 0.3431 (0.8289) lr 1.3681e-03 eta 4:43:59
epoch [25/50] batch [240/796] time 0.840 (0.832) data 0.000 (0.002) loss 1.8279 (0.8399) lr 1.3681e-03 eta 4:43:41
epoch [25/50] batch [260/796] time 0.818 (0.832) data 0.000 (0.002) loss 1.1574 (0.8428) lr 1.3681e-03 eta 4:43:21
epoch [25/50] batch [280/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.3755 (0.8660) lr 1.3681e-03 eta 4:43:01
epoch [25/50] batch [300/796] time 0.829 (0.831) data 0.000 (0.002) loss 0.2444 (0.8600) lr 1.3681e-03 eta 4:42:37
epoch [25/50] batch [320/796] time 0.828 (0.831) data 0.000 (0.002) loss 2.3510 (0.8781) lr 1.3681e-03 eta 4:42:21
epoch [25/50] batch [340/796] time 0.837 (0.831) data 0.000 (0.002) loss 0.3065 (0.8774) lr 1.3681e-03 eta 4:42:02
epoch [25/50] batch [360/796] time 0.837 (0.831) data 0.000 (0.002) loss 0.1468 (0.8840) lr 1.3681e-03 eta 4:41:40
epoch [25/50] batch [380/796] time 0.828 (0.831) data 0.000 (0.002) loss 3.2506 (0.8927) lr 1.3681e-03 eta 4:41:23
epoch [25/50] batch [400/796] time 0.821 (0.831) data 0.000 (0.002) loss 0.5304 (0.8889) lr 1.3681e-03 eta 4:41:04
epoch [25/50] batch [420/796] time 0.832 (0.831) data 0.000 (0.001) loss 1.1001 (0.8988) lr 1.3681e-03 eta 4:40:49
epoch [25/50] batch [440/796] time 0.842 (0.831) data 0.000 (0.001) loss 0.0562 (0.8990) lr 1.3681e-03 eta 4:40:37
epoch [25/50] batch [460/796] time 0.833 (0.831) data 0.000 (0.001) loss 0.4695 (0.8991) lr 1.3681e-03 eta 4:40:20
epoch [25/50] batch [480/796] time 0.842 (0.831) data 0.000 (0.001) loss 0.2467 (0.9142) lr 1.3681e-03 eta 4:40:07
epoch [25/50] batch [500/796] time 0.841 (0.831) data 0.000 (0.001) loss 1.5432 (0.9208) lr 1.3681e-03 eta 4:39:50
epoch [25/50] batch [520/796] time 0.831 (0.832) data 0.000 (0.001) loss 0.7200 (0.9275) lr 1.3681e-03 eta 4:39:37
epoch [25/50] batch [540/796] time 0.843 (0.832) data 0.000 (0.001) loss 0.7742 (0.9317) lr 1.3681e-03 eta 4:39:21
epoch [25/50] batch [560/796] time 0.842 (0.832) data 0.000 (0.001) loss 0.9424 (0.9334) lr 1.3681e-03 eta 4:39:06
epoch [25/50] batch [580/796] time 0.833 (0.832) data 0.000 (0.001) loss 0.5979 (0.9231) lr 1.3681e-03 eta 4:38:51
epoch [25/50] batch [600/796] time 0.843 (0.832) data 0.001 (0.001) loss 1.4512 (0.9227) lr 1.3681e-03 eta 4:38:33
epoch [25/50] batch [620/796] time 0.841 (0.832) data 0.000 (0.001) loss 0.1421 (0.9260) lr 1.3681e-03 eta 4:38:18
epoch [25/50] batch [640/796] time 0.820 (0.832) data 0.000 (0.001) loss 1.3157 (0.9306) lr 1.3681e-03 eta 4:38:02
epoch [25/50] batch [660/796] time 0.844 (0.832) data 0.000 (0.001) loss 0.0348 (0.9261) lr 1.3681e-03 eta 4:37:47
epoch [25/50] batch [680/796] time 0.842 (0.832) data 0.000 (0.001) loss 0.5452 (0.9236) lr 1.3681e-03 eta 4:37:32
epoch [25/50] batch [700/796] time 0.832 (0.832) data 0.000 (0.001) loss 0.9486 (0.9230) lr 1.3681e-03 eta 4:37:17
epoch [25/50] batch [720/796] time 0.820 (0.832) data 0.000 (0.001) loss 0.1443 (0.9250) lr 1.3681e-03 eta 4:37:01
epoch [25/50] batch [740/796] time 0.842 (0.832) data 0.000 (0.001) loss 0.1864 (0.9263) lr 1.3681e-03 eta 4:36:45
epoch [25/50] batch [760/796] time 0.841 (0.832) data 0.000 (0.001) loss 0.1518 (0.9196) lr 1.3681e-03 eta 4:36:30
epoch [25/50] batch [780/796] time 0.820 (0.832) data 0.000 (0.001) loss 1.5354 (0.9182) lr 1.3681e-03 eta 4:36:13
epoch [26/50] batch [20/796] time 0.833 (0.878) data 0.000 (0.045) loss 1.2489 (0.8684) lr 1.3090e-03 eta 4:51:01
epoch [26/50] batch [40/796] time 0.844 (0.856) data 0.000 (0.023) loss 0.4081 (0.8173) lr 1.3090e-03 eta 4:43:29
epoch [26/50] batch [60/796] time 0.847 (0.849) data 0.001 (0.015) loss 0.8616 (0.7897) lr 1.3090e-03 eta 4:40:39
epoch [26/50] batch [80/796] time 0.841 (0.846) data 0.000 (0.011) loss 1.3380 (0.7880) lr 1.3090e-03 eta 4:39:19
epoch [26/50] batch [100/796] time 0.833 (0.843) data 0.000 (0.009) loss 2.3753 (0.8725) lr 1.3090e-03 eta 4:38:18
epoch [26/50] batch [120/796] time 0.833 (0.842) data 0.000 (0.008) loss 0.0921 (0.8402) lr 1.3090e-03 eta 4:37:27
epoch [26/50] batch [140/796] time 0.839 (0.840) data 0.000 (0.007) loss 1.4155 (0.8575) lr 1.3090e-03 eta 4:36:46
epoch [26/50] batch [160/796] time 0.843 (0.840) data 0.000 (0.006) loss 1.1018 (0.8895) lr 1.3090e-03 eta 4:36:13
epoch [26/50] batch [180/796] time 0.822 (0.839) data 0.000 (0.005) loss 0.4066 (0.8986) lr 1.3090e-03 eta 4:35:40
epoch [26/50] batch [200/796] time 0.832 (0.838) data 0.000 (0.005) loss 1.0830 (0.8987) lr 1.3090e-03 eta 4:35:17
epoch [26/50] batch [220/796] time 0.832 (0.838) data 0.000 (0.004) loss 0.2690 (0.9144) lr 1.3090e-03 eta 4:34:49
epoch [26/50] batch [240/796] time 0.820 (0.837) data 0.000 (0.004) loss 0.8677 (0.8974) lr 1.3090e-03 eta 4:34:23
epoch [26/50] batch [260/796] time 0.821 (0.837) data 0.000 (0.004) loss 1.8475 (0.8974) lr 1.3090e-03 eta 4:34:01
epoch [26/50] batch [280/796] time 0.820 (0.837) data 0.000 (0.003) loss 1.2506 (0.8805) lr 1.3090e-03 eta 4:33:38
epoch [26/50] batch [300/796] time 0.832 (0.837) data 0.000 (0.003) loss 0.2667 (0.8801) lr 1.3090e-03 eta 4:33:16
epoch [26/50] batch [320/796] time 0.820 (0.836) data 0.000 (0.003) loss 1.4286 (0.8648) lr 1.3090e-03 eta 4:32:52
epoch [26/50] batch [340/796] time 0.833 (0.836) data 0.000 (0.003) loss 0.4820 (0.8642) lr 1.3090e-03 eta 4:32:31
epoch [26/50] batch [360/796] time 0.820 (0.836) data 0.000 (0.003) loss 2.0162 (0.8771) lr 1.3090e-03 eta 4:32:10
epoch [26/50] batch [380/796] time 0.821 (0.835) data 0.000 (0.003) loss 0.3161 (0.8809) lr 1.3090e-03 eta 4:31:46
epoch [26/50] batch [400/796] time 0.822 (0.835) data 0.000 (0.002) loss 0.4708 (0.8853) lr 1.3090e-03 eta 4:31:28
epoch [26/50] batch [420/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.0885 (0.8794) lr 1.3090e-03 eta 4:31:08
epoch [26/50] batch [440/796] time 0.841 (0.835) data 0.000 (0.002) loss 0.5675 (0.8827) lr 1.3090e-03 eta 4:30:50
epoch [26/50] batch [460/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.8252 (0.8810) lr 1.3090e-03 eta 4:30:30
epoch [26/50] batch [480/796] time 0.820 (0.835) data 0.000 (0.002) loss 1.2098 (0.8845) lr 1.3090e-03 eta 4:30:11
epoch [26/50] batch [500/796] time 0.822 (0.835) data 0.000 (0.002) loss 3.0894 (0.8915) lr 1.3090e-03 eta 4:29:53
epoch [26/50] batch [520/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.6006 (0.8978) lr 1.3090e-03 eta 4:29:35
epoch [26/50] batch [540/796] time 0.841 (0.834) data 0.000 (0.002) loss 2.1053 (0.8969) lr 1.3090e-03 eta 4:29:15
epoch [26/50] batch [560/796] time 0.824 (0.835) data 0.000 (0.002) loss 1.0630 (0.9061) lr 1.3090e-03 eta 4:29:00
epoch [26/50] batch [580/796] time 0.834 (0.835) data 0.000 (0.002) loss 0.8034 (0.9044) lr 1.3090e-03 eta 4:28:42
epoch [26/50] batch [600/796] time 0.842 (0.834) data 0.001 (0.002) loss 0.5870 (0.9043) lr 1.3090e-03 eta 4:28:24
epoch [26/50] batch [620/796] time 0.834 (0.834) data 0.000 (0.002) loss 0.4853 (0.9018) lr 1.3090e-03 eta 4:28:07
epoch [26/50] batch [640/796] time 0.833 (0.834) data 0.000 (0.002) loss 1.5921 (0.9071) lr 1.3090e-03 eta 4:27:52
epoch [26/50] batch [660/796] time 0.819 (0.835) data 0.000 (0.002) loss 1.7815 (0.9066) lr 1.3090e-03 eta 4:27:35
epoch [26/50] batch [680/796] time 0.841 (0.834) data 0.000 (0.002) loss 0.1695 (0.9020) lr 1.3090e-03 eta 4:27:18
epoch [26/50] batch [700/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.9032 (0.9049) lr 1.3090e-03 eta 4:27:00
epoch [26/50] batch [720/796] time 0.843 (0.834) data 0.001 (0.001) loss 1.4465 (0.9061) lr 1.3090e-03 eta 4:26:43
epoch [26/50] batch [740/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.5195 (0.9082) lr 1.3090e-03 eta 4:26:25
epoch [26/50] batch [760/796] time 0.809 (0.834) data 0.000 (0.001) loss 1.4233 (0.9042) lr 1.3090e-03 eta 4:26:08
epoch [26/50] batch [780/796] time 0.826 (0.834) data 0.000 (0.001) loss 1.5949 (0.8980) lr 1.3090e-03 eta 4:25:52
epoch [27/50] batch [20/796] time 0.816 (0.862) data 0.000 (0.036) loss 1.6493 (1.0999) lr 1.2487e-03 eta 4:34:08
epoch [27/50] batch [40/796] time 0.817 (0.846) data 0.000 (0.018) loss 3.0680 (1.0821) lr 1.2487e-03 eta 4:28:39
epoch [27/50] batch [60/796] time 0.817 (0.840) data 0.000 (0.012) loss 0.4484 (0.9882) lr 1.2487e-03 eta 4:26:39
epoch [27/50] batch [80/796] time 0.837 (0.838) data 0.000 (0.009) loss 0.9705 (0.9889) lr 1.2487e-03 eta 4:25:33
epoch [27/50] batch [100/796] time 0.836 (0.836) data 0.000 (0.007) loss 0.9578 (0.9552) lr 1.2487e-03 eta 4:24:50
epoch [27/50] batch [120/796] time 0.829 (0.835) data 0.000 (0.006) loss 0.4979 (0.9479) lr 1.2487e-03 eta 4:24:14
epoch [27/50] batch [140/796] time 0.828 (0.834) data 0.000 (0.005) loss 2.1993 (0.9275) lr 1.2487e-03 eta 4:23:38
epoch [27/50] batch [160/796] time 0.808 (0.833) data 0.000 (0.005) loss 0.8997 (0.9547) lr 1.2487e-03 eta 4:23:07
epoch [27/50] batch [180/796] time 0.837 (0.833) data 0.000 (0.004) loss 0.4874 (0.9378) lr 1.2487e-03 eta 4:22:45
epoch [27/50] batch [200/796] time 0.842 (0.833) data 0.000 (0.004) loss 0.4080 (0.9229) lr 1.2487e-03 eta 4:22:24
epoch [27/50] batch [220/796] time 0.842 (0.833) data 0.000 (0.003) loss 0.3909 (0.9078) lr 1.2487e-03 eta 4:22:13
epoch [27/50] batch [240/796] time 0.842 (0.833) data 0.000 (0.003) loss 0.4308 (0.9097) lr 1.2487e-03 eta 4:21:58
epoch [27/50] batch [260/796] time 0.821 (0.833) data 0.000 (0.003) loss 0.6970 (0.9206) lr 1.2487e-03 eta 4:21:43
epoch [27/50] batch [280/796] time 0.841 (0.833) data 0.000 (0.003) loss 0.1396 (0.9328) lr 1.2487e-03 eta 4:21:26
epoch [27/50] batch [300/796] time 0.833 (0.833) data 0.000 (0.003) loss 0.2081 (0.9323) lr 1.2487e-03 eta 4:21:09
epoch [27/50] batch [320/796] time 0.823 (0.833) data 0.000 (0.002) loss 0.5815 (0.9293) lr 1.2487e-03 eta 4:20:52
epoch [27/50] batch [340/796] time 0.820 (0.833) data 0.000 (0.002) loss 0.0597 (0.9121) lr 1.2487e-03 eta 4:20:35
epoch [27/50] batch [360/796] time 0.842 (0.833) data 0.000 (0.002) loss 0.6590 (0.9129) lr 1.2487e-03 eta 4:20:18
epoch [27/50] batch [380/796] time 0.821 (0.833) data 0.000 (0.002) loss 2.0809 (0.9160) lr 1.2487e-03 eta 4:20:03
epoch [27/50] batch [400/796] time 0.832 (0.833) data 0.000 (0.002) loss 1.1617 (0.9205) lr 1.2487e-03 eta 4:19:45
epoch [27/50] batch [420/796] time 0.842 (0.833) data 0.000 (0.002) loss 0.6708 (0.9189) lr 1.2487e-03 eta 4:19:28
epoch [27/50] batch [440/796] time 0.843 (0.833) data 0.000 (0.002) loss 0.2150 (0.9212) lr 1.2487e-03 eta 4:19:13
epoch [27/50] batch [460/796] time 0.831 (0.833) data 0.000 (0.002) loss 0.4524 (0.9261) lr 1.2487e-03 eta 4:18:56
epoch [27/50] batch [480/796] time 0.820 (0.833) data 0.000 (0.002) loss 0.9606 (0.9259) lr 1.2487e-03 eta 4:18:39
epoch [27/50] batch [500/796] time 0.820 (0.833) data 0.000 (0.002) loss 0.7527 (0.9186) lr 1.2487e-03 eta 4:18:21
epoch [27/50] batch [520/796] time 0.842 (0.833) data 0.000 (0.002) loss 0.4367 (0.9141) lr 1.2487e-03 eta 4:18:04
epoch [27/50] batch [540/796] time 0.824 (0.833) data 0.000 (0.002) loss 0.5262 (0.9079) lr 1.2487e-03 eta 4:17:45
epoch [27/50] batch [560/796] time 0.841 (0.833) data 0.000 (0.001) loss 1.0833 (0.9067) lr 1.2487e-03 eta 4:17:30
epoch [27/50] batch [580/796] time 0.841 (0.833) data 0.000 (0.001) loss 0.6960 (0.9033) lr 1.2487e-03 eta 4:17:13
epoch [27/50] batch [600/796] time 0.832 (0.833) data 0.000 (0.001) loss 0.7300 (0.8937) lr 1.2487e-03 eta 4:16:56
epoch [27/50] batch [620/796] time 0.821 (0.833) data 0.000 (0.001) loss 0.6079 (0.8890) lr 1.2487e-03 eta 4:16:38
epoch [27/50] batch [640/796] time 0.842 (0.833) data 0.000 (0.001) loss 1.0090 (0.8943) lr 1.2487e-03 eta 4:16:22
epoch [27/50] batch [660/796] time 0.811 (0.833) data 0.000 (0.001) loss 0.3249 (0.8873) lr 1.2487e-03 eta 4:16:04
epoch [27/50] batch [680/796] time 0.820 (0.833) data 0.000 (0.001) loss 0.6720 (0.8909) lr 1.2487e-03 eta 4:15:49
epoch [27/50] batch [700/796] time 0.821 (0.833) data 0.000 (0.001) loss 0.8849 (0.8964) lr 1.2487e-03 eta 4:15:33
epoch [27/50] batch [720/796] time 0.822 (0.833) data 0.000 (0.001) loss 1.3005 (0.8978) lr 1.2487e-03 eta 4:15:17
epoch [27/50] batch [740/796] time 0.843 (0.833) data 0.000 (0.001) loss 0.7332 (0.8996) lr 1.2487e-03 eta 4:15:02
epoch [27/50] batch [760/796] time 0.843 (0.833) data 0.000 (0.001) loss 0.6060 (0.9016) lr 1.2487e-03 eta 4:14:47
epoch [27/50] batch [780/796] time 0.834 (0.833) data 0.000 (0.001) loss 1.7367 (0.9055) lr 1.2487e-03 eta 4:14:32
epoch [28/50] batch [20/796] time 0.842 (0.872) data 0.000 (0.037) loss 0.6487 (0.9154) lr 1.1874e-03 eta 4:25:42
epoch [28/50] batch [40/796] time 0.831 (0.853) data 0.000 (0.018) loss 1.8628 (0.9646) lr 1.1874e-03 eta 4:19:44
epoch [28/50] batch [60/796] time 0.821 (0.846) data 0.001 (0.012) loss 1.7910 (0.9340) lr 1.1874e-03 eta 4:17:24
epoch [28/50] batch [80/796] time 0.843 (0.844) data 0.000 (0.009) loss 0.0971 (0.8857) lr 1.1874e-03 eta 4:16:19
epoch [28/50] batch [100/796] time 0.832 (0.842) data 0.000 (0.007) loss 1.5156 (0.9111) lr 1.1874e-03 eta 4:15:24
epoch [28/50] batch [120/796] time 0.832 (0.840) data 0.000 (0.006) loss 2.0812 (0.9300) lr 1.1874e-03 eta 4:14:37
epoch [28/50] batch [140/796] time 0.842 (0.839) data 0.000 (0.005) loss 1.1975 (0.9302) lr 1.1874e-03 eta 4:14:02
epoch [28/50] batch [160/796] time 0.833 (0.838) data 0.000 (0.005) loss 0.6006 (0.9539) lr 1.1874e-03 eta 4:13:30
epoch [28/50] batch [180/796] time 0.841 (0.837) data 0.000 (0.004) loss 1.6279 (0.9455) lr 1.1874e-03 eta 4:12:59
epoch [28/50] batch [200/796] time 0.820 (0.837) data 0.000 (0.004) loss 1.3524 (0.9444) lr 1.1874e-03 eta 4:12:27
epoch [28/50] batch [220/796] time 0.844 (0.836) data 0.000 (0.003) loss 0.3400 (0.9552) lr 1.1874e-03 eta 4:12:02
epoch [28/50] batch [240/796] time 0.821 (0.836) data 0.000 (0.003) loss 1.1432 (0.9503) lr 1.1874e-03 eta 4:11:39
epoch [28/50] batch [260/796] time 0.842 (0.835) data 0.000 (0.003) loss 0.3443 (0.9396) lr 1.1874e-03 eta 4:11:17
epoch [28/50] batch [280/796] time 0.848 (0.835) data 0.000 (0.003) loss 1.1706 (0.9307) lr 1.1874e-03 eta 4:11:01
epoch [28/50] batch [300/796] time 0.833 (0.835) data 0.000 (0.003) loss 0.6839 (0.9190) lr 1.1874e-03 eta 4:10:39
epoch [28/50] batch [320/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.9010 (0.9294) lr 1.1874e-03 eta 4:10:21
epoch [28/50] batch [340/796] time 0.841 (0.835) data 0.000 (0.002) loss 0.2404 (0.9111) lr 1.1874e-03 eta 4:10:01
epoch [28/50] batch [360/796] time 0.831 (0.835) data 0.000 (0.002) loss 1.1354 (0.9026) lr 1.1874e-03 eta 4:09:43
epoch [28/50] batch [380/796] time 0.822 (0.835) data 0.000 (0.002) loss 2.1337 (0.9003) lr 1.1874e-03 eta 4:09:25
epoch [28/50] batch [400/796] time 0.820 (0.835) data 0.000 (0.002) loss 2.7876 (0.9127) lr 1.1874e-03 eta 4:09:08
epoch [28/50] batch [420/796] time 0.833 (0.835) data 0.000 (0.002) loss 0.4045 (0.9052) lr 1.1874e-03 eta 4:08:51
epoch [28/50] batch [440/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.2313 (0.9072) lr 1.1874e-03 eta 4:08:34
epoch [28/50] batch [460/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.0562 (0.9064) lr 1.1874e-03 eta 4:08:15
epoch [28/50] batch [480/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.7376 (0.9018) lr 1.1874e-03 eta 4:07:57
epoch [28/50] batch [500/796] time 0.821 (0.834) data 0.000 (0.002) loss 1.5924 (0.9070) lr 1.1874e-03 eta 4:07:38
epoch [28/50] batch [520/796] time 0.821 (0.834) data 0.000 (0.002) loss 0.6283 (0.9083) lr 1.1874e-03 eta 4:07:19
epoch [28/50] batch [540/796] time 0.841 (0.834) data 0.000 (0.002) loss 0.2215 (0.9026) lr 1.1874e-03 eta 4:07:02
epoch [28/50] batch [560/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.2902 (0.9004) lr 1.1874e-03 eta 4:06:46
epoch [28/50] batch [580/796] time 0.834 (0.834) data 0.000 (0.001) loss 0.7898 (0.8981) lr 1.1874e-03 eta 4:06:28
epoch [28/50] batch [600/796] time 0.811 (0.834) data 0.001 (0.001) loss 0.4655 (0.8935) lr 1.1874e-03 eta 4:06:12
epoch [28/50] batch [620/796] time 0.844 (0.834) data 0.000 (0.001) loss 0.8534 (0.8911) lr 1.1874e-03 eta 4:05:55
epoch [28/50] batch [640/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.0537 (0.8842) lr 1.1874e-03 eta 4:05:38
epoch [28/50] batch [660/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.7117 (0.8830) lr 1.1874e-03 eta 4:05:21
epoch [28/50] batch [680/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.2135 (0.8770) lr 1.1874e-03 eta 4:05:03
epoch [28/50] batch [700/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.7925 (0.8832) lr 1.1874e-03 eta 4:04:46
epoch [28/50] batch [720/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.6172 (0.8871) lr 1.1874e-03 eta 4:04:31
epoch [28/50] batch [740/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.0601 (0.8862) lr 1.1874e-03 eta 4:04:13
epoch [28/50] batch [760/796] time 0.823 (0.834) data 0.000 (0.001) loss 1.6060 (0.8874) lr 1.1874e-03 eta 4:03:57
epoch [28/50] batch [780/796] time 0.833 (0.834) data 0.000 (0.001) loss 0.1369 (0.8842) lr 1.1874e-03 eta 4:03:40
epoch [29/50] batch [20/796] time 0.843 (0.872) data 0.000 (0.037) loss 1.0840 (1.1742) lr 1.1253e-03 eta 4:14:12
epoch [29/50] batch [40/796] time 0.821 (0.853) data 0.000 (0.019) loss 0.1424 (1.0187) lr 1.1253e-03 eta 4:08:21
epoch [29/50] batch [60/796] time 0.842 (0.846) data 0.001 (0.013) loss 0.9340 (0.9393) lr 1.1253e-03 eta 4:06:10
epoch [29/50] batch [80/796] time 0.842 (0.843) data 0.000 (0.010) loss 1.0948 (0.9168) lr 1.1253e-03 eta 4:04:54
epoch [29/50] batch [100/796] time 0.833 (0.841) data 0.000 (0.008) loss 0.1423 (0.9193) lr 1.1253e-03 eta 4:04:07
epoch [29/50] batch [120/796] time 0.820 (0.840) data 0.000 (0.006) loss 0.0262 (0.9113) lr 1.1253e-03 eta 4:03:30
epoch [29/50] batch [140/796] time 0.812 (0.839) data 0.000 (0.006) loss 1.4181 (0.9033) lr 1.1253e-03 eta 4:02:59
epoch [29/50] batch [160/796] time 0.843 (0.839) data 0.000 (0.005) loss 0.5736 (0.9139) lr 1.1253e-03 eta 4:02:30
epoch [29/50] batch [180/796] time 0.820 (0.838) data 0.000 (0.004) loss 1.9458 (0.9236) lr 1.1253e-03 eta 4:01:59
epoch [29/50] batch [200/796] time 0.842 (0.837) data 0.000 (0.004) loss 0.7628 (0.9190) lr 1.1253e-03 eta 4:01:36
epoch [29/50] batch [220/796] time 0.843 (0.837) data 0.000 (0.004) loss 0.1757 (0.8948) lr 1.1253e-03 eta 4:01:14
epoch [29/50] batch [240/796] time 0.832 (0.837) data 0.000 (0.003) loss 2.3123 (0.9101) lr 1.1253e-03 eta 4:00:52
epoch [29/50] batch [260/796] time 0.822 (0.837) data 0.000 (0.003) loss 0.0305 (0.9123) lr 1.1253e-03 eta 4:00:31
epoch [29/50] batch [280/796] time 0.832 (0.836) data 0.000 (0.003) loss 1.4296 (0.9275) lr 1.1253e-03 eta 4:00:10
epoch [29/50] batch [300/796] time 0.833 (0.836) data 0.000 (0.003) loss 0.5322 (0.9322) lr 1.1253e-03 eta 3:59:49
epoch [29/50] batch [320/796] time 0.821 (0.836) data 0.000 (0.003) loss 1.2802 (0.9236) lr 1.1253e-03 eta 3:59:29
epoch [29/50] batch [340/796] time 0.822 (0.836) data 0.000 (0.002) loss 0.0591 (0.9063) lr 1.1253e-03 eta 3:59:10
epoch [29/50] batch [360/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.7762 (0.9073) lr 1.1253e-03 eta 3:58:49
epoch [29/50] batch [380/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.2423 (0.9031) lr 1.1253e-03 eta 3:58:30
epoch [29/50] batch [400/796] time 0.821 (0.835) data 0.000 (0.002) loss 1.0883 (0.9067) lr 1.1253e-03 eta 3:58:11
epoch [29/50] batch [420/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.0893 (0.8974) lr 1.1253e-03 eta 3:57:53
epoch [29/50] batch [440/796] time 0.821 (0.835) data 0.000 (0.002) loss 1.4958 (0.9147) lr 1.1253e-03 eta 3:57:33
epoch [29/50] batch [460/796] time 0.841 (0.835) data 0.000 (0.002) loss 1.5868 (0.9149) lr 1.1253e-03 eta 3:57:16
epoch [29/50] batch [480/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.6040 (0.9113) lr 1.1253e-03 eta 3:56:58
epoch [29/50] batch [500/796] time 0.811 (0.835) data 0.000 (0.002) loss 0.2203 (0.9069) lr 1.1253e-03 eta 3:56:42
epoch [29/50] batch [520/796] time 0.813 (0.835) data 0.000 (0.002) loss 0.4814 (0.8930) lr 1.1253e-03 eta 3:56:24
epoch [29/50] batch [540/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.4371 (0.8950) lr 1.1253e-03 eta 3:56:06
epoch [29/50] batch [560/796] time 0.844 (0.835) data 0.000 (0.002) loss 0.1913 (0.8878) lr 1.1253e-03 eta 3:55:49
epoch [29/50] batch [580/796] time 0.812 (0.835) data 0.000 (0.001) loss 0.8259 (0.8849) lr 1.1253e-03 eta 3:55:33
epoch [29/50] batch [600/796] time 0.825 (0.835) data 0.001 (0.001) loss 1.0037 (0.8846) lr 1.1253e-03 eta 3:55:16
epoch [29/50] batch [620/796] time 0.842 (0.835) data 0.000 (0.001) loss 1.8167 (0.8928) lr 1.1253e-03 eta 3:54:59
epoch [29/50] batch [640/796] time 0.822 (0.835) data 0.000 (0.001) loss 0.8004 (0.8912) lr 1.1253e-03 eta 3:54:43
epoch [29/50] batch [660/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.8449 (0.8996) lr 1.1253e-03 eta 3:54:25
epoch [29/50] batch [680/796] time 0.831 (0.835) data 0.000 (0.001) loss 2.6695 (0.8936) lr 1.1253e-03 eta 3:54:09
epoch [29/50] batch [700/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.3588 (0.8899) lr 1.1253e-03 eta 3:53:50
epoch [29/50] batch [720/796] time 0.834 (0.835) data 0.000 (0.001) loss 2.3267 (0.8868) lr 1.1253e-03 eta 3:53:33
epoch [29/50] batch [740/796] time 0.832 (0.835) data 0.000 (0.001) loss 2.2504 (0.8891) lr 1.1253e-03 eta 3:53:17
epoch [29/50] batch [760/796] time 0.823 (0.835) data 0.000 (0.001) loss 1.3399 (0.8873) lr 1.1253e-03 eta 3:53:00
epoch [29/50] batch [780/796] time 0.841 (0.835) data 0.000 (0.001) loss 1.6833 (0.8924) lr 1.1253e-03 eta 3:52:43
epoch [30/50] batch [20/796] time 0.837 (0.864) data 0.000 (0.035) loss 1.3384 (1.0129) lr 1.0628e-03 eta 4:00:29
epoch [30/50] batch [40/796] time 0.836 (0.848) data 0.000 (0.018) loss 1.4588 (0.9156) lr 1.0628e-03 eta 3:55:38
epoch [30/50] batch [60/796] time 0.816 (0.841) data 0.000 (0.012) loss 0.1736 (0.8496) lr 1.0628e-03 eta 3:53:33
epoch [30/50] batch [80/796] time 0.838 (0.839) data 0.000 (0.009) loss 0.6958 (0.9151) lr 1.0628e-03 eta 3:52:29
epoch [30/50] batch [100/796] time 0.830 (0.837) data 0.000 (0.007) loss 0.9261 (0.8643) lr 1.0628e-03 eta 3:51:39
epoch [30/50] batch [120/796] time 0.818 (0.835) data 0.000 (0.006) loss 1.2146 (0.8265) lr 1.0628e-03 eta 3:51:01
epoch [30/50] batch [140/796] time 0.829 (0.834) data 0.000 (0.005) loss 0.1547 (0.8175) lr 1.0628e-03 eta 3:50:32
epoch [30/50] batch [160/796] time 0.829 (0.834) data 0.000 (0.005) loss 0.2755 (0.8013) lr 1.0628e-03 eta 3:49:59
epoch [30/50] batch [180/796] time 0.837 (0.833) data 0.000 (0.004) loss 0.2209 (0.8140) lr 1.0628e-03 eta 3:49:31
epoch [30/50] batch [200/796] time 0.827 (0.832) data 0.000 (0.004) loss 0.9881 (0.8275) lr 1.0628e-03 eta 3:49:08
epoch [30/50] batch [220/796] time 0.837 (0.832) data 0.000 (0.003) loss 0.4011 (0.8498) lr 1.0628e-03 eta 3:48:41
epoch [30/50] batch [240/796] time 0.836 (0.831) data 0.000 (0.003) loss 0.5179 (0.8677) lr 1.0628e-03 eta 3:48:17
epoch [30/50] batch [260/796] time 0.838 (0.831) data 0.000 (0.003) loss 0.8689 (0.8725) lr 1.0628e-03 eta 3:47:53
epoch [30/50] batch [280/796] time 0.837 (0.831) data 0.000 (0.003) loss 0.6033 (0.8791) lr 1.0628e-03 eta 3:47:33
epoch [30/50] batch [300/796] time 0.836 (0.830) data 0.000 (0.003) loss 1.3115 (0.8876) lr 1.0628e-03 eta 3:47:13
epoch [30/50] batch [320/796] time 0.839 (0.830) data 0.000 (0.002) loss 0.1156 (0.8801) lr 1.0628e-03 eta 3:46:54
epoch [30/50] batch [340/796] time 0.837 (0.830) data 0.000 (0.002) loss 1.5231 (0.8811) lr 1.0628e-03 eta 3:46:37
epoch [30/50] batch [360/796] time 0.816 (0.830) data 0.000 (0.002) loss 1.1258 (0.8796) lr 1.0628e-03 eta 3:46:20
epoch [30/50] batch [380/796] time 0.838 (0.830) data 0.000 (0.002) loss 1.8324 (0.8894) lr 1.0628e-03 eta 3:46:03
epoch [30/50] batch [400/796] time 0.829 (0.830) data 0.000 (0.002) loss 0.6155 (0.8865) lr 1.0628e-03 eta 3:45:45
epoch [30/50] batch [420/796] time 0.810 (0.830) data 0.000 (0.002) loss 0.4183 (0.8836) lr 1.0628e-03 eta 3:45:27
epoch [30/50] batch [440/796] time 0.827 (0.830) data 0.000 (0.002) loss 0.9859 (0.8804) lr 1.0628e-03 eta 3:45:11
epoch [30/50] batch [460/796] time 0.837 (0.830) data 0.000 (0.002) loss 1.1635 (0.8893) lr 1.0628e-03 eta 3:44:54
epoch [30/50] batch [480/796] time 0.837 (0.830) data 0.000 (0.002) loss 3.2666 (0.8893) lr 1.0628e-03 eta 3:44:35
epoch [30/50] batch [500/796] time 0.836 (0.830) data 0.000 (0.002) loss 1.4118 (0.8797) lr 1.0628e-03 eta 3:44:17
epoch [30/50] batch [520/796] time 0.838 (0.830) data 0.000 (0.002) loss 0.1193 (0.8822) lr 1.0628e-03 eta 3:44:00
epoch [30/50] batch [540/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.4308 (0.8756) lr 1.0628e-03 eta 3:43:42
epoch [30/50] batch [560/796] time 0.836 (0.830) data 0.000 (0.001) loss 1.5786 (0.8738) lr 1.0628e-03 eta 3:43:25
epoch [30/50] batch [580/796] time 0.838 (0.830) data 0.000 (0.001) loss 2.6390 (0.8815) lr 1.0628e-03 eta 3:43:09
epoch [30/50] batch [600/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.1733 (0.8814) lr 1.0628e-03 eta 3:42:52
epoch [30/50] batch [620/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.7072 (0.8824) lr 1.0628e-03 eta 3:42:35
epoch [30/50] batch [640/796] time 0.829 (0.830) data 0.000 (0.001) loss 1.3351 (0.8781) lr 1.0628e-03 eta 3:42:19
epoch [30/50] batch [660/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.9553 (0.8727) lr 1.0628e-03 eta 3:42:03
epoch [30/50] batch [680/796] time 0.818 (0.830) data 0.000 (0.001) loss 0.4675 (0.8761) lr 1.0628e-03 eta 3:41:45
epoch [30/50] batch [700/796] time 0.839 (0.830) data 0.000 (0.001) loss 1.2731 (0.8776) lr 1.0628e-03 eta 3:41:28
epoch [30/50] batch [720/796] time 0.816 (0.830) data 0.000 (0.001) loss 1.0071 (0.8754) lr 1.0628e-03 eta 3:41:11
epoch [30/50] batch [740/796] time 0.836 (0.830) data 0.000 (0.001) loss 2.1767 (0.8790) lr 1.0628e-03 eta 3:40:54
epoch [30/50] batch [760/796] time 0.829 (0.830) data 0.000 (0.001) loss 0.4793 (0.8759) lr 1.0628e-03 eta 3:40:38
epoch [30/50] batch [780/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.4288 (0.8744) lr 1.0628e-03 eta 3:40:21
epoch [31/50] batch [20/796] time 0.838 (0.867) data 0.000 (0.036) loss 1.3354 (0.8864) lr 1.0000e-03 eta 3:49:48
epoch [31/50] batch [40/796] time 0.817 (0.848) data 0.000 (0.018) loss 0.5398 (0.8711) lr 1.0000e-03 eta 3:44:26
epoch [31/50] batch [60/796] time 0.829 (0.842) data 0.000 (0.012) loss 0.5796 (0.8744) lr 1.0000e-03 eta 3:42:29
epoch [31/50] batch [80/796] time 0.841 (0.839) data 0.000 (0.009) loss 0.1562 (0.8945) lr 1.0000e-03 eta 3:41:34
epoch [31/50] batch [100/796] time 0.829 (0.838) data 0.000 (0.007) loss 0.3195 (0.8831) lr 1.0000e-03 eta 3:40:49
epoch [31/50] batch [120/796] time 0.838 (0.836) data 0.000 (0.006) loss 0.1196 (0.8729) lr 1.0000e-03 eta 3:40:09
epoch [31/50] batch [140/796] time 0.818 (0.835) data 0.000 (0.005) loss 0.8499 (0.8680) lr 1.0000e-03 eta 3:39:32
epoch [31/50] batch [160/796] time 0.809 (0.834) data 0.000 (0.005) loss 0.4442 (0.8689) lr 1.0000e-03 eta 3:39:08
epoch [31/50] batch [180/796] time 0.838 (0.834) data 0.000 (0.004) loss 0.1570 (0.8591) lr 1.0000e-03 eta 3:38:48
epoch [31/50] batch [200/796] time 0.818 (0.834) data 0.000 (0.004) loss 1.1023 (0.8561) lr 1.0000e-03 eta 3:38:24
epoch [31/50] batch [220/796] time 0.817 (0.833) data 0.000 (0.003) loss 0.8213 (0.8703) lr 1.0000e-03 eta 3:38:02
epoch [31/50] batch [240/796] time 0.817 (0.833) data 0.000 (0.003) loss 0.3984 (0.8827) lr 1.0000e-03 eta 3:37:44
epoch [31/50] batch [260/796] time 0.838 (0.833) data 0.000 (0.003) loss 0.9082 (0.8622) lr 1.0000e-03 eta 3:37:26
epoch [31/50] batch [280/796] time 0.831 (0.833) data 0.000 (0.003) loss 0.2707 (0.8641) lr 1.0000e-03 eta 3:37:05
epoch [31/50] batch [300/796] time 0.828 (0.833) data 0.000 (0.003) loss 0.5718 (0.8750) lr 1.0000e-03 eta 3:36:43
epoch [31/50] batch [320/796] time 0.819 (0.832) data 0.000 (0.002) loss 0.8176 (0.8803) lr 1.0000e-03 eta 3:36:23
epoch [31/50] batch [340/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.9700 (0.8906) lr 1.0000e-03 eta 3:36:05
epoch [31/50] batch [360/796] time 0.830 (0.832) data 0.000 (0.002) loss 1.0959 (0.8993) lr 1.0000e-03 eta 3:35:45
epoch [31/50] batch [380/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.3168 (0.8993) lr 1.0000e-03 eta 3:35:28
epoch [31/50] batch [400/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.5601 (0.8973) lr 1.0000e-03 eta 3:35:11
epoch [31/50] batch [420/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.4727 (0.8923) lr 1.0000e-03 eta 3:34:54
epoch [31/50] batch [440/796] time 0.839 (0.832) data 0.000 (0.002) loss 2.7360 (0.8984) lr 1.0000e-03 eta 3:34:35
epoch [31/50] batch [460/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.2829 (0.9021) lr 1.0000e-03 eta 3:34:16
epoch [31/50] batch [480/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.9622 (0.9022) lr 1.0000e-03 eta 3:33:57
epoch [31/50] batch [500/796] time 0.829 (0.832) data 0.000 (0.002) loss 2.0710 (0.9135) lr 1.0000e-03 eta 3:33:42
epoch [31/50] batch [520/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.8771 (0.9054) lr 1.0000e-03 eta 3:33:23
epoch [31/50] batch [540/796] time 0.829 (0.831) data 0.000 (0.002) loss 3.3563 (0.9025) lr 1.0000e-03 eta 3:33:06
epoch [31/50] batch [560/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.4463 (0.8926) lr 1.0000e-03 eta 3:32:49
epoch [31/50] batch [580/796] time 0.819 (0.831) data 0.000 (0.001) loss 2.6290 (0.8885) lr 1.0000e-03 eta 3:32:31
epoch [31/50] batch [600/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.2857 (0.8905) lr 1.0000e-03 eta 3:32:13
epoch [31/50] batch [620/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.9996 (0.8876) lr 1.0000e-03 eta 3:31:55
epoch [31/50] batch [640/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.3905 (0.8923) lr 1.0000e-03 eta 3:31:38
epoch [31/50] batch [660/796] time 0.839 (0.831) data 0.000 (0.001) loss 2.2826 (0.8982) lr 1.0000e-03 eta 3:31:21
epoch [31/50] batch [680/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.6682 (0.8980) lr 1.0000e-03 eta 3:31:04
epoch [31/50] batch [700/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.1622 (0.8936) lr 1.0000e-03 eta 3:30:48
epoch [31/50] batch [720/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.2073 (0.8862) lr 1.0000e-03 eta 3:30:31
epoch [31/50] batch [740/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.6130 (0.8813) lr 1.0000e-03 eta 3:30:14
epoch [31/50] batch [760/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.3880 (0.8754) lr 1.0000e-03 eta 3:29:57
epoch [31/50] batch [780/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.2698 (0.8741) lr 1.0000e-03 eta 3:29:40
epoch [32/50] batch [20/796] time 0.837 (0.858) data 0.000 (0.028) loss 0.2776 (0.7006) lr 9.3721e-04 eta 3:36:03
epoch [32/50] batch [40/796] time 0.840 (0.844) data 0.000 (0.014) loss 0.7327 (0.7662) lr 9.3721e-04 eta 3:32:12
epoch [32/50] batch [60/796] time 0.838 (0.840) data 0.000 (0.009) loss 0.2016 (0.7977) lr 9.3721e-04 eta 3:30:56
epoch [32/50] batch [80/796] time 0.837 (0.838) data 0.000 (0.007) loss 2.1495 (0.8021) lr 9.3721e-04 eta 3:30:03
epoch [32/50] batch [100/796] time 0.831 (0.836) data 0.000 (0.006) loss 0.1976 (0.7786) lr 9.3721e-04 eta 3:29:23
epoch [32/50] batch [120/796] time 0.837 (0.835) data 0.000 (0.005) loss 0.7289 (0.7653) lr 9.3721e-04 eta 3:28:44
epoch [32/50] batch [140/796] time 0.838 (0.834) data 0.000 (0.004) loss 1.6419 (0.8098) lr 9.3721e-04 eta 3:28:18
epoch [32/50] batch [160/796] time 0.840 (0.834) data 0.000 (0.004) loss 1.7323 (0.8030) lr 9.3721e-04 eta 3:27:53
epoch [32/50] batch [180/796] time 0.837 (0.833) data 0.000 (0.003) loss 0.3846 (0.8088) lr 9.3721e-04 eta 3:27:35
epoch [32/50] batch [200/796] time 0.819 (0.833) data 0.000 (0.003) loss 0.4624 (0.8190) lr 9.3721e-04 eta 3:27:13
epoch [32/50] batch [220/796] time 0.839 (0.833) data 0.000 (0.003) loss 1.4268 (0.8420) lr 9.3721e-04 eta 3:26:51
epoch [32/50] batch [240/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.2151 (0.8452) lr 9.3721e-04 eta 3:26:29
epoch [32/50] batch [260/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.9071 (0.8563) lr 9.3721e-04 eta 3:26:10
epoch [32/50] batch [280/796] time 0.818 (0.832) data 0.000 (0.002) loss 2.5199 (0.8744) lr 9.3721e-04 eta 3:25:50
epoch [32/50] batch [300/796] time 0.840 (0.832) data 0.000 (0.002) loss 0.7879 (0.8620) lr 9.3721e-04 eta 3:25:32
epoch [32/50] batch [320/796] time 0.818 (0.832) data 0.000 (0.002) loss 1.1856 (0.8614) lr 9.3721e-04 eta 3:25:15
epoch [32/50] batch [340/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.4257 (0.8515) lr 9.3721e-04 eta 3:24:57
epoch [32/50] batch [360/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.8297 (0.8525) lr 9.3721e-04 eta 3:24:39
epoch [32/50] batch [380/796] time 0.838 (0.831) data 0.000 (0.002) loss 0.3364 (0.8444) lr 9.3721e-04 eta 3:24:18
epoch [32/50] batch [400/796] time 0.831 (0.831) data 0.000 (0.002) loss 1.3059 (0.8399) lr 9.3721e-04 eta 3:23:59
epoch [32/50] batch [420/796] time 0.837 (0.831) data 0.000 (0.002) loss 0.8555 (0.8464) lr 9.3721e-04 eta 3:23:42
epoch [32/50] batch [440/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.9159 (0.8438) lr 9.3721e-04 eta 3:23:25
epoch [32/50] batch [460/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.0349 (0.8427) lr 9.3721e-04 eta 3:23:09
epoch [32/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.6850 (0.8492) lr 9.3721e-04 eta 3:22:53
epoch [32/50] batch [500/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.4999 (0.8488) lr 9.3721e-04 eta 3:22:35
epoch [32/50] batch [520/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.3832 (0.8399) lr 9.3721e-04 eta 3:22:19
epoch [32/50] batch [540/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.0598 (0.8426) lr 9.3721e-04 eta 3:22:02
epoch [32/50] batch [560/796] time 0.832 (0.831) data 0.000 (0.001) loss 0.4871 (0.8426) lr 9.3721e-04 eta 3:21:45
epoch [32/50] batch [580/796] time 0.838 (0.831) data 0.000 (0.001) loss 2.6142 (0.8442) lr 9.3721e-04 eta 3:21:27
epoch [32/50] batch [600/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.6210 (0.8476) lr 9.3721e-04 eta 3:21:10
epoch [32/50] batch [620/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.7988 (0.8394) lr 9.3721e-04 eta 3:20:51
epoch [32/50] batch [640/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.2801 (0.8377) lr 9.3721e-04 eta 3:20:34
epoch [32/50] batch [660/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.8873 (0.8368) lr 9.3721e-04 eta 3:20:17
epoch [32/50] batch [680/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.3465 (0.8334) lr 9.3721e-04 eta 3:19:59
epoch [32/50] batch [700/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.5755 (0.8314) lr 9.3721e-04 eta 3:19:42
epoch [32/50] batch [720/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.9676 (0.8315) lr 9.3721e-04 eta 3:19:25
epoch [32/50] batch [740/796] time 0.840 (0.831) data 0.000 (0.001) loss 2.0300 (0.8268) lr 9.3721e-04 eta 3:19:09
epoch [32/50] batch [760/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.4211 (0.8282) lr 9.3721e-04 eta 3:18:51
epoch [32/50] batch [780/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.8509 (0.8299) lr 9.3721e-04 eta 3:18:35
epoch [33/50] batch [20/796] time 0.840 (0.867) data 0.000 (0.036) loss 2.3697 (0.8149) lr 8.7467e-04 eta 3:26:42
epoch [33/50] batch [40/796] time 0.839 (0.847) data 0.000 (0.018) loss 1.3003 (0.7921) lr 8.7467e-04 eta 3:21:35
epoch [33/50] batch [60/796] time 0.818 (0.841) data 0.000 (0.012) loss 1.1483 (0.7999) lr 8.7467e-04 eta 3:19:57
epoch [33/50] batch [80/796] time 0.839 (0.838) data 0.000 (0.009) loss 1.6458 (0.8637) lr 8.7467e-04 eta 3:19:02
epoch [33/50] batch [100/796] time 0.830 (0.836) data 0.000 (0.007) loss 0.3172 (0.8785) lr 8.7467e-04 eta 3:18:14
epoch [33/50] batch [120/796] time 0.840 (0.835) data 0.000 (0.006) loss 0.5099 (0.8539) lr 8.7467e-04 eta 3:17:46
epoch [33/50] batch [140/796] time 0.838 (0.835) data 0.000 (0.005) loss 1.8543 (0.8541) lr 8.7467e-04 eta 3:17:24
epoch [33/50] batch [160/796] time 0.829 (0.834) data 0.000 (0.005) loss 0.3523 (0.8721) lr 8.7467e-04 eta 3:16:57
epoch [33/50] batch [180/796] time 0.818 (0.834) data 0.000 (0.004) loss 1.3434 (0.8683) lr 8.7467e-04 eta 3:16:34
epoch [33/50] batch [200/796] time 0.818 (0.833) data 0.000 (0.004) loss 1.1941 (0.8581) lr 8.7467e-04 eta 3:16:08
epoch [33/50] batch [220/796] time 0.818 (0.833) data 0.000 (0.003) loss 0.3657 (0.8324) lr 8.7467e-04 eta 3:15:48
epoch [33/50] batch [240/796] time 0.829 (0.833) data 0.000 (0.003) loss 0.3595 (0.8610) lr 8.7467e-04 eta 3:15:30
epoch [33/50] batch [260/796] time 0.839 (0.832) data 0.000 (0.003) loss 0.5777 (0.8385) lr 8.7467e-04 eta 3:15:11
epoch [33/50] batch [280/796] time 0.838 (0.832) data 0.000 (0.003) loss 2.5738 (0.8331) lr 8.7467e-04 eta 3:14:51
epoch [33/50] batch [300/796] time 0.838 (0.832) data 0.000 (0.003) loss 0.5998 (0.8310) lr 8.7467e-04 eta 3:14:32
epoch [33/50] batch [320/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.3173 (0.8240) lr 8.7467e-04 eta 3:14:16
epoch [33/50] batch [340/796] time 0.837 (0.832) data 0.000 (0.002) loss 1.1363 (0.8338) lr 8.7467e-04 eta 3:13:57
epoch [33/50] batch [360/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.1925 (0.8493) lr 8.7467e-04 eta 3:13:40
epoch [33/50] batch [380/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.2366 (0.8524) lr 8.7467e-04 eta 3:13:24
epoch [33/50] batch [400/796] time 0.830 (0.832) data 0.000 (0.002) loss 1.1988 (0.8530) lr 8.7467e-04 eta 3:13:06
epoch [33/50] batch [420/796] time 0.841 (0.832) data 0.000 (0.002) loss 0.6402 (0.8507) lr 8.7467e-04 eta 3:12:48
epoch [33/50] batch [440/796] time 0.839 (0.832) data 0.000 (0.002) loss 5.2747 (0.8604) lr 8.7467e-04 eta 3:12:29
epoch [33/50] batch [460/796] time 0.818 (0.831) data 0.000 (0.002) loss 0.2232 (0.8606) lr 8.7467e-04 eta 3:12:09
epoch [33/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.002) loss 0.2123 (0.8632) lr 8.7467e-04 eta 3:11:51
epoch [33/50] batch [500/796] time 0.839 (0.831) data 0.000 (0.002) loss 1.5486 (0.8570) lr 8.7467e-04 eta 3:11:35
epoch [33/50] batch [520/796] time 0.829 (0.831) data 0.000 (0.002) loss 0.1090 (0.8523) lr 8.7467e-04 eta 3:11:16
epoch [33/50] batch [540/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.1065 (0.8562) lr 8.7467e-04 eta 3:10:58
epoch [33/50] batch [560/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.3424 (0.8553) lr 8.7467e-04 eta 3:10:42
epoch [33/50] batch [580/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.8276 (0.8522) lr 8.7467e-04 eta 3:10:24
epoch [33/50] batch [600/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.4217 (0.8549) lr 8.7467e-04 eta 3:10:07
epoch [33/50] batch [620/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.9651 (0.8549) lr 8.7467e-04 eta 3:09:51
epoch [33/50] batch [640/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.3341 (0.8585) lr 8.7467e-04 eta 3:09:32
epoch [33/50] batch [660/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.4998 (0.8648) lr 8.7467e-04 eta 3:09:16
epoch [33/50] batch [680/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.6807 (0.8661) lr 8.7467e-04 eta 3:08:58
epoch [33/50] batch [700/796] time 0.808 (0.831) data 0.000 (0.001) loss 1.3731 (0.8612) lr 8.7467e-04 eta 3:08:40
epoch [33/50] batch [720/796] time 0.829 (0.831) data 0.000 (0.001) loss 2.2596 (0.8608) lr 8.7467e-04 eta 3:08:23
epoch [33/50] batch [740/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.3946 (0.8701) lr 8.7467e-04 eta 3:08:06
epoch [33/50] batch [760/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.9080 (0.8676) lr 8.7467e-04 eta 3:07:50
epoch [33/50] batch [780/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.6004 (0.8607) lr 8.7467e-04 eta 3:07:33
epoch [34/50] batch [20/796] time 0.838 (0.855) data 0.000 (0.027) loss 1.5152 (0.8186) lr 8.1262e-04 eta 3:12:31
epoch [34/50] batch [40/796] time 0.830 (0.842) data 0.000 (0.014) loss 0.8028 (0.8938) lr 8.1262e-04 eta 3:09:25
epoch [34/50] batch [60/796] time 0.838 (0.838) data 0.000 (0.009) loss 0.4810 (0.8528) lr 8.1262e-04 eta 3:08:13
epoch [34/50] batch [80/796] time 0.808 (0.836) data 0.000 (0.007) loss 0.1508 (0.8400) lr 8.1262e-04 eta 3:07:29
epoch [34/50] batch [100/796] time 0.831 (0.834) data 0.000 (0.006) loss 1.4719 (0.8650) lr 8.1262e-04 eta 3:06:48
epoch [34/50] batch [120/796] time 0.832 (0.833) data 0.000 (0.005) loss 1.9599 (0.8635) lr 8.1262e-04 eta 3:06:16
epoch [34/50] batch [140/796] time 0.817 (0.833) data 0.000 (0.004) loss 0.3838 (0.8749) lr 8.1262e-04 eta 3:05:54
epoch [34/50] batch [160/796] time 0.839 (0.833) data 0.000 (0.004) loss 1.4391 (0.8424) lr 8.1262e-04 eta 3:05:37
epoch [34/50] batch [180/796] time 0.838 (0.833) data 0.000 (0.003) loss 1.6952 (0.8482) lr 8.1262e-04 eta 3:05:19
epoch [34/50] batch [200/796] time 0.830 (0.832) data 0.000 (0.003) loss 0.0504 (0.8206) lr 8.1262e-04 eta 3:04:54
epoch [34/50] batch [220/796] time 0.838 (0.832) data 0.000 (0.003) loss 0.4582 (0.8290) lr 8.1262e-04 eta 3:04:37
epoch [34/50] batch [240/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.1666 (0.8259) lr 8.1262e-04 eta 3:04:19
epoch [34/50] batch [260/796] time 0.818 (0.832) data 0.000 (0.002) loss 1.6079 (0.8391) lr 8.1262e-04 eta 3:03:59
epoch [34/50] batch [280/796] time 0.829 (0.831) data 0.000 (0.002) loss 0.9191 (0.8281) lr 8.1262e-04 eta 3:03:37
epoch [34/50] batch [300/796] time 0.823 (0.831) data 0.000 (0.002) loss 1.3848 (0.8437) lr 8.1262e-04 eta 3:03:21
epoch [34/50] batch [320/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.7094 (0.8508) lr 8.1262e-04 eta 3:03:04
epoch [34/50] batch [340/796] time 0.817 (0.831) data 0.000 (0.002) loss 2.4719 (0.8432) lr 8.1262e-04 eta 3:02:45
epoch [34/50] batch [360/796] time 0.828 (0.831) data 0.000 (0.002) loss 0.6441 (0.8420) lr 8.1262e-04 eta 3:02:27
epoch [34/50] batch [380/796] time 0.831 (0.831) data 0.000 (0.002) loss 0.1742 (0.8393) lr 8.1262e-04 eta 3:02:10
epoch [34/50] batch [400/796] time 0.817 (0.831) data 0.000 (0.002) loss 0.5567 (0.8385) lr 8.1262e-04 eta 3:01:52
epoch [34/50] batch [420/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.8576 (0.8313) lr 8.1262e-04 eta 3:01:35
epoch [34/50] batch [440/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.5051 (0.8353) lr 8.1262e-04 eta 3:01:18
epoch [34/50] batch [460/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.7675 (0.8392) lr 8.1262e-04 eta 3:01:01
epoch [34/50] batch [480/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.3432 (0.8387) lr 8.1262e-04 eta 3:00:45
epoch [34/50] batch [500/796] time 0.831 (0.831) data 0.000 (0.001) loss 1.1992 (0.8456) lr 8.1262e-04 eta 3:00:28
epoch [34/50] batch [520/796] time 0.837 (0.831) data 0.000 (0.001) loss 1.0758 (0.8452) lr 8.1262e-04 eta 3:00:12
epoch [34/50] batch [540/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.8988 (0.8522) lr 8.1262e-04 eta 2:59:56
epoch [34/50] batch [560/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.6597 (0.8551) lr 8.1262e-04 eta 2:59:39
epoch [34/50] batch [580/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.5877 (0.8516) lr 8.1262e-04 eta 2:59:22
epoch [34/50] batch [600/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.2585 (0.8534) lr 8.1262e-04 eta 2:59:05
epoch [34/50] batch [620/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.0467 (0.8563) lr 8.1262e-04 eta 2:58:48
epoch [34/50] batch [640/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.3910 (0.8528) lr 8.1262e-04 eta 2:58:31
epoch [34/50] batch [660/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.8552 (0.8584) lr 8.1262e-04 eta 2:58:14
epoch [34/50] batch [680/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.4908 (0.8554) lr 8.1262e-04 eta 2:57:56
epoch [34/50] batch [700/796] time 0.809 (0.831) data 0.000 (0.001) loss 1.3801 (0.8493) lr 8.1262e-04 eta 2:57:39
epoch [34/50] batch [720/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.1079 (0.8520) lr 8.1262e-04 eta 2:57:22
epoch [34/50] batch [740/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.0700 (0.8472) lr 8.1262e-04 eta 2:57:04
epoch [34/50] batch [760/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.3726 (0.8491) lr 8.1262e-04 eta 2:56:47
epoch [34/50] batch [780/796] time 0.839 (0.830) data 0.000 (0.001) loss 1.7792 (0.8510) lr 8.1262e-04 eta 2:56:30
epoch [35/50] batch [20/796] time 0.829 (0.857) data 0.000 (0.026) loss 1.5270 (0.8812) lr 7.5131e-04 eta 3:01:37
epoch [35/50] batch [40/796] time 0.816 (0.844) data 0.000 (0.013) loss 0.4256 (0.9609) lr 7.5131e-04 eta 2:58:35
epoch [35/50] batch [60/796] time 0.838 (0.839) data 0.000 (0.009) loss 0.8838 (0.9229) lr 7.5131e-04 eta 2:57:21
epoch [35/50] batch [80/796] time 0.823 (0.837) data 0.000 (0.007) loss 0.7782 (0.9248) lr 7.5131e-04 eta 2:56:32
epoch [35/50] batch [100/796] time 0.818 (0.836) data 0.000 (0.005) loss 0.4793 (0.8929) lr 7.5131e-04 eta 2:55:57
epoch [35/50] batch [120/796] time 0.829 (0.834) data 0.000 (0.005) loss 1.0815 (0.9117) lr 7.5131e-04 eta 2:55:27
epoch [35/50] batch [140/796] time 0.838 (0.834) data 0.000 (0.004) loss 1.7685 (0.9153) lr 7.5131e-04 eta 2:55:00
epoch [35/50] batch [160/796] time 0.838 (0.833) data 0.000 (0.003) loss 0.1248 (0.9349) lr 7.5131e-04 eta 2:54:38
epoch [35/50] batch [180/796] time 0.837 (0.833) data 0.000 (0.003) loss 1.8753 (0.9372) lr 7.5131e-04 eta 2:54:17
epoch [35/50] batch [200/796] time 0.838 (0.832) data 0.000 (0.003) loss 0.3384 (0.9475) lr 7.5131e-04 eta 2:53:54
epoch [35/50] batch [220/796] time 0.838 (0.832) data 0.000 (0.003) loss 0.9961 (0.9492) lr 7.5131e-04 eta 2:53:33
epoch [35/50] batch [240/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.6501 (0.9434) lr 7.5131e-04 eta 2:53:12
epoch [35/50] batch [260/796] time 0.833 (0.832) data 0.000 (0.002) loss 0.7243 (0.9244) lr 7.5131e-04 eta 2:52:54
epoch [35/50] batch [280/796] time 0.819 (0.832) data 0.000 (0.002) loss 0.4544 (0.9160) lr 7.5131e-04 eta 2:52:38
epoch [35/50] batch [300/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.5294 (0.8968) lr 7.5131e-04 eta 2:52:21
epoch [35/50] batch [320/796] time 0.839 (0.832) data 0.000 (0.002) loss 2.0069 (0.8974) lr 7.5131e-04 eta 2:52:05
epoch [35/50] batch [340/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.6031 (0.8829) lr 7.5131e-04 eta 2:51:48
epoch [35/50] batch [360/796] time 0.837 (0.832) data 0.000 (0.002) loss 0.0221 (0.8855) lr 7.5131e-04 eta 2:51:32
epoch [35/50] batch [380/796] time 0.830 (0.832) data 0.000 (0.002) loss 1.4908 (0.8727) lr 7.5131e-04 eta 2:51:15
epoch [35/50] batch [400/796] time 0.838 (0.832) data 0.000 (0.001) loss 1.1163 (0.8865) lr 7.5131e-04 eta 2:50:57
epoch [35/50] batch [420/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.0365 (0.8776) lr 7.5131e-04 eta 2:50:39
epoch [35/50] batch [440/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.2768 (0.8802) lr 7.5131e-04 eta 2:50:23
epoch [35/50] batch [460/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.1436 (0.8794) lr 7.5131e-04 eta 2:50:06
epoch [35/50] batch [480/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.3100 (0.8757) lr 7.5131e-04 eta 2:49:48
epoch [35/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2718 (0.8691) lr 7.5131e-04 eta 2:49:30
epoch [35/50] batch [520/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.5310 (0.8670) lr 7.5131e-04 eta 2:49:13
epoch [35/50] batch [540/796] time 0.819 (0.831) data 0.000 (0.001) loss 2.7561 (0.8637) lr 7.5131e-04 eta 2:48:56
epoch [35/50] batch [560/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.0501 (0.8607) lr 7.5131e-04 eta 2:48:39
epoch [35/50] batch [580/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.5001 (0.8498) lr 7.5131e-04 eta 2:48:21
epoch [35/50] batch [600/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.3649 (0.8454) lr 7.5131e-04 eta 2:48:04
epoch [35/50] batch [620/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.3803 (0.8399) lr 7.5131e-04 eta 2:47:47
epoch [35/50] batch [640/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.3512 (0.8413) lr 7.5131e-04 eta 2:47:29
epoch [35/50] batch [660/796] time 0.837 (0.831) data 0.000 (0.001) loss 1.1489 (0.8479) lr 7.5131e-04 eta 2:47:12
epoch [35/50] batch [680/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.4563 (0.8497) lr 7.5131e-04 eta 2:46:56
epoch [35/50] batch [700/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.8138 (0.8488) lr 7.5131e-04 eta 2:46:40
epoch [35/50] batch [720/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.4901 (0.8479) lr 7.5131e-04 eta 2:46:22
epoch [35/50] batch [740/796] time 0.842 (0.831) data 0.000 (0.001) loss 0.5032 (0.8528) lr 7.5131e-04 eta 2:46:06
epoch [35/50] batch [760/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.1681 (0.8540) lr 7.5131e-04 eta 2:45:50
epoch [35/50] batch [780/796] time 0.833 (0.831) data 0.000 (0.001) loss 0.0249 (0.8525) lr 7.5131e-04 eta 2:45:34
epoch [36/50] batch [20/796] time 0.843 (0.863) data 0.000 (0.031) loss 0.3992 (0.8231) lr 6.9098e-04 eta 2:51:31
epoch [36/50] batch [40/796] time 0.838 (0.847) data 0.000 (0.015) loss 0.8455 (0.7714) lr 6.9098e-04 eta 2:47:58
epoch [36/50] batch [60/796] time 0.813 (0.841) data 0.000 (0.010) loss 0.0321 (0.9224) lr 6.9098e-04 eta 2:46:28
epoch [36/50] batch [80/796] time 0.829 (0.838) data 0.000 (0.008) loss 0.4979 (0.8863) lr 6.9098e-04 eta 2:45:43
epoch [36/50] batch [100/796] time 0.818 (0.837) data 0.000 (0.006) loss 0.2365 (0.9025) lr 6.9098e-04 eta 2:45:07
epoch [36/50] batch [120/796] time 0.829 (0.836) data 0.000 (0.005) loss 0.3244 (0.8859) lr 6.9098e-04 eta 2:44:39
epoch [36/50] batch [140/796] time 0.838 (0.835) data 0.000 (0.005) loss 0.2230 (0.9048) lr 6.9098e-04 eta 2:44:15
epoch [36/50] batch [160/796] time 0.838 (0.835) data 0.000 (0.004) loss 0.9322 (0.9158) lr 6.9098e-04 eta 2:43:53
epoch [36/50] batch [180/796] time 0.837 (0.834) data 0.000 (0.004) loss 0.4131 (0.9151) lr 6.9098e-04 eta 2:43:31
epoch [36/50] batch [200/796] time 0.818 (0.834) data 0.000 (0.003) loss 2.6386 (0.9100) lr 6.9098e-04 eta 2:43:08
epoch [36/50] batch [220/796] time 0.818 (0.834) data 0.000 (0.003) loss 0.8432 (0.8882) lr 6.9098e-04 eta 2:42:48
epoch [36/50] batch [240/796] time 0.817 (0.833) data 0.000 (0.003) loss 2.4768 (0.9064) lr 6.9098e-04 eta 2:42:27
epoch [36/50] batch [260/796] time 0.819 (0.833) data 0.000 (0.003) loss 1.6928 (0.9061) lr 6.9098e-04 eta 2:42:08
epoch [36/50] batch [280/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.8998 (0.9109) lr 6.9098e-04 eta 2:41:49
epoch [36/50] batch [300/796] time 0.839 (0.833) data 0.000 (0.002) loss 0.4210 (0.9038) lr 6.9098e-04 eta 2:41:31
epoch [36/50] batch [320/796] time 0.839 (0.833) data 0.000 (0.002) loss 0.9793 (0.8927) lr 6.9098e-04 eta 2:41:14
epoch [36/50] batch [340/796] time 0.817 (0.832) data 0.000 (0.002) loss 1.1224 (0.9082) lr 6.9098e-04 eta 2:40:56
epoch [36/50] batch [360/796] time 0.829 (0.833) data 0.000 (0.002) loss 0.6168 (0.9030) lr 6.9098e-04 eta 2:40:40
epoch [36/50] batch [380/796] time 0.831 (0.832) data 0.000 (0.002) loss 0.0190 (0.9002) lr 6.9098e-04 eta 2:40:22
epoch [36/50] batch [400/796] time 0.842 (0.832) data 0.000 (0.002) loss 0.3610 (0.8849) lr 6.9098e-04 eta 2:40:06
epoch [36/50] batch [420/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.7070 (0.8883) lr 6.9098e-04 eta 2:39:48
epoch [36/50] batch [440/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.3073 (0.8828) lr 6.9098e-04 eta 2:39:29
epoch [36/50] batch [460/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.6914 (0.8805) lr 6.9098e-04 eta 2:39:11
epoch [36/50] batch [480/796] time 0.817 (0.832) data 0.000 (0.001) loss 0.6670 (0.8751) lr 6.9098e-04 eta 2:38:52
epoch [36/50] batch [500/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.2364 (0.8725) lr 6.9098e-04 eta 2:38:36
epoch [36/50] batch [520/796] time 0.829 (0.832) data 0.000 (0.001) loss 0.7671 (0.8738) lr 6.9098e-04 eta 2:38:18
epoch [36/50] batch [540/796] time 0.838 (0.832) data 0.000 (0.001) loss 0.4076 (0.8760) lr 6.9098e-04 eta 2:37:59
epoch [36/50] batch [560/796] time 0.826 (0.831) data 0.000 (0.001) loss 0.1563 (0.8761) lr 6.9098e-04 eta 2:37:41
epoch [36/50] batch [580/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.4458 (0.8794) lr 6.9098e-04 eta 2:37:24
epoch [36/50] batch [600/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.0316 (0.8785) lr 6.9098e-04 eta 2:37:07
epoch [36/50] batch [620/796] time 0.839 (0.831) data 0.000 (0.001) loss 2.0577 (0.8811) lr 6.9098e-04 eta 2:36:48
epoch [36/50] batch [640/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.1440 (0.8709) lr 6.9098e-04 eta 2:36:30
epoch [36/50] batch [660/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.3348 (0.8645) lr 6.9098e-04 eta 2:36:14
epoch [36/50] batch [680/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.7805 (0.8600) lr 6.9098e-04 eta 2:35:57
epoch [36/50] batch [700/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.1819 (0.8546) lr 6.9098e-04 eta 2:35:40
epoch [36/50] batch [720/796] time 0.809 (0.831) data 0.000 (0.001) loss 1.4009 (0.8575) lr 6.9098e-04 eta 2:35:23
epoch [36/50] batch [740/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.3764 (0.8600) lr 6.9098e-04 eta 2:35:06
epoch [36/50] batch [760/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.2986 (0.8570) lr 6.9098e-04 eta 2:34:48
epoch [36/50] batch [780/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.9031 (0.8587) lr 6.9098e-04 eta 2:34:31
epoch [37/50] batch [20/796] time 0.838 (0.863) data 0.000 (0.030) loss 0.3854 (0.8100) lr 6.3188e-04 eta 2:40:00
epoch [37/50] batch [40/796] time 0.818 (0.846) data 0.000 (0.015) loss 0.6095 (0.8779) lr 6.3188e-04 eta 2:36:36
epoch [37/50] batch [60/796] time 0.820 (0.840) data 0.000 (0.010) loss 0.3076 (0.7566) lr 6.3188e-04 eta 2:35:14
epoch [37/50] batch [80/796] time 0.810 (0.838) data 0.000 (0.008) loss 0.3290 (0.7651) lr 6.3188e-04 eta 2:34:31
epoch [37/50] batch [100/796] time 0.830 (0.836) data 0.000 (0.006) loss 1.7509 (0.8140) lr 6.3188e-04 eta 2:33:51
epoch [37/50] batch [120/796] time 0.839 (0.835) data 0.000 (0.005) loss 1.8552 (0.8216) lr 6.3188e-04 eta 2:33:26
epoch [37/50] batch [140/796] time 0.838 (0.835) data 0.000 (0.004) loss 0.3738 (0.8313) lr 6.3188e-04 eta 2:33:03
epoch [37/50] batch [160/796] time 0.838 (0.834) data 0.000 (0.004) loss 0.4771 (0.8526) lr 6.3188e-04 eta 2:32:37
epoch [37/50] batch [180/796] time 0.830 (0.833) data 0.000 (0.004) loss 0.0942 (0.8506) lr 6.3188e-04 eta 2:32:16
epoch [37/50] batch [200/796] time 0.817 (0.833) data 0.000 (0.003) loss 0.6727 (0.8677) lr 6.3188e-04 eta 2:31:54
epoch [37/50] batch [220/796] time 0.840 (0.833) data 0.000 (0.003) loss 1.6448 (0.8688) lr 6.3188e-04 eta 2:31:36
epoch [37/50] batch [240/796] time 0.817 (0.832) data 0.000 (0.003) loss 1.0313 (0.8865) lr 6.3188e-04 eta 2:31:16
epoch [37/50] batch [260/796] time 0.818 (0.832) data 0.000 (0.003) loss 3.7418 (0.8983) lr 6.3188e-04 eta 2:30:58
epoch [37/50] batch [280/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.2837 (0.8898) lr 6.3188e-04 eta 2:30:39
epoch [37/50] batch [300/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.3583 (0.9053) lr 6.3188e-04 eta 2:30:20
epoch [37/50] batch [320/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.2305 (0.9103) lr 6.3188e-04 eta 2:30:03
epoch [37/50] batch [340/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.2686 (0.9070) lr 6.3188e-04 eta 2:29:45
epoch [37/50] batch [360/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.5164 (0.8997) lr 6.3188e-04 eta 2:29:28
epoch [37/50] batch [380/796] time 0.837 (0.832) data 0.000 (0.002) loss 1.2924 (0.8900) lr 6.3188e-04 eta 2:29:11
epoch [37/50] batch [400/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.3167 (0.8762) lr 6.3188e-04 eta 2:28:54
epoch [37/50] batch [420/796] time 0.841 (0.832) data 0.000 (0.002) loss 0.1428 (0.8711) lr 6.3188e-04 eta 2:28:38
epoch [37/50] batch [440/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.2109 (0.8782) lr 6.3188e-04 eta 2:28:20
epoch [37/50] batch [460/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.1873 (0.8776) lr 6.3188e-04 eta 2:28:03
epoch [37/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.1404 (0.8781) lr 6.3188e-04 eta 2:27:45
epoch [37/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.0757 (0.8823) lr 6.3188e-04 eta 2:27:28
epoch [37/50] batch [520/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.5417 (0.8761) lr 6.3188e-04 eta 2:27:10
epoch [37/50] batch [540/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.0507 (0.8741) lr 6.3188e-04 eta 2:26:53
epoch [37/50] batch [560/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.0662 (0.8639) lr 6.3188e-04 eta 2:26:36
epoch [37/50] batch [580/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.8983 (0.8644) lr 6.3188e-04 eta 2:26:18
epoch [37/50] batch [600/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.4045 (0.8633) lr 6.3188e-04 eta 2:26:01
epoch [37/50] batch [620/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.9244 (0.8609) lr 6.3188e-04 eta 2:25:44
epoch [37/50] batch [640/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.4747 (0.8721) lr 6.3188e-04 eta 2:25:27
epoch [37/50] batch [660/796] time 0.839 (0.831) data 0.000 (0.001) loss 2.4392 (0.8776) lr 6.3188e-04 eta 2:25:10
epoch [37/50] batch [680/796] time 0.835 (0.831) data 0.000 (0.001) loss 0.1877 (0.8717) lr 6.3188e-04 eta 2:24:53
epoch [37/50] batch [700/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.3382 (0.8719) lr 6.3188e-04 eta 2:24:36
epoch [37/50] batch [720/796] time 0.819 (0.831) data 0.000 (0.001) loss 1.9127 (0.8741) lr 6.3188e-04 eta 2:24:19
epoch [37/50] batch [740/796] time 0.831 (0.831) data 0.000 (0.001) loss 0.3105 (0.8770) lr 6.3188e-04 eta 2:24:03
epoch [37/50] batch [760/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.8635 (0.8764) lr 6.3188e-04 eta 2:23:46
epoch [37/50] batch [780/796] time 0.844 (0.831) data 0.000 (0.001) loss 0.3531 (0.8766) lr 6.3188e-04 eta 2:23:30
epoch [38/50] batch [20/796] time 0.828 (0.858) data 0.000 (0.029) loss 1.8476 (0.7948) lr 5.7422e-04 eta 2:27:44
epoch [38/50] batch [40/796] time 0.829 (0.844) data 0.000 (0.015) loss 1.7605 (0.8026) lr 5.7422e-04 eta 2:25:00
epoch [38/50] batch [60/796] time 0.828 (0.839) data 0.000 (0.010) loss 0.5499 (0.7651) lr 5.7422e-04 eta 2:23:54
epoch [38/50] batch [80/796] time 0.808 (0.837) data 0.000 (0.007) loss 0.1229 (0.7363) lr 5.7422e-04 eta 2:23:13
epoch [38/50] batch [100/796] time 0.818 (0.835) data 0.000 (0.006) loss 0.9917 (0.7859) lr 5.7422e-04 eta 2:22:39
epoch [38/50] batch [120/796] time 0.818 (0.834) data 0.000 (0.005) loss 0.6020 (0.7800) lr 5.7422e-04 eta 2:22:14
epoch [38/50] batch [140/796] time 0.817 (0.834) data 0.000 (0.004) loss 1.3187 (0.7887) lr 5.7422e-04 eta 2:21:49
epoch [38/50] batch [160/796] time 0.818 (0.833) data 0.000 (0.004) loss 0.5048 (0.7977) lr 5.7422e-04 eta 2:21:28
epoch [38/50] batch [180/796] time 0.818 (0.833) data 0.000 (0.003) loss 2.1078 (0.8071) lr 5.7422e-04 eta 2:21:07
epoch [38/50] batch [200/796] time 0.818 (0.832) data 0.000 (0.003) loss 2.8155 (0.8258) lr 5.7422e-04 eta 2:20:48
epoch [38/50] batch [220/796] time 0.820 (0.832) data 0.000 (0.003) loss 1.9095 (0.8157) lr 5.7422e-04 eta 2:20:27
epoch [38/50] batch [240/796] time 0.819 (0.832) data 0.000 (0.003) loss 1.7874 (0.8211) lr 5.7422e-04 eta 2:20:08
epoch [38/50] batch [260/796] time 0.816 (0.832) data 0.000 (0.002) loss 1.1626 (0.8537) lr 5.7422e-04 eta 2:19:49
epoch [38/50] batch [280/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.5171 (0.8702) lr 5.7422e-04 eta 2:19:31
epoch [38/50] batch [300/796] time 0.818 (0.831) data 0.000 (0.002) loss 0.2823 (0.8677) lr 5.7422e-04 eta 2:19:13
epoch [38/50] batch [320/796] time 0.829 (0.831) data 0.000 (0.002) loss 1.1189 (0.8595) lr 5.7422e-04 eta 2:18:55
epoch [38/50] batch [340/796] time 0.839 (0.831) data 0.000 (0.002) loss 0.9214 (0.8674) lr 5.7422e-04 eta 2:18:37
epoch [38/50] batch [360/796] time 0.808 (0.831) data 0.000 (0.002) loss 1.9239 (0.8703) lr 5.7422e-04 eta 2:18:17
epoch [38/50] batch [380/796] time 0.817 (0.831) data 0.000 (0.002) loss 0.6043 (0.8608) lr 5.7422e-04 eta 2:17:59
epoch [38/50] batch [400/796] time 0.829 (0.830) data 0.000 (0.002) loss 0.1805 (0.8557) lr 5.7422e-04 eta 2:17:41
epoch [38/50] batch [420/796] time 0.819 (0.830) data 0.000 (0.002) loss 0.5148 (0.8538) lr 5.7422e-04 eta 2:17:23
epoch [38/50] batch [440/796] time 0.838 (0.830) data 0.000 (0.001) loss 1.6014 (0.8555) lr 5.7422e-04 eta 2:17:06
epoch [38/50] batch [460/796] time 0.838 (0.830) data 0.000 (0.001) loss 0.3301 (0.8547) lr 5.7422e-04 eta 2:16:49
epoch [38/50] batch [480/796] time 0.808 (0.830) data 0.000 (0.001) loss 0.4958 (0.8472) lr 5.7422e-04 eta 2:16:33
epoch [38/50] batch [500/796] time 0.809 (0.830) data 0.000 (0.001) loss 0.5657 (0.8418) lr 5.7422e-04 eta 2:16:14
epoch [38/50] batch [520/796] time 0.818 (0.830) data 0.000 (0.001) loss 1.2891 (0.8367) lr 5.7422e-04 eta 2:15:57
epoch [38/50] batch [540/796] time 0.839 (0.830) data 0.000 (0.001) loss 1.1520 (0.8374) lr 5.7422e-04 eta 2:15:42
epoch [38/50] batch [560/796] time 0.817 (0.830) data 0.000 (0.001) loss 1.0134 (0.8375) lr 5.7422e-04 eta 2:15:25
epoch [38/50] batch [580/796] time 0.828 (0.830) data 0.000 (0.001) loss 1.6108 (0.8506) lr 5.7422e-04 eta 2:15:08
epoch [38/50] batch [600/796] time 0.818 (0.830) data 0.000 (0.001) loss 0.4429 (0.8482) lr 5.7422e-04 eta 2:14:52
epoch [38/50] batch [620/796] time 0.839 (0.830) data 0.000 (0.001) loss 1.1560 (0.8539) lr 5.7422e-04 eta 2:14:35
epoch [38/50] batch [640/796] time 0.838 (0.830) data 0.000 (0.001) loss 1.0547 (0.8510) lr 5.7422e-04 eta 2:14:18
epoch [38/50] batch [660/796] time 0.820 (0.830) data 0.000 (0.001) loss 0.4062 (0.8441) lr 5.7422e-04 eta 2:14:02
epoch [38/50] batch [680/796] time 0.829 (0.830) data 0.000 (0.001) loss 1.1981 (0.8422) lr 5.7422e-04 eta 2:13:45
epoch [38/50] batch [700/796] time 0.831 (0.830) data 0.000 (0.001) loss 0.4705 (0.8379) lr 5.7422e-04 eta 2:13:29
epoch [38/50] batch [720/796] time 0.829 (0.830) data 0.000 (0.001) loss 0.4450 (0.8300) lr 5.7422e-04 eta 2:13:13
epoch [38/50] batch [740/796] time 0.818 (0.830) data 0.000 (0.001) loss 1.1052 (0.8297) lr 5.7422e-04 eta 2:12:56
epoch [38/50] batch [760/796] time 0.818 (0.830) data 0.000 (0.001) loss 1.0860 (0.8306) lr 5.7422e-04 eta 2:12:39
epoch [38/50] batch [780/796] time 0.840 (0.830) data 0.000 (0.001) loss 1.6314 (0.8371) lr 5.7422e-04 eta 2:12:23
epoch [39/50] batch [20/796] time 0.830 (0.861) data 0.000 (0.028) loss 1.1275 (0.5128) lr 5.1825e-04 eta 2:16:43
epoch [39/50] batch [40/796] time 0.838 (0.847) data 0.000 (0.014) loss 0.3119 (0.6107) lr 5.1825e-04 eta 2:14:15
epoch [39/50] batch [60/796] time 0.839 (0.842) data 0.000 (0.010) loss 0.9135 (0.6857) lr 5.1825e-04 eta 2:13:11
epoch [39/50] batch [80/796] time 0.831 (0.839) data 0.000 (0.007) loss 0.1613 (0.7279) lr 5.1825e-04 eta 2:12:22
epoch [39/50] batch [100/796] time 0.818 (0.837) data 0.000 (0.006) loss 2.8878 (0.7668) lr 5.1825e-04 eta 2:11:49
epoch [39/50] batch [120/796] time 0.838 (0.836) data 0.000 (0.005) loss 0.6869 (0.7565) lr 5.1825e-04 eta 2:11:25
epoch [39/50] batch [140/796] time 0.818 (0.835) data 0.000 (0.004) loss 0.2352 (0.7652) lr 5.1825e-04 eta 2:11:02
epoch [39/50] batch [160/796] time 0.817 (0.834) data 0.000 (0.004) loss 2.3825 (0.7793) lr 5.1825e-04 eta 2:10:36
epoch [39/50] batch [180/796] time 0.828 (0.834) data 0.000 (0.003) loss 0.7577 (0.7906) lr 5.1825e-04 eta 2:10:13
epoch [39/50] batch [200/796] time 0.817 (0.833) data 0.000 (0.003) loss 2.3425 (0.8063) lr 5.1825e-04 eta 2:09:54
epoch [39/50] batch [220/796] time 0.817 (0.833) data 0.000 (0.003) loss 1.0727 (0.8183) lr 5.1825e-04 eta 2:09:31
epoch [39/50] batch [240/796] time 0.818 (0.833) data 0.000 (0.003) loss 0.2764 (0.8162) lr 5.1825e-04 eta 2:09:12
epoch [39/50] batch [260/796] time 0.843 (0.832) data 0.000 (0.002) loss 1.2981 (0.8210) lr 5.1825e-04 eta 2:08:53
epoch [39/50] batch [280/796] time 0.821 (0.832) data 0.000 (0.002) loss 0.1581 (0.8102) lr 5.1825e-04 eta 2:08:35
epoch [39/50] batch [300/796] time 0.819 (0.832) data 0.000 (0.002) loss 0.2554 (0.8137) lr 5.1825e-04 eta 2:08:16
epoch [39/50] batch [320/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.9544 (0.8402) lr 5.1825e-04 eta 2:07:58
epoch [39/50] batch [340/796] time 0.819 (0.832) data 0.000 (0.002) loss 0.0348 (0.8426) lr 5.1825e-04 eta 2:07:41
epoch [39/50] batch [360/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.4647 (0.8425) lr 5.1825e-04 eta 2:07:25
epoch [39/50] batch [380/796] time 0.837 (0.832) data 0.000 (0.002) loss 1.2456 (0.8492) lr 5.1825e-04 eta 2:07:07
epoch [39/50] batch [400/796] time 0.838 (0.831) data 0.000 (0.002) loss 1.6577 (0.8506) lr 5.1825e-04 eta 2:06:49
epoch [39/50] batch [420/796] time 0.808 (0.831) data 0.000 (0.002) loss 0.2176 (0.8436) lr 5.1825e-04 eta 2:06:31
epoch [39/50] batch [440/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.0764 (0.8434) lr 5.1825e-04 eta 2:06:13
epoch [39/50] batch [460/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.0226 (0.8524) lr 5.1825e-04 eta 2:05:56
epoch [39/50] batch [480/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.6733 (0.8545) lr 5.1825e-04 eta 2:05:40
epoch [39/50] batch [500/796] time 0.810 (0.831) data 0.000 (0.001) loss 0.6293 (0.8610) lr 5.1825e-04 eta 2:05:22
epoch [39/50] batch [520/796] time 0.843 (0.831) data 0.000 (0.001) loss 1.3905 (0.8622) lr 5.1825e-04 eta 2:05:05
epoch [39/50] batch [540/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.5102 (0.8593) lr 5.1825e-04 eta 2:04:49
epoch [39/50] batch [560/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.6318 (0.8581) lr 5.1825e-04 eta 2:04:31
epoch [39/50] batch [580/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.0961 (0.8520) lr 5.1825e-04 eta 2:04:15
epoch [39/50] batch [600/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.4471 (0.8517) lr 5.1825e-04 eta 2:03:58
epoch [39/50] batch [620/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.8099 (0.8590) lr 5.1825e-04 eta 2:03:41
epoch [39/50] batch [640/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.2290 (0.8628) lr 5.1825e-04 eta 2:03:24
epoch [39/50] batch [660/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.1061 (0.8641) lr 5.1825e-04 eta 2:03:08
epoch [39/50] batch [680/796] time 0.810 (0.831) data 0.000 (0.001) loss 0.8401 (0.8638) lr 5.1825e-04 eta 2:02:51
epoch [39/50] batch [700/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.3000 (0.8708) lr 5.1825e-04 eta 2:02:34
epoch [39/50] batch [720/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.0265 (0.8769) lr 5.1825e-04 eta 2:02:18
epoch [39/50] batch [740/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.6209 (0.8738) lr 5.1825e-04 eta 2:02:01
epoch [39/50] batch [760/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.4766 (0.8771) lr 5.1825e-04 eta 2:01:44
epoch [39/50] batch [780/796] time 0.831 (0.831) data 0.000 (0.001) loss 0.4011 (0.8752) lr 5.1825e-04 eta 2:01:27
epoch [40/50] batch [20/796] time 0.838 (0.863) data 0.000 (0.031) loss 0.2404 (0.8702) lr 4.6417e-04 eta 2:05:40
epoch [40/50] batch [40/796] time 0.811 (0.847) data 0.000 (0.015) loss 0.5617 (0.7657) lr 4.6417e-04 eta 2:03:00
epoch [40/50] batch [60/796] time 0.818 (0.842) data 0.000 (0.010) loss 0.0844 (0.7920) lr 4.6417e-04 eta 2:02:00
epoch [40/50] batch [80/796] time 0.818 (0.839) data 0.000 (0.008) loss 0.4077 (0.8270) lr 4.6417e-04 eta 2:01:19
epoch [40/50] batch [100/796] time 0.839 (0.837) data 0.000 (0.006) loss 0.2372 (0.8154) lr 4.6417e-04 eta 2:00:46
epoch [40/50] batch [120/796] time 0.839 (0.836) data 0.000 (0.005) loss 1.9531 (0.8402) lr 4.6417e-04 eta 2:00:18
epoch [40/50] batch [140/796] time 0.829 (0.835) data 0.000 (0.005) loss 0.2672 (0.8219) lr 4.6417e-04 eta 1:59:52
epoch [40/50] batch [160/796] time 0.829 (0.834) data 0.000 (0.004) loss 1.2682 (0.8497) lr 4.6417e-04 eta 1:59:30
epoch [40/50] batch [180/796] time 0.838 (0.833) data 0.000 (0.004) loss 2.3039 (0.8591) lr 4.6417e-04 eta 1:59:07
epoch [40/50] batch [200/796] time 0.838 (0.833) data 0.000 (0.003) loss 1.2778 (0.8458) lr 4.6417e-04 eta 1:58:47
epoch [40/50] batch [220/796] time 0.847 (0.833) data 0.008 (0.003) loss 1.6504 (0.8392) lr 4.6417e-04 eta 1:58:28
epoch [40/50] batch [240/796] time 0.829 (0.833) data 0.000 (0.003) loss 0.3265 (0.8360) lr 4.6417e-04 eta 1:58:10
epoch [40/50] batch [260/796] time 0.809 (0.832) data 0.000 (0.003) loss 0.6813 (0.8599) lr 4.6417e-04 eta 1:57:50
epoch [40/50] batch [280/796] time 0.817 (0.832) data 0.000 (0.002) loss 1.9087 (0.8719) lr 4.6417e-04 eta 1:57:31
epoch [40/50] batch [300/796] time 0.842 (0.832) data 0.000 (0.002) loss 0.0214 (0.8663) lr 4.6417e-04 eta 1:57:13
epoch [40/50] batch [320/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.4278 (0.8790) lr 4.6417e-04 eta 1:56:56
epoch [40/50] batch [340/796] time 0.828 (0.832) data 0.000 (0.002) loss 0.5221 (0.8705) lr 4.6417e-04 eta 1:56:39
epoch [40/50] batch [360/796] time 0.817 (0.832) data 0.000 (0.002) loss 1.2629 (0.8778) lr 4.6417e-04 eta 1:56:22
epoch [40/50] batch [380/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.9523 (0.8736) lr 4.6417e-04 eta 1:56:05
epoch [40/50] batch [400/796] time 0.837 (0.831) data 0.000 (0.002) loss 0.4999 (0.8949) lr 4.6417e-04 eta 1:55:46
epoch [40/50] batch [420/796] time 0.829 (0.831) data 0.000 (0.002) loss 1.2311 (0.8912) lr 4.6417e-04 eta 1:55:30
epoch [40/50] batch [440/796] time 0.837 (0.831) data 0.000 (0.002) loss 0.8444 (0.8957) lr 4.6417e-04 eta 1:55:13
epoch [40/50] batch [460/796] time 0.818 (0.831) data 0.000 (0.002) loss 1.2746 (0.8950) lr 4.6417e-04 eta 1:54:55
epoch [40/50] batch [480/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.3993 (0.8845) lr 4.6417e-04 eta 1:54:38
epoch [40/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.9424 (0.8832) lr 4.6417e-04 eta 1:54:22
epoch [40/50] batch [520/796] time 0.830 (0.831) data 0.000 (0.001) loss 2.2470 (0.8851) lr 4.6417e-04 eta 1:54:05
epoch [40/50] batch [540/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.0558 (0.8785) lr 4.6417e-04 eta 1:53:48
epoch [40/50] batch [560/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.2360 (0.8761) lr 4.6417e-04 eta 1:53:30
epoch [40/50] batch [580/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.5662 (0.8830) lr 4.6417e-04 eta 1:53:13
epoch [40/50] batch [600/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.5317 (0.8720) lr 4.6417e-04 eta 1:52:56
epoch [40/50] batch [620/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.2745 (0.8698) lr 4.6417e-04 eta 1:52:40
epoch [40/50] batch [640/796] time 0.818 (0.831) data 0.000 (0.001) loss 1.2227 (0.8622) lr 4.6417e-04 eta 1:52:24
epoch [40/50] batch [660/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.8101 (0.8628) lr 4.6417e-04 eta 1:52:07
epoch [40/50] batch [680/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.6481 (0.8604) lr 4.6417e-04 eta 1:51:51
epoch [40/50] batch [700/796] time 0.808 (0.831) data 0.000 (0.001) loss 0.2569 (0.8639) lr 4.6417e-04 eta 1:51:34
epoch [40/50] batch [720/796] time 0.828 (0.831) data 0.000 (0.001) loss 0.1368 (0.8608) lr 4.6417e-04 eta 1:51:17
epoch [40/50] batch [740/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.2539 (0.8599) lr 4.6417e-04 eta 1:51:01
epoch [40/50] batch [760/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.1453 (0.8600) lr 4.6417e-04 eta 1:50:44
epoch [40/50] batch [780/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.0951 (0.8602) lr 4.6417e-04 eta 1:50:28
epoch [41/50] batch [20/796] time 0.838 (0.858) data 0.000 (0.027) loss 1.3539 (0.7042) lr 4.1221e-04 eta 1:53:28
epoch [41/50] batch [40/796] time 0.837 (0.843) data 0.000 (0.014) loss 1.5158 (0.7275) lr 4.1221e-04 eta 1:51:17
epoch [41/50] batch [60/796] time 0.818 (0.839) data 0.000 (0.009) loss 1.9232 (0.7832) lr 4.1221e-04 eta 1:50:27
epoch [41/50] batch [80/796] time 0.830 (0.837) data 0.000 (0.007) loss 1.2692 (0.7777) lr 4.1221e-04 eta 1:49:51
epoch [41/50] batch [100/796] time 0.839 (0.836) data 0.000 (0.006) loss 0.1670 (0.7883) lr 4.1221e-04 eta 1:49:31
epoch [41/50] batch [120/796] time 0.839 (0.835) data 0.000 (0.005) loss 0.5714 (0.8035) lr 4.1221e-04 eta 1:49:05
epoch [41/50] batch [140/796] time 0.820 (0.834) data 0.000 (0.004) loss 0.3568 (0.7744) lr 4.1221e-04 eta 1:48:44
epoch [41/50] batch [160/796] time 0.837 (0.834) data 0.000 (0.004) loss 1.5767 (0.8192) lr 4.1221e-04 eta 1:48:23
epoch [41/50] batch [180/796] time 0.838 (0.834) data 0.000 (0.003) loss 0.1989 (0.8258) lr 4.1221e-04 eta 1:48:06
epoch [41/50] batch [200/796] time 0.839 (0.833) data 0.000 (0.003) loss 0.1288 (0.7998) lr 4.1221e-04 eta 1:47:45
epoch [41/50] batch [220/796] time 0.818 (0.833) data 0.000 (0.003) loss 0.4059 (0.8070) lr 4.1221e-04 eta 1:47:26
epoch [41/50] batch [240/796] time 0.829 (0.832) data 0.000 (0.002) loss 0.1185 (0.8188) lr 4.1221e-04 eta 1:47:06
epoch [41/50] batch [260/796] time 0.841 (0.832) data 0.000 (0.002) loss 1.0610 (0.8306) lr 4.1221e-04 eta 1:46:47
epoch [41/50] batch [280/796] time 0.840 (0.832) data 0.000 (0.002) loss 1.8436 (0.8343) lr 4.1221e-04 eta 1:46:30
epoch [41/50] batch [300/796] time 0.842 (0.832) data 0.000 (0.002) loss 0.0961 (0.8265) lr 4.1221e-04 eta 1:46:12
epoch [41/50] batch [320/796] time 0.831 (0.832) data 0.000 (0.002) loss 0.1255 (0.8170) lr 4.1221e-04 eta 1:45:56
epoch [41/50] batch [340/796] time 0.809 (0.832) data 0.000 (0.002) loss 0.7426 (0.8057) lr 4.1221e-04 eta 1:45:39
epoch [41/50] batch [360/796] time 0.830 (0.832) data 0.000 (0.002) loss 1.7275 (0.8040) lr 4.1221e-04 eta 1:45:21
epoch [41/50] batch [380/796] time 0.830 (0.832) data 0.000 (0.002) loss 0.2838 (0.8043) lr 4.1221e-04 eta 1:45:03
epoch [41/50] batch [400/796] time 0.830 (0.831) data 0.000 (0.002) loss 1.5694 (0.7958) lr 4.1221e-04 eta 1:44:45
epoch [41/50] batch [420/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.2813 (0.7911) lr 4.1221e-04 eta 1:44:29
epoch [41/50] batch [440/796] time 0.844 (0.831) data 0.000 (0.001) loss 1.7650 (0.7980) lr 4.1221e-04 eta 1:44:12
epoch [41/50] batch [460/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.2544 (0.7859) lr 4.1221e-04 eta 1:43:55
epoch [41/50] batch [480/796] time 0.825 (0.831) data 0.000 (0.001) loss 0.6181 (0.7947) lr 4.1221e-04 eta 1:43:38
epoch [41/50] batch [500/796] time 0.840 (0.831) data 0.000 (0.001) loss 0.2341 (0.7987) lr 4.1221e-04 eta 1:43:22
epoch [41/50] batch [520/796] time 0.821 (0.831) data 0.000 (0.001) loss 0.3662 (0.7986) lr 4.1221e-04 eta 1:43:05
epoch [41/50] batch [540/796] time 0.833 (0.831) data 0.000 (0.001) loss 0.1836 (0.8066) lr 4.1221e-04 eta 1:42:48
epoch [41/50] batch [560/796] time 0.821 (0.831) data 0.000 (0.001) loss 0.7980 (0.8137) lr 4.1221e-04 eta 1:42:32
epoch [41/50] batch [580/796] time 0.821 (0.831) data 0.000 (0.001) loss 0.8052 (0.8164) lr 4.1221e-04 eta 1:42:15
epoch [41/50] batch [600/796] time 0.833 (0.832) data 0.001 (0.001) loss 0.5591 (0.8135) lr 4.1221e-04 eta 1:42:00
epoch [41/50] batch [620/796] time 0.842 (0.832) data 0.000 (0.001) loss 0.2328 (0.8090) lr 4.1221e-04 eta 1:41:43
epoch [41/50] batch [640/796] time 0.842 (0.832) data 0.000 (0.001) loss 0.9707 (0.8133) lr 4.1221e-04 eta 1:41:27
epoch [41/50] batch [660/796] time 0.832 (0.832) data 0.000 (0.001) loss 0.2837 (0.8128) lr 4.1221e-04 eta 1:41:10
epoch [41/50] batch [680/796] time 0.844 (0.832) data 0.000 (0.001) loss 1.8606 (0.8209) lr 4.1221e-04 eta 1:40:54
epoch [41/50] batch [700/796] time 0.834 (0.832) data 0.000 (0.001) loss 1.2591 (0.8252) lr 4.1221e-04 eta 1:40:38
epoch [41/50] batch [720/796] time 0.844 (0.832) data 0.000 (0.001) loss 0.0405 (0.8222) lr 4.1221e-04 eta 1:40:22
epoch [41/50] batch [740/796] time 0.821 (0.832) data 0.000 (0.001) loss 0.6841 (0.8257) lr 4.1221e-04 eta 1:40:06
epoch [41/50] batch [760/796] time 0.832 (0.832) data 0.000 (0.001) loss 1.2824 (0.8304) lr 4.1221e-04 eta 1:39:49
epoch [41/50] batch [780/796] time 0.842 (0.832) data 0.000 (0.001) loss 0.4462 (0.8310) lr 4.1221e-04 eta 1:39:33
epoch [42/50] batch [20/796] time 0.831 (0.872) data 0.000 (0.038) loss 0.6939 (1.0430) lr 3.6258e-04 eta 1:43:48
epoch [42/50] batch [40/796] time 0.844 (0.852) data 0.000 (0.019) loss 0.5109 (1.0016) lr 3.6258e-04 eta 1:41:12
epoch [42/50] batch [60/796] time 0.833 (0.846) data 0.000 (0.013) loss 0.6961 (0.9683) lr 3.6258e-04 eta 1:40:09
epoch [42/50] batch [80/796] time 0.843 (0.843) data 0.000 (0.010) loss 0.2705 (0.8901) lr 3.6258e-04 eta 1:39:29
epoch [42/50] batch [100/796] time 0.820 (0.841) data 0.000 (0.008) loss 1.0587 (0.8609) lr 3.6258e-04 eta 1:39:00
epoch [42/50] batch [120/796] time 0.843 (0.840) data 0.000 (0.007) loss 0.4806 (0.8213) lr 3.6258e-04 eta 1:38:35
epoch [42/50] batch [140/796] time 0.841 (0.839) data 0.000 (0.006) loss 0.9474 (0.8076) lr 3.6258e-04 eta 1:38:13
epoch [42/50] batch [160/796] time 0.844 (0.839) data 0.000 (0.005) loss 0.2173 (0.7924) lr 3.6258e-04 eta 1:37:53
epoch [42/50] batch [180/796] time 0.821 (0.838) data 0.000 (0.004) loss 0.0689 (0.8187) lr 3.6258e-04 eta 1:37:31
epoch [42/50] batch [200/796] time 0.843 (0.837) data 0.000 (0.004) loss 0.2049 (0.8308) lr 3.6258e-04 eta 1:37:12
epoch [42/50] batch [220/796] time 0.843 (0.837) data 0.000 (0.004) loss 1.3934 (0.8276) lr 3.6258e-04 eta 1:36:52
epoch [42/50] batch [240/796] time 0.844 (0.837) data 0.000 (0.003) loss 1.1724 (0.8269) lr 3.6258e-04 eta 1:36:34
epoch [42/50] batch [260/796] time 0.839 (0.837) data 0.000 (0.003) loss 1.9963 (0.8356) lr 3.6258e-04 eta 1:36:17
epoch [42/50] batch [280/796] time 0.817 (0.836) data 0.000 (0.003) loss 0.7739 (0.8344) lr 3.6258e-04 eta 1:35:56
epoch [42/50] batch [300/796] time 0.819 (0.836) data 0.000 (0.003) loss 0.2611 (0.8319) lr 3.6258e-04 eta 1:35:38
epoch [42/50] batch [320/796] time 0.818 (0.836) data 0.000 (0.003) loss 1.0791 (0.8123) lr 3.6258e-04 eta 1:35:19
epoch [42/50] batch [340/796] time 0.840 (0.835) data 0.000 (0.002) loss 0.3063 (0.7987) lr 3.6258e-04 eta 1:35:00
epoch [42/50] batch [360/796] time 0.839 (0.835) data 0.000 (0.002) loss 0.6425 (0.7976) lr 3.6258e-04 eta 1:34:40
epoch [42/50] batch [380/796] time 0.839 (0.835) data 0.000 (0.002) loss 1.0418 (0.7982) lr 3.6258e-04 eta 1:34:22
epoch [42/50] batch [400/796] time 0.818 (0.834) data 0.000 (0.002) loss 1.1831 (0.7870) lr 3.6258e-04 eta 1:34:03
epoch [42/50] batch [420/796] time 0.818 (0.834) data 0.000 (0.002) loss 0.6892 (0.7991) lr 3.6258e-04 eta 1:33:46
epoch [42/50] batch [440/796] time 0.839 (0.834) data 0.000 (0.002) loss 0.5732 (0.7922) lr 3.6258e-04 eta 1:33:27
epoch [42/50] batch [460/796] time 0.819 (0.834) data 0.000 (0.002) loss 0.1550 (0.7904) lr 3.6258e-04 eta 1:33:10
epoch [42/50] batch [480/796] time 0.842 (0.834) data 0.000 (0.002) loss 0.1672 (0.7862) lr 3.6258e-04 eta 1:32:53
epoch [42/50] batch [500/796] time 0.822 (0.834) data 0.000 (0.002) loss 0.5145 (0.7945) lr 3.6258e-04 eta 1:32:36
epoch [42/50] batch [520/796] time 0.833 (0.834) data 0.000 (0.002) loss 0.6179 (0.7977) lr 3.6258e-04 eta 1:32:19
epoch [42/50] batch [540/796] time 0.820 (0.834) data 0.000 (0.002) loss 1.2776 (0.7937) lr 3.6258e-04 eta 1:32:02
epoch [42/50] batch [560/796] time 0.832 (0.834) data 0.000 (0.002) loss 0.7946 (0.7959) lr 3.6258e-04 eta 1:31:46
epoch [42/50] batch [580/796] time 0.833 (0.834) data 0.000 (0.002) loss 0.2111 (0.7988) lr 3.6258e-04 eta 1:31:28
epoch [42/50] batch [600/796] time 0.844 (0.834) data 0.000 (0.001) loss 2.1773 (0.7991) lr 3.6258e-04 eta 1:31:12
epoch [42/50] batch [620/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.2796 (0.7996) lr 3.6258e-04 eta 1:30:56
epoch [42/50] batch [640/796] time 0.812 (0.834) data 0.000 (0.001) loss 0.2706 (0.7988) lr 3.6258e-04 eta 1:30:40
epoch [42/50] batch [660/796] time 0.819 (0.834) data 0.000 (0.001) loss 0.5084 (0.8042) lr 3.6258e-04 eta 1:30:22
epoch [42/50] batch [680/796] time 0.832 (0.834) data 0.000 (0.001) loss 0.6733 (0.8100) lr 3.6258e-04 eta 1:30:05
epoch [42/50] batch [700/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.6689 (0.8102) lr 3.6258e-04 eta 1:29:48
epoch [42/50] batch [720/796] time 0.835 (0.834) data 0.000 (0.001) loss 0.0982 (0.8094) lr 3.6258e-04 eta 1:29:31
epoch [42/50] batch [740/796] time 0.841 (0.834) data 0.000 (0.001) loss 0.3070 (0.8126) lr 3.6258e-04 eta 1:29:15
epoch [42/50] batch [760/796] time 0.822 (0.834) data 0.000 (0.001) loss 2.8555 (0.8212) lr 3.6258e-04 eta 1:28:58
epoch [42/50] batch [780/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.9833 (0.8273) lr 3.6258e-04 eta 1:28:42
epoch [43/50] batch [20/796] time 0.848 (0.868) data 0.000 (0.035) loss 0.9986 (1.2129) lr 3.1545e-04 eta 1:31:50
epoch [43/50] batch [40/796] time 0.825 (0.849) data 0.000 (0.018) loss 0.9686 (0.9712) lr 3.1545e-04 eta 1:29:34
epoch [43/50] batch [60/796] time 0.840 (0.845) data 0.001 (0.012) loss 0.7401 (0.9274) lr 3.1545e-04 eta 1:28:47
epoch [43/50] batch [80/796] time 0.842 (0.842) data 0.000 (0.009) loss 1.8687 (0.8708) lr 3.1545e-04 eta 1:28:14
epoch [43/50] batch [100/796] time 0.821 (0.840) data 0.000 (0.007) loss 0.3931 (0.8685) lr 3.1545e-04 eta 1:27:48
epoch [43/50] batch [120/796] time 0.833 (0.839) data 0.000 (0.006) loss 0.5628 (0.8958) lr 3.1545e-04 eta 1:27:20
epoch [43/50] batch [140/796] time 0.842 (0.838) data 0.000 (0.005) loss 1.0390 (0.8954) lr 3.1545e-04 eta 1:26:59
epoch [43/50] batch [160/796] time 0.821 (0.837) data 0.000 (0.005) loss 0.6088 (0.8910) lr 3.1545e-04 eta 1:26:38
epoch [43/50] batch [180/796] time 0.820 (0.837) data 0.000 (0.004) loss 1.9940 (0.8960) lr 3.1545e-04 eta 1:26:18
epoch [43/50] batch [200/796] time 0.834 (0.836) data 0.000 (0.004) loss 1.1368 (0.8809) lr 3.1545e-04 eta 1:25:58
epoch [43/50] batch [220/796] time 0.851 (0.836) data 0.000 (0.003) loss 1.0924 (0.8746) lr 3.1545e-04 eta 1:25:41
epoch [43/50] batch [240/796] time 0.832 (0.836) data 0.000 (0.003) loss 0.7793 (0.8659) lr 3.1545e-04 eta 1:25:23
epoch [43/50] batch [260/796] time 0.842 (0.836) data 0.000 (0.003) loss 1.0107 (0.8572) lr 3.1545e-04 eta 1:25:05
epoch [43/50] batch [280/796] time 0.843 (0.836) data 0.000 (0.003) loss 0.4027 (0.8475) lr 3.1545e-04 eta 1:24:49
epoch [43/50] batch [300/796] time 0.842 (0.836) data 0.000 (0.003) loss 0.0534 (0.8596) lr 3.1545e-04 eta 1:24:32
epoch [43/50] batch [320/796] time 0.834 (0.836) data 0.000 (0.002) loss 0.4603 (0.8597) lr 3.1545e-04 eta 1:24:14
epoch [43/50] batch [340/796] time 0.843 (0.836) data 0.000 (0.002) loss 0.3446 (0.8554) lr 3.1545e-04 eta 1:23:57
epoch [43/50] batch [360/796] time 0.823 (0.835) data 0.000 (0.002) loss 0.7513 (0.8434) lr 3.1545e-04 eta 1:23:39
epoch [43/50] batch [380/796] time 0.821 (0.835) data 0.000 (0.002) loss 1.1167 (0.8383) lr 3.1545e-04 eta 1:23:21
epoch [43/50] batch [400/796] time 0.823 (0.835) data 0.000 (0.002) loss 0.6219 (0.8435) lr 3.1545e-04 eta 1:23:03
epoch [43/50] batch [420/796] time 0.832 (0.835) data 0.000 (0.002) loss 0.3591 (0.8643) lr 3.1545e-04 eta 1:22:46
epoch [43/50] batch [440/796] time 0.823 (0.835) data 0.000 (0.002) loss 0.5452 (0.8594) lr 3.1545e-04 eta 1:22:30
epoch [43/50] batch [460/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.2306 (0.8516) lr 3.1545e-04 eta 1:22:13
epoch [43/50] batch [480/796] time 0.820 (0.835) data 0.000 (0.002) loss 1.7713 (0.8553) lr 3.1545e-04 eta 1:21:56
epoch [43/50] batch [500/796] time 0.846 (0.835) data 0.000 (0.002) loss 1.6134 (0.8631) lr 3.1545e-04 eta 1:21:38
epoch [43/50] batch [520/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.2782 (0.8602) lr 3.1545e-04 eta 1:21:22
epoch [43/50] batch [540/796] time 0.842 (0.835) data 0.000 (0.001) loss 3.0350 (0.8613) lr 3.1545e-04 eta 1:21:05
epoch [43/50] batch [560/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.1246 (0.8585) lr 3.1545e-04 eta 1:20:48
epoch [43/50] batch [580/796] time 0.834 (0.835) data 0.000 (0.001) loss 0.0681 (0.8551) lr 3.1545e-04 eta 1:20:31
epoch [43/50] batch [600/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.3134 (0.8516) lr 3.1545e-04 eta 1:20:15
epoch [43/50] batch [620/796] time 0.844 (0.835) data 0.000 (0.001) loss 1.4806 (0.8471) lr 3.1545e-04 eta 1:19:58
epoch [43/50] batch [640/796] time 0.845 (0.835) data 0.000 (0.001) loss 0.1157 (0.8462) lr 3.1545e-04 eta 1:19:41
epoch [43/50] batch [660/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.8443 (0.8468) lr 3.1545e-04 eta 1:19:24
epoch [43/50] batch [680/796] time 0.833 (0.835) data 0.000 (0.001) loss 2.2771 (0.8505) lr 3.1545e-04 eta 1:19:07
epoch [43/50] batch [700/796] time 0.812 (0.835) data 0.000 (0.001) loss 1.5066 (0.8504) lr 3.1545e-04 eta 1:18:50
epoch [43/50] batch [720/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.7752 (0.8475) lr 3.1545e-04 eta 1:18:34
epoch [43/50] batch [740/796] time 0.834 (0.835) data 0.000 (0.001) loss 1.1461 (0.8454) lr 3.1545e-04 eta 1:18:17
epoch [43/50] batch [760/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.3042 (0.8452) lr 3.1545e-04 eta 1:18:00
epoch [43/50] batch [780/796] time 0.834 (0.834) data 0.000 (0.001) loss 0.9949 (0.8453) lr 3.1545e-04 eta 1:17:42
epoch [44/50] batch [20/796] time 0.832 (0.866) data 0.000 (0.034) loss 1.1213 (0.9374) lr 2.7103e-04 eta 1:20:05
epoch [44/50] batch [40/796] time 0.832 (0.848) data 0.000 (0.017) loss 0.7344 (1.0345) lr 2.7103e-04 eta 1:18:12
epoch [44/50] batch [60/796] time 0.843 (0.843) data 0.001 (0.012) loss 0.4117 (0.9745) lr 2.7103e-04 eta 1:17:26
epoch [44/50] batch [80/796] time 0.845 (0.841) data 0.000 (0.009) loss 0.5044 (0.9537) lr 2.7103e-04 eta 1:16:57
epoch [44/50] batch [100/796] time 0.844 (0.840) data 0.000 (0.007) loss 1.1019 (0.9323) lr 2.7103e-04 eta 1:16:34
epoch [44/50] batch [120/796] time 0.820 (0.839) data 0.000 (0.006) loss 0.4548 (0.8859) lr 2.7103e-04 eta 1:16:12
epoch [44/50] batch [140/796] time 0.843 (0.838) data 0.000 (0.005) loss 1.1784 (0.8960) lr 2.7103e-04 eta 1:15:52
epoch [44/50] batch [160/796] time 0.831 (0.837) data 0.000 (0.004) loss 0.7069 (0.8787) lr 2.7103e-04 eta 1:15:31
epoch [44/50] batch [180/796] time 0.821 (0.837) data 0.000 (0.004) loss 0.9123 (0.8620) lr 2.7103e-04 eta 1:15:13
epoch [44/50] batch [200/796] time 0.832 (0.837) data 0.000 (0.004) loss 1.3457 (0.8386) lr 2.7103e-04 eta 1:14:54
epoch [44/50] batch [220/796] time 0.841 (0.836) data 0.000 (0.003) loss 2.1854 (0.8412) lr 2.7103e-04 eta 1:14:35
epoch [44/50] batch [240/796] time 0.812 (0.836) data 0.000 (0.003) loss 0.8624 (0.8601) lr 2.7103e-04 eta 1:14:17
epoch [44/50] batch [260/796] time 0.821 (0.836) data 0.000 (0.003) loss 0.7910 (0.8505) lr 2.7103e-04 eta 1:13:59
epoch [44/50] batch [280/796] time 0.844 (0.836) data 0.000 (0.003) loss 0.3606 (0.8593) lr 2.7103e-04 eta 1:13:42
epoch [44/50] batch [300/796] time 0.845 (0.836) data 0.000 (0.002) loss 1.0211 (0.8509) lr 2.7103e-04 eta 1:13:26
epoch [44/50] batch [320/796] time 0.822 (0.836) data 0.000 (0.002) loss 0.0420 (0.8335) lr 2.7103e-04 eta 1:13:09
epoch [44/50] batch [340/796] time 0.842 (0.836) data 0.000 (0.002) loss 0.6244 (0.8364) lr 2.7103e-04 eta 1:12:52
epoch [44/50] batch [360/796] time 0.844 (0.835) data 0.000 (0.002) loss 2.5054 (0.8531) lr 2.7103e-04 eta 1:12:34
epoch [44/50] batch [380/796] time 0.822 (0.835) data 0.000 (0.002) loss 2.9143 (0.8607) lr 2.7103e-04 eta 1:12:17
epoch [44/50] batch [400/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.2930 (0.8546) lr 2.7103e-04 eta 1:12:00
epoch [44/50] batch [420/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.5473 (0.8496) lr 2.7103e-04 eta 1:11:43
epoch [44/50] batch [440/796] time 0.846 (0.835) data 0.000 (0.002) loss 0.5711 (0.8529) lr 2.7103e-04 eta 1:11:25
epoch [44/50] batch [460/796] time 0.845 (0.835) data 0.000 (0.002) loss 0.0992 (0.8467) lr 2.7103e-04 eta 1:11:08
epoch [44/50] batch [480/796] time 0.844 (0.835) data 0.000 (0.002) loss 1.4043 (0.8378) lr 2.7103e-04 eta 1:10:51
epoch [44/50] batch [500/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.5661 (0.8425) lr 2.7103e-04 eta 1:10:35
epoch [44/50] batch [520/796] time 0.811 (0.835) data 0.000 (0.002) loss 0.5975 (0.8310) lr 2.7103e-04 eta 1:10:17
epoch [44/50] batch [540/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.6580 (0.8382) lr 2.7103e-04 eta 1:10:00
epoch [44/50] batch [560/796] time 0.811 (0.835) data 0.000 (0.001) loss 0.5566 (0.8350) lr 2.7103e-04 eta 1:09:43
epoch [44/50] batch [580/796] time 0.810 (0.835) data 0.000 (0.001) loss 0.0771 (0.8334) lr 2.7103e-04 eta 1:09:26
epoch [44/50] batch [600/796] time 0.842 (0.835) data 0.000 (0.001) loss 2.4269 (0.8412) lr 2.7103e-04 eta 1:09:10
epoch [44/50] batch [620/796] time 0.813 (0.835) data 0.000 (0.001) loss 0.8719 (0.8403) lr 2.7103e-04 eta 1:08:52
epoch [44/50] batch [640/796] time 0.832 (0.835) data 0.000 (0.001) loss 0.2905 (0.8333) lr 2.7103e-04 eta 1:08:36
epoch [44/50] batch [660/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.3130 (0.8291) lr 2.7103e-04 eta 1:08:19
epoch [44/50] batch [680/796] time 0.835 (0.834) data 0.000 (0.001) loss 1.0328 (0.8264) lr 2.7103e-04 eta 1:08:01
epoch [44/50] batch [700/796] time 0.832 (0.835) data 0.000 (0.001) loss 1.2374 (0.8340) lr 2.7103e-04 eta 1:07:45
epoch [44/50] batch [720/796] time 0.820 (0.834) data 0.000 (0.001) loss 0.3988 (0.8314) lr 2.7103e-04 eta 1:07:28
epoch [44/50] batch [740/796] time 0.834 (0.834) data 0.000 (0.001) loss 0.9499 (0.8319) lr 2.7103e-04 eta 1:07:11
epoch [44/50] batch [760/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.0266 (0.8256) lr 2.7103e-04 eta 1:06:54
epoch [44/50] batch [780/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.1475 (0.8225) lr 2.7103e-04 eta 1:06:38
epoch [45/50] batch [20/796] time 0.821 (0.869) data 0.000 (0.038) loss 3.5328 (0.8752) lr 2.2949e-04 eta 1:08:53
epoch [45/50] batch [40/796] time 0.819 (0.850) data 0.000 (0.019) loss 2.7778 (0.9004) lr 2.2949e-04 eta 1:07:07
epoch [45/50] batch [60/796] time 0.824 (0.845) data 0.000 (0.013) loss 0.3227 (1.0041) lr 2.2949e-04 eta 1:06:26
epoch [45/50] batch [80/796] time 0.822 (0.842) data 0.000 (0.010) loss 1.7274 (0.9689) lr 2.2949e-04 eta 1:05:54
epoch [45/50] batch [100/796] time 0.842 (0.841) data 0.000 (0.008) loss 0.8302 (0.8962) lr 2.2949e-04 eta 1:05:31
epoch [45/50] batch [120/796] time 0.842 (0.839) data 0.000 (0.006) loss 0.6859 (0.9297) lr 2.2949e-04 eta 1:05:05
epoch [45/50] batch [140/796] time 0.843 (0.838) data 0.000 (0.006) loss 0.2153 (0.9520) lr 2.2949e-04 eta 1:04:47
epoch [45/50] batch [160/796] time 0.841 (0.838) data 0.000 (0.005) loss 2.0670 (0.9340) lr 2.2949e-04 eta 1:04:26
epoch [45/50] batch [180/796] time 0.844 (0.837) data 0.000 (0.004) loss 2.1869 (0.9391) lr 2.2949e-04 eta 1:04:07
epoch [45/50] batch [200/796] time 0.832 (0.837) data 0.000 (0.004) loss 0.0806 (0.9153) lr 2.2949e-04 eta 1:03:49
epoch [45/50] batch [220/796] time 0.820 (0.837) data 0.000 (0.004) loss 1.1566 (0.9061) lr 2.2949e-04 eta 1:03:31
epoch [45/50] batch [240/796] time 0.843 (0.836) data 0.000 (0.003) loss 0.5003 (0.8736) lr 2.2949e-04 eta 1:03:12
epoch [45/50] batch [260/796] time 0.832 (0.836) data 0.000 (0.003) loss 2.4205 (0.8749) lr 2.2949e-04 eta 1:02:54
epoch [45/50] batch [280/796] time 0.832 (0.836) data 0.000 (0.003) loss 1.3073 (0.8590) lr 2.2949e-04 eta 1:02:36
epoch [45/50] batch [300/796] time 0.842 (0.835) data 0.000 (0.003) loss 1.1775 (0.8533) lr 2.2949e-04 eta 1:02:19
epoch [45/50] batch [320/796] time 0.842 (0.835) data 0.000 (0.003) loss 0.5051 (0.8412) lr 2.2949e-04 eta 1:02:02
epoch [45/50] batch [340/796] time 0.841 (0.835) data 0.000 (0.002) loss 1.3773 (0.8427) lr 2.2949e-04 eta 1:01:45
epoch [45/50] batch [360/796] time 0.844 (0.835) data 0.000 (0.002) loss 1.4888 (0.8435) lr 2.2949e-04 eta 1:01:27
epoch [45/50] batch [380/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.0999 (0.8431) lr 2.2949e-04 eta 1:01:10
epoch [45/50] batch [400/796] time 0.841 (0.835) data 0.000 (0.002) loss 2.1850 (0.8543) lr 2.2949e-04 eta 1:00:53
epoch [45/50] batch [420/796] time 0.811 (0.835) data 0.000 (0.002) loss 0.1833 (0.8531) lr 2.2949e-04 eta 1:00:36
epoch [45/50] batch [440/796] time 0.831 (0.835) data 0.000 (0.002) loss 1.0382 (0.8476) lr 2.2949e-04 eta 1:00:19
epoch [45/50] batch [460/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.2637 (0.8342) lr 2.2949e-04 eta 1:00:02
epoch [45/50] batch [480/796] time 0.845 (0.835) data 0.000 (0.002) loss 1.1969 (0.8443) lr 2.2949e-04 eta 0:59:45
epoch [45/50] batch [500/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.3856 (0.8374) lr 2.2949e-04 eta 0:59:28
epoch [45/50] batch [520/796] time 0.842 (0.835) data 0.000 (0.002) loss 1.3364 (0.8452) lr 2.2949e-04 eta 0:59:12
epoch [45/50] batch [540/796] time 0.833 (0.835) data 0.000 (0.002) loss 0.7679 (0.8422) lr 2.2949e-04 eta 0:58:55
epoch [45/50] batch [560/796] time 0.831 (0.834) data 0.000 (0.002) loss 0.7943 (0.8453) lr 2.2949e-04 eta 0:58:38
epoch [45/50] batch [580/796] time 0.812 (0.834) data 0.000 (0.002) loss 0.1197 (0.8430) lr 2.2949e-04 eta 0:58:21
epoch [45/50] batch [600/796] time 0.831 (0.834) data 0.000 (0.001) loss 0.4176 (0.8471) lr 2.2949e-04 eta 0:58:04
epoch [45/50] batch [620/796] time 0.831 (0.834) data 0.000 (0.001) loss 0.1892 (0.8458) lr 2.2949e-04 eta 0:57:47
epoch [45/50] batch [640/796] time 0.820 (0.834) data 0.000 (0.001) loss 2.8689 (0.8462) lr 2.2949e-04 eta 0:57:30
epoch [45/50] batch [660/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.0446 (0.8388) lr 2.2949e-04 eta 0:57:14
epoch [45/50] batch [680/796] time 0.821 (0.834) data 0.000 (0.001) loss 0.0418 (0.8371) lr 2.2949e-04 eta 0:56:57
epoch [45/50] batch [700/796] time 0.843 (0.834) data 0.000 (0.001) loss 0.3870 (0.8333) lr 2.2949e-04 eta 0:56:40
epoch [45/50] batch [720/796] time 0.814 (0.834) data 0.000 (0.001) loss 0.5184 (0.8372) lr 2.2949e-04 eta 0:56:23
epoch [45/50] batch [740/796] time 0.842 (0.834) data 0.000 (0.001) loss 0.9747 (0.8413) lr 2.2949e-04 eta 0:56:07
epoch [45/50] batch [760/796] time 0.831 (0.834) data 0.000 (0.001) loss 0.2938 (0.8380) lr 2.2949e-04 eta 0:55:50
epoch [45/50] batch [780/796] time 0.842 (0.834) data 0.000 (0.001) loss 1.4824 (0.8360) lr 2.2949e-04 eta 0:55:33
epoch [46/50] batch [20/796] time 0.843 (0.876) data 0.000 (0.039) loss 1.1221 (0.8250) lr 1.9098e-04 eta 0:57:48
epoch [46/50] batch [40/796] time 0.831 (0.855) data 0.000 (0.020) loss 2.3096 (0.7619) lr 1.9098e-04 eta 0:56:07
epoch [46/50] batch [60/796] time 0.820 (0.847) data 0.001 (0.013) loss 1.4506 (0.8053) lr 1.9098e-04 eta 0:55:21
epoch [46/50] batch [80/796] time 0.841 (0.844) data 0.000 (0.010) loss 0.3951 (0.8450) lr 1.9098e-04 eta 0:54:50
epoch [46/50] batch [100/796] time 0.822 (0.841) data 0.000 (0.008) loss 0.5408 (0.8160) lr 1.9098e-04 eta 0:54:24
epoch [46/50] batch [120/796] time 0.842 (0.840) data 0.000 (0.007) loss 0.3713 (0.8030) lr 1.9098e-04 eta 0:54:03
epoch [46/50] batch [140/796] time 0.843 (0.839) data 0.000 (0.006) loss 0.3026 (0.8355) lr 1.9098e-04 eta 0:53:41
epoch [46/50] batch [160/796] time 0.833 (0.838) data 0.000 (0.005) loss 0.6268 (0.8425) lr 1.9098e-04 eta 0:53:22
epoch [46/50] batch [180/796] time 0.822 (0.838) data 0.000 (0.005) loss 0.8629 (0.8296) lr 1.9098e-04 eta 0:53:03
epoch [46/50] batch [200/796] time 0.820 (0.837) data 0.000 (0.004) loss 0.5773 (0.8175) lr 1.9098e-04 eta 0:52:45
epoch [46/50] batch [220/796] time 0.848 (0.837) data 0.000 (0.004) loss 0.5283 (0.7899) lr 1.9098e-04 eta 0:52:28
epoch [46/50] batch [240/796] time 0.841 (0.837) data 0.000 (0.003) loss 0.3582 (0.7927) lr 1.9098e-04 eta 0:52:10
epoch [46/50] batch [260/796] time 0.842 (0.837) data 0.000 (0.003) loss 1.1712 (0.8042) lr 1.9098e-04 eta 0:51:52
epoch [46/50] batch [280/796] time 0.822 (0.836) data 0.000 (0.003) loss 0.2320 (0.7890) lr 1.9098e-04 eta 0:51:34
epoch [46/50] batch [300/796] time 0.833 (0.836) data 0.000 (0.003) loss 0.3944 (0.7901) lr 1.9098e-04 eta 0:51:17
epoch [46/50] batch [320/796] time 0.832 (0.836) data 0.000 (0.003) loss 0.1809 (0.7991) lr 1.9098e-04 eta 0:50:59
epoch [46/50] batch [340/796] time 0.822 (0.836) data 0.000 (0.003) loss 1.2685 (0.7993) lr 1.9098e-04 eta 0:50:42
epoch [46/50] batch [360/796] time 0.810 (0.836) data 0.000 (0.002) loss 0.9270 (0.8071) lr 1.9098e-04 eta 0:50:24
epoch [46/50] batch [380/796] time 0.821 (0.836) data 0.000 (0.002) loss 0.1358 (0.8176) lr 1.9098e-04 eta 0:50:08
epoch [46/50] batch [400/796] time 0.821 (0.835) data 0.000 (0.002) loss 0.0665 (0.8268) lr 1.9098e-04 eta 0:49:51
epoch [46/50] batch [420/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.2374 (0.8428) lr 1.9098e-04 eta 0:49:34
epoch [46/50] batch [440/796] time 0.823 (0.835) data 0.000 (0.002) loss 1.4249 (0.8427) lr 1.9098e-04 eta 0:49:16
epoch [46/50] batch [460/796] time 0.830 (0.835) data 0.000 (0.002) loss 0.7276 (0.8370) lr 1.9098e-04 eta 0:48:59
epoch [46/50] batch [480/796] time 0.822 (0.835) data 0.000 (0.002) loss 0.9645 (0.8372) lr 1.9098e-04 eta 0:48:43
epoch [46/50] batch [500/796] time 0.820 (0.835) data 0.000 (0.002) loss 0.0641 (0.8377) lr 1.9098e-04 eta 0:48:26
epoch [46/50] batch [520/796] time 0.833 (0.835) data 0.000 (0.002) loss 1.3282 (0.8493) lr 1.9098e-04 eta 0:48:09
epoch [46/50] batch [540/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.8075 (0.8501) lr 1.9098e-04 eta 0:47:53
epoch [46/50] batch [560/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.9984 (0.8520) lr 1.9098e-04 eta 0:47:36
epoch [46/50] batch [580/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.7810 (0.8487) lr 1.9098e-04 eta 0:47:19
epoch [46/50] batch [600/796] time 0.842 (0.835) data 0.001 (0.002) loss 0.7426 (0.8497) lr 1.9098e-04 eta 0:47:02
epoch [46/50] batch [620/796] time 0.833 (0.835) data 0.000 (0.001) loss 4.0126 (0.8552) lr 1.9098e-04 eta 0:46:45
epoch [46/50] batch [640/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.4184 (0.8585) lr 1.9098e-04 eta 0:46:28
epoch [46/50] batch [660/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.3643 (0.8597) lr 1.9098e-04 eta 0:46:11
epoch [46/50] batch [680/796] time 0.832 (0.835) data 0.000 (0.001) loss 1.2504 (0.8507) lr 1.9098e-04 eta 0:45:54
epoch [46/50] batch [700/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.3591 (0.8477) lr 1.9098e-04 eta 0:45:37
epoch [46/50] batch [720/796] time 0.842 (0.835) data 0.000 (0.001) loss 1.0947 (0.8501) lr 1.9098e-04 eta 0:45:20
epoch [46/50] batch [740/796] time 0.833 (0.835) data 0.000 (0.001) loss 0.7203 (0.8504) lr 1.9098e-04 eta 0:45:04
epoch [46/50] batch [760/796] time 0.822 (0.835) data 0.000 (0.001) loss 0.3876 (0.8419) lr 1.9098e-04 eta 0:44:47
epoch [46/50] batch [780/796] time 0.832 (0.835) data 0.000 (0.001) loss 1.0569 (0.8431) lr 1.9098e-04 eta 0:44:30
epoch [47/50] batch [20/796] time 0.842 (0.870) data 0.000 (0.037) loss 3.0533 (0.7448) lr 1.5567e-04 eta 0:45:52
epoch [47/50] batch [40/796] time 0.832 (0.853) data 0.000 (0.019) loss 1.5917 (0.8176) lr 1.5567e-04 eta 0:44:41
epoch [47/50] batch [60/796] time 0.820 (0.846) data 0.000 (0.012) loss 0.1777 (0.7649) lr 1.5567e-04 eta 0:44:03
epoch [47/50] batch [80/796] time 0.811 (0.843) data 0.000 (0.009) loss 0.9589 (0.7984) lr 1.5567e-04 eta 0:43:36
epoch [47/50] batch [100/796] time 0.831 (0.841) data 0.000 (0.008) loss 0.0659 (0.7968) lr 1.5567e-04 eta 0:43:13
epoch [47/50] batch [120/796] time 0.841 (0.840) data 0.000 (0.006) loss 0.1110 (0.7960) lr 1.5567e-04 eta 0:42:54
epoch [47/50] batch [140/796] time 0.842 (0.839) data 0.000 (0.005) loss 1.1789 (0.7966) lr 1.5567e-04 eta 0:42:35
epoch [47/50] batch [160/796] time 0.845 (0.839) data 0.000 (0.005) loss 0.2567 (0.7872) lr 1.5567e-04 eta 0:42:15
epoch [47/50] batch [180/796] time 0.843 (0.838) data 0.000 (0.004) loss 0.6148 (0.7815) lr 1.5567e-04 eta 0:41:58
epoch [47/50] batch [200/796] time 0.842 (0.838) data 0.000 (0.004) loss 0.9703 (0.7890) lr 1.5567e-04 eta 0:41:39
epoch [47/50] batch [220/796] time 0.832 (0.837) data 0.000 (0.004) loss 0.4157 (0.7817) lr 1.5567e-04 eta 0:41:21
epoch [47/50] batch [240/796] time 0.816 (0.837) data 0.000 (0.003) loss 0.4500 (0.8041) lr 1.5567e-04 eta 0:41:04
epoch [47/50] batch [260/796] time 0.821 (0.837) data 0.000 (0.003) loss 0.9408 (0.7886) lr 1.5567e-04 eta 0:40:46
epoch [47/50] batch [280/796] time 0.821 (0.837) data 0.000 (0.003) loss 0.7957 (0.7819) lr 1.5567e-04 eta 0:40:29
epoch [47/50] batch [300/796] time 0.841 (0.836) data 0.000 (0.003) loss 1.3523 (0.7829) lr 1.5567e-04 eta 0:40:12
epoch [47/50] batch [320/796] time 0.844 (0.836) data 0.000 (0.002) loss 0.8099 (0.7800) lr 1.5567e-04 eta 0:39:54
epoch [47/50] batch [340/796] time 0.820 (0.836) data 0.000 (0.002) loss 0.6134 (0.7842) lr 1.5567e-04 eta 0:39:37
epoch [47/50] batch [360/796] time 0.842 (0.836) data 0.000 (0.002) loss 0.3517 (0.7812) lr 1.5567e-04 eta 0:39:21
epoch [47/50] batch [380/796] time 0.832 (0.836) data 0.000 (0.002) loss 0.9241 (0.7852) lr 1.5567e-04 eta 0:39:04
epoch [47/50] batch [400/796] time 0.820 (0.836) data 0.000 (0.002) loss 0.2604 (0.7833) lr 1.5567e-04 eta 0:38:47
epoch [47/50] batch [420/796] time 0.820 (0.836) data 0.000 (0.002) loss 0.2612 (0.8006) lr 1.5567e-04 eta 0:38:29
epoch [47/50] batch [440/796] time 0.842 (0.836) data 0.000 (0.002) loss 0.4109 (0.8017) lr 1.5567e-04 eta 0:38:12
epoch [47/50] batch [460/796] time 0.844 (0.836) data 0.000 (0.002) loss 0.2911 (0.8064) lr 1.5567e-04 eta 0:37:55
epoch [47/50] batch [480/796] time 0.845 (0.836) data 0.000 (0.002) loss 1.6300 (0.8064) lr 1.5567e-04 eta 0:37:39
epoch [47/50] batch [500/796] time 0.842 (0.836) data 0.000 (0.002) loss 2.4864 (0.8078) lr 1.5567e-04 eta 0:37:22
epoch [47/50] batch [520/796] time 0.811 (0.836) data 0.000 (0.002) loss 1.5489 (0.8105) lr 1.5567e-04 eta 0:37:05
epoch [47/50] batch [540/796] time 0.843 (0.835) data 0.000 (0.002) loss 1.6906 (0.8102) lr 1.5567e-04 eta 0:36:48
epoch [47/50] batch [560/796] time 0.842 (0.835) data 0.000 (0.002) loss 0.0501 (0.8114) lr 1.5567e-04 eta 0:36:32
epoch [47/50] batch [580/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.1057 (0.8089) lr 1.5567e-04 eta 0:36:15
epoch [47/50] batch [600/796] time 0.841 (0.835) data 0.001 (0.001) loss 0.5913 (0.8043) lr 1.5567e-04 eta 0:35:58
epoch [47/50] batch [620/796] time 0.821 (0.835) data 0.000 (0.001) loss 0.0763 (0.7994) lr 1.5567e-04 eta 0:35:41
epoch [47/50] batch [640/796] time 0.821 (0.835) data 0.000 (0.001) loss 1.8841 (0.8061) lr 1.5567e-04 eta 0:35:24
epoch [47/50] batch [660/796] time 0.849 (0.835) data 0.000 (0.001) loss 1.2113 (0.8048) lr 1.5567e-04 eta 0:35:07
epoch [47/50] batch [680/796] time 0.816 (0.835) data 0.000 (0.001) loss 2.0226 (0.8080) lr 1.5567e-04 eta 0:34:50
epoch [47/50] batch [700/796] time 0.840 (0.835) data 0.000 (0.001) loss 0.2087 (0.8133) lr 1.5567e-04 eta 0:34:33
epoch [47/50] batch [720/796] time 0.817 (0.835) data 0.000 (0.001) loss 0.9447 (0.8143) lr 1.5567e-04 eta 0:34:16
epoch [47/50] batch [740/796] time 0.817 (0.835) data 0.000 (0.001) loss 0.7536 (0.8123) lr 1.5567e-04 eta 0:33:59
epoch [47/50] batch [760/796] time 0.839 (0.834) data 0.000 (0.001) loss 0.9648 (0.8114) lr 1.5567e-04 eta 0:33:42
epoch [47/50] batch [780/796] time 0.817 (0.834) data 0.000 (0.001) loss 0.6379 (0.8118) lr 1.5567e-04 eta 0:33:25
epoch [48/50] batch [20/796] time 0.817 (0.858) data 0.000 (0.029) loss 1.6753 (0.9229) lr 1.2369e-04 eta 0:33:50
epoch [48/50] batch [40/796] time 0.817 (0.844) data 0.000 (0.015) loss 0.8815 (1.0131) lr 1.2369e-04 eta 0:33:01
epoch [48/50] batch [60/796] time 0.838 (0.839) data 0.000 (0.010) loss 0.5330 (0.9378) lr 1.2369e-04 eta 0:32:33
epoch [48/50] batch [80/796] time 0.839 (0.837) data 0.000 (0.007) loss 0.5389 (0.8770) lr 1.2369e-04 eta 0:32:11
epoch [48/50] batch [100/796] time 0.829 (0.835) data 0.000 (0.006) loss 0.1432 (0.8407) lr 1.2369e-04 eta 0:31:51
epoch [48/50] batch [120/796] time 0.829 (0.835) data 0.000 (0.005) loss 1.7263 (0.8409) lr 1.2369e-04 eta 0:31:32
epoch [48/50] batch [140/796] time 0.818 (0.834) data 0.000 (0.004) loss 0.5274 (0.8391) lr 1.2369e-04 eta 0:31:14
epoch [48/50] batch [160/796] time 0.815 (0.834) data 0.000 (0.004) loss 1.5105 (0.8452) lr 1.2369e-04 eta 0:30:57
epoch [48/50] batch [180/796] time 0.837 (0.833) data 0.000 (0.003) loss 1.0437 (0.8306) lr 1.2369e-04 eta 0:30:40
epoch [48/50] batch [200/796] time 0.837 (0.833) data 0.000 (0.003) loss 1.1434 (0.8058) lr 1.2369e-04 eta 0:30:23
epoch [48/50] batch [220/796] time 0.838 (0.833) data 0.000 (0.003) loss 0.0529 (0.8102) lr 1.2369e-04 eta 0:30:06
epoch [48/50] batch [240/796] time 0.830 (0.833) data 0.000 (0.003) loss 0.8096 (0.8139) lr 1.2369e-04 eta 0:29:49
epoch [48/50] batch [260/796] time 0.841 (0.833) data 0.000 (0.002) loss 0.6061 (0.7914) lr 1.2369e-04 eta 0:29:32
epoch [48/50] batch [280/796] time 0.839 (0.833) data 0.000 (0.002) loss 0.4976 (0.7967) lr 1.2369e-04 eta 0:29:15
epoch [48/50] batch [300/796] time 0.840 (0.833) data 0.000 (0.002) loss 0.2838 (0.7950) lr 1.2369e-04 eta 0:28:58
epoch [48/50] batch [320/796] time 0.839 (0.833) data 0.000 (0.002) loss 0.6824 (0.8079) lr 1.2369e-04 eta 0:28:41
epoch [48/50] batch [340/796] time 0.818 (0.832) data 0.000 (0.002) loss 1.9197 (0.8254) lr 1.2369e-04 eta 0:28:24
epoch [48/50] batch [360/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.4997 (0.8109) lr 1.2369e-04 eta 0:28:07
epoch [48/50] batch [380/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.4808 (0.8158) lr 1.2369e-04 eta 0:27:50
epoch [48/50] batch [400/796] time 0.838 (0.832) data 0.000 (0.002) loss 0.6777 (0.8101) lr 1.2369e-04 eta 0:27:33
epoch [48/50] batch [420/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.3434 (0.8211) lr 1.2369e-04 eta 0:27:17
epoch [48/50] batch [440/796] time 0.838 (0.832) data 0.000 (0.002) loss 1.4857 (0.8222) lr 1.2369e-04 eta 0:27:00
epoch [48/50] batch [460/796] time 0.839 (0.832) data 0.000 (0.001) loss 0.9864 (0.8230) lr 1.2369e-04 eta 0:26:43
epoch [48/50] batch [480/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.8142 (0.8221) lr 1.2369e-04 eta 0:26:26
epoch [48/50] batch [500/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.1821 (0.8210) lr 1.2369e-04 eta 0:26:09
epoch [48/50] batch [520/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.8048 (0.8208) lr 1.2369e-04 eta 0:25:53
epoch [48/50] batch [540/796] time 0.839 (0.831) data 0.000 (0.001) loss 1.9783 (0.8162) lr 1.2369e-04 eta 0:25:36
epoch [48/50] batch [560/796] time 0.830 (0.831) data 0.000 (0.001) loss 0.1110 (0.8126) lr 1.2369e-04 eta 0:25:19
epoch [48/50] batch [580/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.5708 (0.8156) lr 1.2369e-04 eta 0:25:02
epoch [48/50] batch [600/796] time 0.839 (0.831) data 0.000 (0.001) loss 0.2121 (0.8143) lr 1.2369e-04 eta 0:24:46
epoch [48/50] batch [620/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.6974 (0.8182) lr 1.2369e-04 eta 0:24:29
epoch [48/50] batch [640/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.6072 (0.8208) lr 1.2369e-04 eta 0:24:12
epoch [48/50] batch [660/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.3703 (0.8229) lr 1.2369e-04 eta 0:23:56
epoch [48/50] batch [680/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.4725 (0.8216) lr 1.2369e-04 eta 0:23:39
epoch [48/50] batch [700/796] time 0.829 (0.831) data 0.000 (0.001) loss 0.3628 (0.8223) lr 1.2369e-04 eta 0:23:23
epoch [48/50] batch [720/796] time 0.820 (0.831) data 0.000 (0.001) loss 0.7601 (0.8199) lr 1.2369e-04 eta 0:23:06
epoch [48/50] batch [740/796] time 0.818 (0.831) data 0.000 (0.001) loss 0.1673 (0.8210) lr 1.2369e-04 eta 0:22:49
epoch [48/50] batch [760/796] time 0.819 (0.831) data 0.000 (0.001) loss 0.2069 (0.8233) lr 1.2369e-04 eta 0:22:33
epoch [48/50] batch [780/796] time 0.830 (0.831) data 0.000 (0.001) loss 1.0490 (0.8172) lr 1.2369e-04 eta 0:22:16
epoch [49/50] batch [20/796] time 0.828 (0.861) data 0.000 (0.029) loss 2.0736 (0.6999) lr 9.5173e-05 eta 0:22:33
epoch [49/50] batch [40/796] time 0.830 (0.845) data 0.000 (0.015) loss 0.9269 (0.8390) lr 9.5173e-05 eta 0:21:51
epoch [49/50] batch [60/796] time 0.829 (0.840) data 0.000 (0.010) loss 1.9913 (0.8100) lr 9.5173e-05 eta 0:21:26
epoch [49/50] batch [80/796] time 0.829 (0.837) data 0.000 (0.007) loss 0.3983 (0.7994) lr 9.5173e-05 eta 0:21:06
epoch [49/50] batch [100/796] time 0.829 (0.836) data 0.000 (0.006) loss 0.0991 (0.8268) lr 9.5173e-05 eta 0:20:46
epoch [49/50] batch [120/796] time 0.838 (0.835) data 0.000 (0.005) loss 0.9387 (0.7829) lr 9.5173e-05 eta 0:20:28
epoch [49/50] batch [140/796] time 0.809 (0.835) data 0.000 (0.004) loss 2.0631 (0.7958) lr 9.5173e-05 eta 0:20:11
epoch [49/50] batch [160/796] time 0.830 (0.834) data 0.000 (0.004) loss 1.1955 (0.7957) lr 9.5173e-05 eta 0:19:54
epoch [49/50] batch [180/796] time 0.839 (0.834) data 0.000 (0.003) loss 1.2620 (0.7821) lr 9.5173e-05 eta 0:19:37
epoch [49/50] batch [200/796] time 0.829 (0.833) data 0.000 (0.003) loss 0.0879 (0.7788) lr 9.5173e-05 eta 0:19:19
epoch [49/50] batch [220/796] time 0.842 (0.833) data 0.000 (0.003) loss 0.5502 (0.7708) lr 9.5173e-05 eta 0:19:03
epoch [49/50] batch [240/796] time 0.839 (0.833) data 0.000 (0.003) loss 0.5831 (0.7626) lr 9.5173e-05 eta 0:18:46
epoch [49/50] batch [260/796] time 0.838 (0.833) data 0.000 (0.002) loss 0.1171 (0.7542) lr 9.5173e-05 eta 0:18:29
epoch [49/50] batch [280/796] time 0.809 (0.833) data 0.000 (0.002) loss 1.0336 (0.7691) lr 9.5173e-05 eta 0:18:12
epoch [49/50] batch [300/796] time 0.839 (0.833) data 0.000 (0.002) loss 0.9136 (0.7778) lr 9.5173e-05 eta 0:17:55
epoch [49/50] batch [320/796] time 0.838 (0.833) data 0.000 (0.002) loss 1.5761 (0.7972) lr 9.5173e-05 eta 0:17:38
epoch [49/50] batch [340/796] time 0.818 (0.832) data 0.000 (0.002) loss 0.2680 (0.7887) lr 9.5173e-05 eta 0:17:22
epoch [49/50] batch [360/796] time 0.819 (0.832) data 0.000 (0.002) loss 0.6615 (0.7883) lr 9.5173e-05 eta 0:17:05
epoch [49/50] batch [380/796] time 0.840 (0.832) data 0.000 (0.002) loss 0.3086 (0.7998) lr 9.5173e-05 eta 0:16:48
epoch [49/50] batch [400/796] time 0.839 (0.832) data 0.000 (0.002) loss 0.5251 (0.8049) lr 9.5173e-05 eta 0:16:31
epoch [49/50] batch [420/796] time 0.839 (0.832) data 0.000 (0.002) loss 1.4083 (0.8214) lr 9.5173e-05 eta 0:16:15
epoch [49/50] batch [440/796] time 0.817 (0.832) data 0.000 (0.002) loss 0.0727 (0.8229) lr 9.5173e-05 eta 0:15:58
epoch [49/50] batch [460/796] time 0.818 (0.832) data 0.000 (0.001) loss 0.3357 (0.8159) lr 9.5173e-05 eta 0:15:41
epoch [49/50] batch [480/796] time 0.829 (0.832) data 0.000 (0.001) loss 1.9987 (0.8193) lr 9.5173e-05 eta 0:15:24
epoch [49/50] batch [500/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.0219 (0.8242) lr 9.5173e-05 eta 0:15:07
epoch [49/50] batch [520/796] time 0.817 (0.831) data 0.000 (0.001) loss 0.0158 (0.8092) lr 9.5173e-05 eta 0:14:51
epoch [49/50] batch [540/796] time 0.838 (0.831) data 0.000 (0.001) loss 1.4293 (0.8213) lr 9.5173e-05 eta 0:14:34
epoch [49/50] batch [560/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.1463 (0.8274) lr 9.5173e-05 eta 0:14:17
epoch [49/50] batch [580/796] time 0.809 (0.831) data 0.000 (0.001) loss 0.5220 (0.8230) lr 9.5173e-05 eta 0:14:01
epoch [49/50] batch [600/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.3175 (0.8156) lr 9.5173e-05 eta 0:13:44
epoch [49/50] batch [620/796] time 0.840 (0.831) data 0.000 (0.001) loss 1.4933 (0.8178) lr 9.5173e-05 eta 0:13:27
epoch [49/50] batch [640/796] time 0.817 (0.831) data 0.000 (0.001) loss 1.2883 (0.8179) lr 9.5173e-05 eta 0:13:11
epoch [49/50] batch [660/796] time 0.838 (0.831) data 0.000 (0.001) loss 2.5090 (0.8091) lr 9.5173e-05 eta 0:12:54
epoch [49/50] batch [680/796] time 0.839 (0.831) data 0.000 (0.001) loss 3.8591 (0.8111) lr 9.5173e-05 eta 0:12:37
epoch [49/50] batch [700/796] time 0.837 (0.831) data 0.000 (0.001) loss 0.5793 (0.8169) lr 9.5173e-05 eta 0:12:21
epoch [49/50] batch [720/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.3755 (0.8120) lr 9.5173e-05 eta 0:12:04
epoch [49/50] batch [740/796] time 0.829 (0.831) data 0.000 (0.001) loss 1.7465 (0.8120) lr 9.5173e-05 eta 0:11:47
epoch [49/50] batch [760/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.9374 (0.8150) lr 9.5173e-05 eta 0:11:31
epoch [49/50] batch [780/796] time 0.838 (0.831) data 0.000 (0.001) loss 0.7182 (0.8183) lr 9.5173e-05 eta 0:11:14
epoch [50/50] batch [20/796] time 0.838 (0.853) data 0.000 (0.026) loss 0.3078 (0.8625) lr 7.0224e-05 eta 0:11:02
epoch [50/50] batch [40/796] time 0.839 (0.840) data 0.000 (0.013) loss 0.5921 (0.8625) lr 7.0224e-05 eta 0:10:35
epoch [50/50] batch [60/796] time 0.809 (0.837) data 0.000 (0.009) loss 0.6624 (0.8631) lr 7.0224e-05 eta 0:10:16
epoch [50/50] batch [80/796] time 0.845 (0.837) data 0.002 (0.007) loss 0.4684 (0.8559) lr 7.0224e-05 eta 0:09:59
epoch [50/50] batch [100/796] time 0.843 (0.836) data 0.000 (0.005) loss 1.0632 (0.8556) lr 7.0224e-05 eta 0:09:41
epoch [50/50] batch [120/796] time 0.843 (0.836) data 0.000 (0.005) loss 0.3474 (0.8241) lr 7.0224e-05 eta 0:09:24
epoch [50/50] batch [140/796] time 0.821 (0.836) data 0.000 (0.004) loss 0.2109 (0.8270) lr 7.0224e-05 eta 0:09:08
epoch [50/50] batch [160/796] time 0.826 (0.836) data 0.000 (0.003) loss 1.6538 (0.8552) lr 7.0224e-05 eta 0:08:51
epoch [50/50] batch [180/796] time 0.836 (0.835) data 0.000 (0.003) loss 0.7279 (0.8390) lr 7.0224e-05 eta 0:08:34
epoch [50/50] batch [200/796] time 0.843 (0.835) data 0.000 (0.003) loss 0.5166 (0.8381) lr 7.0224e-05 eta 0:08:17
epoch [50/50] batch [220/796] time 0.819 (0.835) data 0.000 (0.003) loss 0.0883 (0.8272) lr 7.0224e-05 eta 0:08:00
epoch [50/50] batch [240/796] time 0.822 (0.835) data 0.000 (0.002) loss 1.6860 (0.8224) lr 7.0224e-05 eta 0:07:44
epoch [50/50] batch [260/796] time 0.843 (0.835) data 0.000 (0.002) loss 1.8206 (0.8273) lr 7.0224e-05 eta 0:07:27
epoch [50/50] batch [280/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.4455 (0.8253) lr 7.0224e-05 eta 0:07:10
epoch [50/50] batch [300/796] time 0.844 (0.835) data 0.000 (0.002) loss 0.1892 (0.8207) lr 7.0224e-05 eta 0:06:54
epoch [50/50] batch [320/796] time 0.812 (0.835) data 0.000 (0.002) loss 0.4906 (0.8345) lr 7.0224e-05 eta 0:06:37
epoch [50/50] batch [340/796] time 0.843 (0.835) data 0.000 (0.002) loss 1.2318 (0.8337) lr 7.0224e-05 eta 0:06:20
epoch [50/50] batch [360/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.4017 (0.8258) lr 7.0224e-05 eta 0:06:03
epoch [50/50] batch [380/796] time 0.843 (0.835) data 0.000 (0.002) loss 0.3339 (0.8196) lr 7.0224e-05 eta 0:05:47
epoch [50/50] batch [400/796] time 0.843 (0.835) data 0.000 (0.001) loss 1.3900 (0.8154) lr 7.0224e-05 eta 0:05:30
epoch [50/50] batch [420/796] time 0.844 (0.835) data 0.000 (0.001) loss 1.1394 (0.8128) lr 7.0224e-05 eta 0:05:13
epoch [50/50] batch [440/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.7501 (0.8194) lr 7.0224e-05 eta 0:04:57
epoch [50/50] batch [460/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.1657 (0.8171) lr 7.0224e-05 eta 0:04:40
epoch [50/50] batch [480/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.2536 (0.8175) lr 7.0224e-05 eta 0:04:23
epoch [50/50] batch [500/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.2908 (0.8085) lr 7.0224e-05 eta 0:04:07
epoch [50/50] batch [520/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.1388 (0.8042) lr 7.0224e-05 eta 0:03:50
epoch [50/50] batch [540/796] time 0.844 (0.835) data 0.000 (0.001) loss 1.0766 (0.8076) lr 7.0224e-05 eta 0:03:33
epoch [50/50] batch [560/796] time 0.835 (0.835) data 0.000 (0.001) loss 0.3564 (0.8004) lr 7.0224e-05 eta 0:03:16
epoch [50/50] batch [580/796] time 0.821 (0.835) data 0.000 (0.001) loss 1.0764 (0.7976) lr 7.0224e-05 eta 0:03:00
epoch [50/50] batch [600/796] time 0.845 (0.835) data 0.001 (0.001) loss 0.2907 (0.7969) lr 7.0224e-05 eta 0:02:43
epoch [50/50] batch [620/796] time 0.834 (0.835) data 0.000 (0.001) loss 0.7046 (0.7974) lr 7.0224e-05 eta 0:02:26
epoch [50/50] batch [640/796] time 0.842 (0.835) data 0.000 (0.001) loss 0.1294 (0.7982) lr 7.0224e-05 eta 0:02:10
epoch [50/50] batch [660/796] time 0.843 (0.835) data 0.000 (0.001) loss 0.0632 (0.8049) lr 7.0224e-05 eta 0:01:53
epoch [50/50] batch [680/796] time 0.838 (0.835) data 0.000 (0.001) loss 0.2957 (0.8095) lr 7.0224e-05 eta 0:01:36
epoch [50/50] batch [700/796] time 0.844 (0.835) data 0.000 (0.001) loss 0.0850 (0.8094) lr 7.0224e-05 eta 0:01:20
epoch [50/50] batch [720/796] time 0.844 (0.835) data 0.000 (0.001) loss 1.8720 (0.8140) lr 7.0224e-05 eta 0:01:03
epoch [50/50] batch [740/796] time 0.810 (0.835) data 0.000 (0.001) loss 0.2511 (0.8151) lr 7.0224e-05 eta 0:00:46
epoch [50/50] batch [760/796] time 0.820 (0.835) data 0.000 (0.001) loss 0.2740 (0.8196) lr 7.0224e-05 eta 0:00:30
epoch [50/50] batch [780/796] time 0.834 (0.835) data 0.000 (0.001) loss 2.2480 (0.8244) lr 7.0224e-05 eta 0:00:13
Checkpoint saved to output/base2new/train_base/sun397/vit_b16_ep50_c4_BZ4_ProDA/seed3/prompt_learner/model.pth.tar-50
Finish training
Deploy the last-epoch model
Evaluate on the *test* set
=> result
* total: 9,950
* correct: 8,220
* accuracy: 82.61%
* error: 17.39%
* macro_f1: 82.35%
Elapsed: 9:14:34
