***************
** Arguments **
***************
backbone: 
config_file: configs/trainers/ProDA/vit_b16_ep50_c4_BZ4_ProDA.yaml
dataset_config_file: configs/datasets/stanford_cars.yaml
eval_only: False
head: 
load_epoch: None
model_dir: 
no_train: False
opts: ['DATASET.NUM_SHOTS', '16', 'DATASET.SUBSAMPLE_CLASSES', 'base']
output_dir: output/base2new/train_base/stanford_cars/vit_b16_ep50_c4_BZ4_ProDA/seed2
resume: 
root: /mnt/hdd/DATA
seed: 2
source_domains: None
target_domains: None
trainer: ProDA
transforms: None
************
** Config **
************
DATALOADER:
  K_TRANSFORMS: 1
  NUM_WORKERS: 8
  RETURN_IMG0: False
  TEST:
    BATCH_SIZE: 100
    SAMPLER: SequentialSampler
  TRAIN_U:
    BATCH_SIZE: 32
    N_DOMAIN: 0
    N_INS: 16
    SAME_AS_X: True
    SAMPLER: RandomSampler
  TRAIN_X:
    BATCH_SIZE: 4
    N_DOMAIN: 0
    N_INS: 16
    SAMPLER: RandomSampler
DATASET:
  ALL_AS_UNLABELED: False
  CIFAR_C_LEVEL: 1
  CIFAR_C_TYPE: 
  NAME: StanfordCars
  NUM_LABELED: -1
  NUM_SHOTS: 16
  ROOT: /mnt/hdd/DATA
  SOURCE_DOMAINS: ()
  STL10_FOLD: -1
  SUBSAMPLE_CLASSES: base
  TARGET_DOMAINS: ()
  VAL_PERCENT: 0.1
INPUT:
  COLORJITTER_B: 0.4
  COLORJITTER_C: 0.4
  COLORJITTER_H: 0.1
  COLORJITTER_S: 0.4
  CROP_PADDING: 4
  CUTOUT_LEN: 16
  CUTOUT_N: 1
  GB_K: 21
  GB_P: 0.5
  GN_MEAN: 0.0
  GN_STD: 0.15
  INTERPOLATION: bicubic
  NO_TRANSFORM: False
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  RANDAUGMENT_M: 10
  RANDAUGMENT_N: 2
  RGS_P: 0.2
  RRCROP_SCALE: (0.08, 1.0)
  SIZE: (224, 224)
  TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize')
MODEL:
  BACKBONE:
    NAME: ViT-B/16
    PRETRAINED: True
  HEAD:
    ACTIVATION: relu
    BN: True
    DROPOUT: 0.0
    HIDDEN_LAYERS: ()
    NAME: 
  INIT_WEIGHTS: 
OPTIM:
  ADAM_BETA1: 0.9
  ADAM_BETA2: 0.999
  BASE_LR_MULT: 0.1
  GAMMA: 0.1
  LR: 0.002
  LR_SCHEDULER: cosine
  MAX_EPOCH: 50
  MOMENTUM: 0.9
  NAME: sgd
  NEW_LAYERS: ()
  RMSPROP_ALPHA: 0.99
  SGD_DAMPNING: 0
  SGD_NESTEROV: False
  STAGED_LR: False
  STEPSIZE: (-1,)
  WARMUP_CONS_LR: 1e-05
  WARMUP_EPOCH: 5
  WARMUP_MIN_LR: 1e-05
  WARMUP_RECOUNT: True
  WARMUP_TYPE: constant
  WEIGHT_DECAY: 0.0005
OUTPUT_DIR: output/base2new/train_base/stanford_cars/vit_b16_ep50_c4_BZ4_ProDA/seed2
RESUME: 
SEED: 2
TEST:
  COMPUTE_CMAT: False
  EVALUATOR: Classification
  FINAL_MODEL: last_step
  NO_TEST: False
  PER_CLASS_RESULT: False
  SPLIT: test
TRAIN:
  CHECKPOINT_FREQ: 0
  COUNT_ITER: train_x
  PRINT_FREQ: 20
TRAINER:
  CDAC:
    CLASS_LR_MULTI: 10
    P_THRESH: 0.95
    RAMPUP_COEF: 30
    RAMPUP_ITRS: 1000
    STRONG_TRANSFORMS: ()
    TOPK_MATCH: 5
  COCOOP:
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  COOP:
    CLASS_TOKEN_POSITION: end
    CSC: False
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  CROSSGRAD:
    ALPHA_D: 0.5
    ALPHA_F: 0.5
    EPS_D: 1.0
    EPS_F: 1.0
  DAEL:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DAELDG:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DDAIG:
    ALPHA: 0.5
    CLAMP: False
    CLAMP_MAX: 1.0
    CLAMP_MIN: -1.0
    G_ARCH: 
    LMDA: 0.3
    WARMUP: 0
  DOMAINMIX:
    ALPHA: 1.0
    BETA: 1.0
    TYPE: crossdomain
  ENTMIN:
    LMDA: 0.001
  FIXMATCH:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 1.0
  IVLP:
    CTX_INIT: a photo of a
    N_CTX_TEXT: 2
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_TEXT: 9
    PROMPT_DEPTH_VISION: 9
  M3SDA:
    LMDA: 0.5
    N_STEP_F: 4
  MAPLE:
    CTX_INIT: a photo of a
    N_CTX: 4
    PREC: fp16
    PROMPT_DEPTH: 9
  MCD:
    N_STEP_F: 4
  MEANTEACHER:
    EMA_ALPHA: 0.999
    RAMPUP: 5
    WEIGHT_U: 1.0
  MIXMATCH:
    MIXUP_BETA: 0.75
    RAMPUP: 20000
    TEMP: 2.0
    WEIGHT_U: 100.0
  MME:
    LMDA: 0.1
  NAME: ProDA
  ProDA:
    N_CTX: 4
    N_PROMPT: 32
    PREC: fp16
  SE:
    CONF_THRE: 0.95
    EMA_ALPHA: 0.999
    RAMPUP: 300
  VPT:
    CTX_INIT: a photo of a
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_VISION: 1
USE_CUDA: True
VERBOSE: True
VERSION: 1
Collecting env info ...
** System info **
PyTorch version: 2.2.1+cu121
Is debug build: False
CUDA used to build PyTorch: 12.1
ROCM used to build PyTorch: N/A

OS: Debian GNU/Linux 12 (bookworm) (x86_64)
GCC version: (Debian 12.2.0-14) 12.2.0
Clang version: Could not collect
CMake version: Could not collect
Libc version: glibc-2.36

Python version: 3.11.2 (main, Mar 13 2023, 12:18:29) [GCC 12.2.0] (64-bit runtime)
Python platform: Linux-6.5.13-3-pve-x86_64-with-glibc2.36
Is CUDA available: True
CUDA runtime version: 11.8.89
CUDA_MODULE_LOADING set to: LAZY
GPU models and configuration: 
GPU 0: NVIDIA A800 80GB PCIe
GPU 1: NVIDIA A800 80GB PCIe

Nvidia driver version: 525.147.05
cuDNN version: Could not collect
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True

CPU:
Architecture:                       x86_64
CPU op-mode(s):                     32-bit, 64-bit
Address sizes:                      46 bits physical, 57 bits virtual
Byte Order:                         Little Endian
CPU(s):                             64
On-line CPU(s) list:                18,20,22,23,25-27,29,31,32,34,37,46-49
Off-line CPU(s) list:               0-17,19,21,24,28,30,33,35,36,38-45,50-63
Vendor ID:                          GenuineIntel
Model name:                         Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz
CPU family:                         6
Model:                              106
Thread(s) per core:                 2
Core(s) per socket:                 16
Socket(s):                          2
Stepping:                           6
CPU(s) scaling MHz:                 98%
CPU max MHz:                        3500.0000
CPU min MHz:                        800.0000
BogoMIPS:                           5800.00
Flags:                              fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect wbnoinvd dtherm ida arat pln pts vnmi avx512vbmi umip pku ospke avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid fsrm md_clear pconfig flush_l1d arch_capabilities
Virtualization:                     VT-x
L1d cache:                          1.5 MiB (32 instances)
L1i cache:                          1 MiB (32 instances)
L2 cache:                           40 MiB (32 instances)
L3 cache:                           48 MiB (2 instances)
NUMA node(s):                       2
NUMA node0 CPU(s):                  0-15,32-47
NUMA node1 CPU(s):                  16-31,48-63
Vulnerability Gather data sampling: Vulnerable: No microcode
Vulnerability Itlb multihit:        Not affected
Vulnerability L1tf:                 Not affected
Vulnerability Mds:                  Not affected
Vulnerability Meltdown:             Not affected
Vulnerability Mmio stale data:      Mitigation; Clear CPU buffers; SMT vulnerable
Vulnerability Retbleed:             Not affected
Vulnerability Spec rstack overflow: Not affected
Vulnerability Spec store bypass:    Mitigation; Speculative Store Bypass disabled via prctl
Vulnerability Spectre v1:           Mitigation; usercopy/swapgs barriers and __user pointer sanitization
Vulnerability Spectre v2:           Mitigation; Enhanced / Automatic IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
Vulnerability Srbds:                Not affected
Vulnerability Tsx async abort:      Not affected

Versions of relevant libraries:
[pip3] flake8==3.7.9
[pip3] flake8==3.7.9
[pip3] numpy==1.26.4
[pip3] torch==2.2.1
[pip3] torchaudio==2.2.1
[pip3] torchvision==0.17.1
[pip3] triton==2.2.0
[conda] Could not collect
        Pillow (10.2.0)

Loading trainer: ProDA
Loading dataset: StanfordCars
Reading split from /mnt/hdd/DATA/stanford_cars/split_zhou_StanfordCars.json
Loading preprocessed few-shot data from /mnt/hdd/DATA/stanford_cars/split_fewshot/shot_16_shuffled-seed_2.pkl
SUBSAMPLE BASE CLASSES!
Building transform_train
+ random resized crop (size=(224, 224), scale=(0.08, 1.0))
+ random flip
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
Building transform_test
+ resize the smaller edge to 224
+ 224x224 center crop
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
---------  ------------
Dataset    StanfordCars
# classes  98
# train_x  1,568
# val      392
# test     4,006
---------  ------------
Loading CLIP (backbone: ViT-B/16)
Building custom CLIP
Turning off gradients in both the image and the text encoder
Parameters to be updated: {'prompt_learner.ctx'}
Loading evaluator: Classification
No checkpoint found, train from scratch
Initialize tensorboard (log_dir=output/base2new/train_base/stanford_cars/vit_b16_ep50_c4_BZ4_ProDA/seed2/tensorboard)
epoch [1/50] batch [20/392] time 0.431 (0.563) data 0.000 (0.029) loss 0.2969 (1.4614) lr 1.0000e-05 eta 3:03:35
epoch [1/50] batch [40/392] time 0.436 (0.497) data 0.000 (0.015) loss 1.9766 (1.6436) lr 1.0000e-05 eta 2:42:03
epoch [1/50] batch [60/392] time 0.426 (0.476) data 0.000 (0.010) loss 1.2296 (1.6353) lr 1.0000e-05 eta 2:34:55
epoch [1/50] batch [80/392] time 0.435 (0.465) data 0.000 (0.007) loss 2.0862 (1.6447) lr 1.0000e-05 eta 2:31:08
epoch [1/50] batch [100/392] time 0.435 (0.458) data 0.000 (0.006) loss 1.9328 (1.6460) lr 1.0000e-05 eta 2:28:51
epoch [1/50] batch [120/392] time 0.432 (0.454) data 0.000 (0.005) loss 2.0003 (1.6786) lr 1.0000e-05 eta 2:27:15
epoch [1/50] batch [140/392] time 0.437 (0.451) data 0.000 (0.004) loss 1.1467 (1.6630) lr 1.0000e-05 eta 2:26:07
epoch [1/50] batch [160/392] time 0.436 (0.448) data 0.000 (0.004) loss 2.0438 (1.6359) lr 1.0000e-05 eta 2:25:14
epoch [1/50] batch [180/392] time 0.427 (0.446) data 0.000 (0.003) loss 0.6960 (1.5959) lr 1.0000e-05 eta 2:24:29
epoch [1/50] batch [200/392] time 0.436 (0.445) data 0.000 (0.003) loss 1.7766 (1.5853) lr 1.0000e-05 eta 2:23:53
epoch [1/50] batch [220/392] time 0.422 (0.444) data 0.000 (0.003) loss 1.5174 (1.6005) lr 1.0000e-05 eta 2:23:19
epoch [1/50] batch [240/392] time 0.433 (0.443) data 0.000 (0.003) loss 1.7522 (1.5868) lr 1.0000e-05 eta 2:22:52
epoch [1/50] batch [260/392] time 0.427 (0.442) data 0.000 (0.002) loss 2.5572 (1.5840) lr 1.0000e-05 eta 2:22:26
epoch [1/50] batch [280/392] time 0.433 (0.441) data 0.000 (0.002) loss 2.7739 (1.5911) lr 1.0000e-05 eta 2:22:04
epoch [1/50] batch [300/392] time 0.423 (0.441) data 0.000 (0.002) loss 1.7500 (1.5735) lr 1.0000e-05 eta 2:21:45
epoch [1/50] batch [320/392] time 0.428 (0.440) data 0.000 (0.002) loss 0.6935 (1.5673) lr 1.0000e-05 eta 2:21:28
epoch [1/50] batch [340/392] time 0.437 (0.440) data 0.000 (0.002) loss 1.4256 (1.5485) lr 1.0000e-05 eta 2:21:11
epoch [1/50] batch [360/392] time 0.432 (0.439) data 0.000 (0.002) loss 1.2306 (1.5488) lr 1.0000e-05 eta 2:20:54
epoch [1/50] batch [380/392] time 0.430 (0.439) data 0.000 (0.002) loss 0.7785 (1.5375) lr 1.0000e-05 eta 2:20:38
epoch [2/50] batch [20/392] time 0.438 (0.449) data 0.000 (0.017) loss 0.7124 (1.4505) lr 1.0000e-05 eta 2:23:44
epoch [2/50] batch [40/392] time 0.428 (0.441) data 0.000 (0.009) loss 1.1902 (1.4423) lr 1.0000e-05 eta 2:20:47
epoch [2/50] batch [60/392] time 0.431 (0.438) data 0.000 (0.006) loss 1.6520 (1.4435) lr 1.0000e-05 eta 2:19:51
epoch [2/50] batch [80/392] time 0.432 (0.436) data 0.000 (0.004) loss 0.7620 (1.4532) lr 1.0000e-05 eta 2:19:08
epoch [2/50] batch [100/392] time 0.435 (0.436) data 0.000 (0.004) loss 1.0323 (1.5187) lr 1.0000e-05 eta 2:18:42
epoch [2/50] batch [120/392] time 0.442 (0.435) data 0.000 (0.003) loss 1.8517 (1.4615) lr 1.0000e-05 eta 2:18:22
epoch [2/50] batch [140/392] time 0.436 (0.435) data 0.000 (0.003) loss 2.2701 (1.4578) lr 1.0000e-05 eta 2:18:09
epoch [2/50] batch [160/392] time 0.434 (0.434) data 0.000 (0.002) loss 1.0591 (1.4640) lr 1.0000e-05 eta 2:17:54
epoch [2/50] batch [180/392] time 0.437 (0.434) data 0.000 (0.002) loss 1.5666 (1.4671) lr 1.0000e-05 eta 2:17:42
epoch [2/50] batch [200/392] time 0.433 (0.434) data 0.000 (0.002) loss 1.0220 (1.4590) lr 1.0000e-05 eta 2:17:33
epoch [2/50] batch [220/392] time 0.434 (0.434) data 0.000 (0.002) loss 2.2072 (1.4544) lr 1.0000e-05 eta 2:17:21
epoch [2/50] batch [240/392] time 0.427 (0.434) data 0.000 (0.002) loss 2.3122 (1.4713) lr 1.0000e-05 eta 2:17:12
epoch [2/50] batch [260/392] time 0.438 (0.434) data 0.000 (0.001) loss 3.2666 (1.4778) lr 1.0000e-05 eta 2:17:03
epoch [2/50] batch [280/392] time 0.428 (0.434) data 0.000 (0.001) loss 1.1074 (1.4892) lr 1.0000e-05 eta 2:16:55
epoch [2/50] batch [300/392] time 0.428 (0.434) data 0.000 (0.001) loss 2.0546 (1.4998) lr 1.0000e-05 eta 2:16:47
epoch [2/50] batch [320/392] time 0.428 (0.434) data 0.000 (0.001) loss 1.4049 (1.4847) lr 1.0000e-05 eta 2:16:38
epoch [2/50] batch [340/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.8935 (1.4805) lr 1.0000e-05 eta 2:16:29
epoch [2/50] batch [360/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.5880 (1.4864) lr 1.0000e-05 eta 2:16:20
epoch [2/50] batch [380/392] time 0.437 (0.434) data 0.000 (0.001) loss 2.3410 (1.4788) lr 1.0000e-05 eta 2:16:09
epoch [3/50] batch [20/392] time 0.438 (0.451) data 0.000 (0.017) loss 3.7823 (1.5774) lr 1.0000e-05 eta 2:21:11
epoch [3/50] batch [40/392] time 0.439 (0.442) data 0.000 (0.009) loss 0.9267 (1.4848) lr 1.0000e-05 eta 2:18:16
epoch [3/50] batch [60/392] time 0.438 (0.439) data 0.000 (0.006) loss 1.9665 (1.4478) lr 1.0000e-05 eta 2:17:22
epoch [3/50] batch [80/392] time 0.437 (0.438) data 0.000 (0.004) loss 0.3203 (1.4916) lr 1.0000e-05 eta 2:16:41
epoch [3/50] batch [100/392] time 0.438 (0.437) data 0.000 (0.004) loss 0.5372 (1.5840) lr 1.0000e-05 eta 2:16:14
epoch [3/50] batch [120/392] time 0.437 (0.436) data 0.000 (0.003) loss 0.7557 (1.5503) lr 1.0000e-05 eta 2:15:57
epoch [3/50] batch [140/392] time 0.438 (0.436) data 0.000 (0.003) loss 2.1544 (1.5178) lr 1.0000e-05 eta 2:15:39
epoch [3/50] batch [160/392] time 0.437 (0.435) data 0.000 (0.002) loss 2.0462 (1.4742) lr 1.0000e-05 eta 2:15:22
epoch [3/50] batch [180/392] time 0.437 (0.435) data 0.000 (0.002) loss 3.0863 (1.4658) lr 1.0000e-05 eta 2:15:10
epoch [3/50] batch [200/392] time 0.433 (0.435) data 0.000 (0.002) loss 1.3177 (1.4412) lr 1.0000e-05 eta 2:14:56
epoch [3/50] batch [220/392] time 0.438 (0.435) data 0.000 (0.002) loss 3.0374 (1.4712) lr 1.0000e-05 eta 2:14:43
epoch [3/50] batch [240/392] time 0.424 (0.435) data 0.000 (0.002) loss 1.4957 (1.4750) lr 1.0000e-05 eta 2:14:32
epoch [3/50] batch [260/392] time 0.428 (0.434) data 0.000 (0.001) loss 1.5087 (1.4641) lr 1.0000e-05 eta 2:14:22
epoch [3/50] batch [280/392] time 0.432 (0.434) data 0.000 (0.001) loss 0.6432 (1.4783) lr 1.0000e-05 eta 2:14:11
epoch [3/50] batch [300/392] time 0.429 (0.434) data 0.000 (0.001) loss 1.0379 (1.4771) lr 1.0000e-05 eta 2:14:01
epoch [3/50] batch [320/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.5415 (1.4867) lr 1.0000e-05 eta 2:13:51
epoch [3/50] batch [340/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.0267 (1.4898) lr 1.0000e-05 eta 2:13:42
epoch [3/50] batch [360/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.4153 (1.4982) lr 1.0000e-05 eta 2:13:32
epoch [3/50] batch [380/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.3661 (1.4861) lr 1.0000e-05 eta 2:13:22
epoch [4/50] batch [20/392] time 0.433 (0.451) data 0.000 (0.017) loss 0.5020 (1.4877) lr 1.0000e-05 eta 2:18:14
epoch [4/50] batch [40/392] time 0.437 (0.441) data 0.000 (0.009) loss 2.3971 (1.4718) lr 1.0000e-05 eta 2:15:15
epoch [4/50] batch [60/392] time 0.427 (0.438) data 0.000 (0.006) loss 0.5018 (1.4429) lr 1.0000e-05 eta 2:14:11
epoch [4/50] batch [80/392] time 0.437 (0.437) data 0.000 (0.004) loss 0.3263 (1.4345) lr 1.0000e-05 eta 2:13:39
epoch [4/50] batch [100/392] time 0.438 (0.437) data 0.000 (0.004) loss 1.0765 (1.4304) lr 1.0000e-05 eta 2:13:22
epoch [4/50] batch [120/392] time 0.437 (0.436) data 0.000 (0.003) loss 1.0104 (1.4193) lr 1.0000e-05 eta 2:13:01
epoch [4/50] batch [140/392] time 0.433 (0.436) data 0.000 (0.003) loss 0.2798 (1.4040) lr 1.0000e-05 eta 2:12:44
epoch [4/50] batch [160/392] time 0.427 (0.436) data 0.000 (0.002) loss 0.1504 (1.4144) lr 1.0000e-05 eta 2:12:34
epoch [4/50] batch [180/392] time 0.427 (0.435) data 0.000 (0.002) loss 0.6835 (1.4125) lr 1.0000e-05 eta 2:12:20
epoch [4/50] batch [200/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.2200 (1.4053) lr 1.0000e-05 eta 2:12:10
epoch [4/50] batch [220/392] time 0.432 (0.435) data 0.000 (0.002) loss 0.6738 (1.4112) lr 1.0000e-05 eta 2:11:56
epoch [4/50] batch [240/392] time 0.437 (0.435) data 0.000 (0.002) loss 2.7827 (1.4174) lr 1.0000e-05 eta 2:11:45
epoch [4/50] batch [260/392] time 0.433 (0.435) data 0.000 (0.001) loss 0.5639 (1.4377) lr 1.0000e-05 eta 2:11:34
epoch [4/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.8643 (1.4283) lr 1.0000e-05 eta 2:11:25
epoch [4/50] batch [300/392] time 0.428 (0.435) data 0.000 (0.001) loss 0.5113 (1.4239) lr 1.0000e-05 eta 2:11:15
epoch [4/50] batch [320/392] time 0.436 (0.434) data 0.000 (0.001) loss 0.8264 (1.4194) lr 1.0000e-05 eta 2:11:05
epoch [4/50] batch [340/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.4704 (1.4170) lr 1.0000e-05 eta 2:10:54
epoch [4/50] batch [360/392] time 0.436 (0.434) data 0.000 (0.001) loss 0.3909 (1.4178) lr 1.0000e-05 eta 2:10:44
epoch [4/50] batch [380/392] time 0.431 (0.434) data 0.000 (0.001) loss 1.0786 (1.4427) lr 1.0000e-05 eta 2:10:33
epoch [5/50] batch [20/392] time 0.437 (0.449) data 0.000 (0.016) loss 1.7712 (1.3683) lr 1.0000e-05 eta 2:14:42
epoch [5/50] batch [40/392] time 0.432 (0.441) data 0.000 (0.008) loss 1.5877 (1.3164) lr 1.0000e-05 eta 2:12:07
epoch [5/50] batch [60/392] time 0.438 (0.438) data 0.000 (0.005) loss 1.2976 (1.4028) lr 1.0000e-05 eta 2:11:17
epoch [5/50] batch [80/392] time 0.437 (0.437) data 0.000 (0.004) loss 0.4551 (1.3612) lr 1.0000e-05 eta 2:10:47
epoch [5/50] batch [100/392] time 0.438 (0.436) data 0.000 (0.003) loss 1.8856 (1.4296) lr 1.0000e-05 eta 2:10:22
epoch [5/50] batch [120/392] time 0.436 (0.436) data 0.000 (0.003) loss 2.0305 (1.3959) lr 1.0000e-05 eta 2:10:07
epoch [5/50] batch [140/392] time 0.432 (0.435) data 0.000 (0.002) loss 0.8434 (1.3775) lr 1.0000e-05 eta 2:09:45
epoch [5/50] batch [160/392] time 0.434 (0.435) data 0.000 (0.002) loss 0.8663 (1.3662) lr 1.0000e-05 eta 2:09:29
epoch [5/50] batch [180/392] time 0.433 (0.434) data 0.000 (0.002) loss 0.9712 (1.3904) lr 1.0000e-05 eta 2:09:16
epoch [5/50] batch [200/392] time 0.438 (0.434) data 0.000 (0.002) loss 1.2420 (1.4052) lr 1.0000e-05 eta 2:09:06
epoch [5/50] batch [220/392] time 0.433 (0.434) data 0.000 (0.002) loss 1.4128 (1.4126) lr 1.0000e-05 eta 2:08:58
epoch [5/50] batch [240/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.1173 (1.3959) lr 1.0000e-05 eta 2:08:47
epoch [5/50] batch [260/392] time 0.432 (0.434) data 0.000 (0.001) loss 1.3910 (1.3800) lr 1.0000e-05 eta 2:08:37
epoch [5/50] batch [280/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.9803 (1.3926) lr 1.0000e-05 eta 2:08:26
epoch [5/50] batch [300/392] time 0.428 (0.434) data 0.000 (0.001) loss 1.9127 (1.3725) lr 1.0000e-05 eta 2:08:14
epoch [5/50] batch [320/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.9624 (1.3813) lr 1.0000e-05 eta 2:08:06
epoch [5/50] batch [340/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.0890 (1.3622) lr 1.0000e-05 eta 2:07:55
epoch [5/50] batch [360/392] time 0.434 (0.434) data 0.000 (0.001) loss 1.9031 (1.3705) lr 1.0000e-05 eta 2:07:47
epoch [5/50] batch [380/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.4043 (1.3702) lr 1.0000e-05 eta 2:07:38
epoch [6/50] batch [20/392] time 0.426 (0.451) data 0.000 (0.017) loss 1.6387 (1.3275) lr 2.0000e-03 eta 2:12:28
epoch [6/50] batch [40/392] time 0.434 (0.443) data 0.000 (0.009) loss 1.2627 (1.3174) lr 2.0000e-03 eta 2:09:50
epoch [6/50] batch [60/392] time 0.428 (0.439) data 0.000 (0.006) loss 3.5159 (1.3530) lr 2.0000e-03 eta 2:08:46
epoch [6/50] batch [80/392] time 0.438 (0.438) data 0.000 (0.004) loss 0.3804 (1.3719) lr 2.0000e-03 eta 2:08:10
epoch [6/50] batch [100/392] time 0.437 (0.437) data 0.000 (0.004) loss 0.8141 (1.3989) lr 2.0000e-03 eta 2:07:44
epoch [6/50] batch [120/392] time 0.437 (0.436) data 0.000 (0.003) loss 0.5345 (1.4221) lr 2.0000e-03 eta 2:07:23
epoch [6/50] batch [140/392] time 0.433 (0.436) data 0.000 (0.003) loss 1.0337 (1.4261) lr 2.0000e-03 eta 2:07:07
epoch [6/50] batch [160/392] time 0.427 (0.436) data 0.000 (0.002) loss 0.7322 (1.3991) lr 2.0000e-03 eta 2:06:55
epoch [6/50] batch [180/392] time 0.432 (0.435) data 0.000 (0.002) loss 0.5454 (1.3733) lr 2.0000e-03 eta 2:06:43
epoch [6/50] batch [200/392] time 0.439 (0.435) data 0.000 (0.002) loss 1.9856 (1.3930) lr 2.0000e-03 eta 2:06:30
epoch [6/50] batch [220/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.9352 (1.3648) lr 2.0000e-03 eta 2:06:20
epoch [6/50] batch [240/392] time 0.440 (0.435) data 0.000 (0.002) loss 1.4373 (1.3805) lr 2.0000e-03 eta 2:06:10
epoch [6/50] batch [260/392] time 0.428 (0.435) data 0.000 (0.001) loss 1.8493 (1.3867) lr 2.0000e-03 eta 2:05:58
epoch [6/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.1670 (1.3807) lr 2.0000e-03 eta 2:05:48
epoch [6/50] batch [300/392] time 0.427 (0.435) data 0.000 (0.001) loss 1.3999 (1.3719) lr 2.0000e-03 eta 2:05:38
epoch [6/50] batch [320/392] time 0.434 (0.435) data 0.000 (0.001) loss 0.5903 (1.3760) lr 2.0000e-03 eta 2:05:28
epoch [6/50] batch [340/392] time 0.427 (0.435) data 0.000 (0.001) loss 1.0347 (1.3696) lr 2.0000e-03 eta 2:05:18
epoch [6/50] batch [360/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.4429 (1.3464) lr 2.0000e-03 eta 2:05:08
epoch [6/50] batch [380/392] time 0.427 (0.434) data 0.000 (0.001) loss 1.4982 (1.3522) lr 2.0000e-03 eta 2:04:58
epoch [7/50] batch [20/392] time 0.439 (0.454) data 0.000 (0.020) loss 1.7832 (1.1842) lr 1.9980e-03 eta 2:10:24
epoch [7/50] batch [40/392] time 0.433 (0.443) data 0.000 (0.010) loss 0.6406 (1.1816) lr 1.9980e-03 eta 2:07:06
epoch [7/50] batch [60/392] time 0.433 (0.440) data 0.000 (0.007) loss 2.5697 (1.2356) lr 1.9980e-03 eta 2:05:59
epoch [7/50] batch [80/392] time 0.427 (0.438) data 0.000 (0.005) loss 1.4045 (1.2465) lr 1.9980e-03 eta 2:05:27
epoch [7/50] batch [100/392] time 0.437 (0.437) data 0.001 (0.004) loss 1.1694 (1.2248) lr 1.9980e-03 eta 2:04:58
epoch [7/50] batch [120/392] time 0.437 (0.437) data 0.000 (0.003) loss 0.8412 (1.2870) lr 1.9980e-03 eta 2:04:39
epoch [7/50] batch [140/392] time 0.437 (0.436) data 0.000 (0.003) loss 1.5228 (1.3097) lr 1.9980e-03 eta 2:04:18
epoch [7/50] batch [160/392] time 0.437 (0.436) data 0.000 (0.003) loss 0.5412 (1.2824) lr 1.9980e-03 eta 2:04:04
epoch [7/50] batch [180/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.7229 (1.2791) lr 1.9980e-03 eta 2:03:51
epoch [7/50] batch [200/392] time 0.432 (0.435) data 0.000 (0.002) loss 1.5108 (1.2722) lr 1.9980e-03 eta 2:03:39
epoch [7/50] batch [220/392] time 0.424 (0.435) data 0.000 (0.002) loss 1.4453 (1.2890) lr 1.9980e-03 eta 2:03:30
epoch [7/50] batch [240/392] time 0.424 (0.435) data 0.000 (0.002) loss 1.4505 (1.2893) lr 1.9980e-03 eta 2:03:18
epoch [7/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.9925 (1.2828) lr 1.9980e-03 eta 2:03:06
epoch [7/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.2148 (1.2792) lr 1.9980e-03 eta 2:02:55
epoch [7/50] batch [300/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.2936 (1.2743) lr 1.9980e-03 eta 2:02:45
epoch [7/50] batch [320/392] time 0.433 (0.434) data 0.000 (0.001) loss 0.3575 (1.2875) lr 1.9980e-03 eta 2:02:33
epoch [7/50] batch [340/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.9929 (1.2872) lr 1.9980e-03 eta 2:02:22
epoch [7/50] batch [360/392] time 0.439 (0.434) data 0.000 (0.001) loss 0.6555 (1.2942) lr 1.9980e-03 eta 2:02:12
epoch [7/50] batch [380/392] time 0.432 (0.434) data 0.000 (0.001) loss 2.1230 (1.2820) lr 1.9980e-03 eta 2:02:02
epoch [8/50] batch [20/392] time 0.434 (0.451) data 0.000 (0.018) loss 1.3066 (1.5546) lr 1.9921e-03 eta 2:06:29
epoch [8/50] batch [40/392] time 0.438 (0.442) data 0.000 (0.009) loss 0.8949 (1.3724) lr 1.9921e-03 eta 2:03:52
epoch [8/50] batch [60/392] time 0.439 (0.439) data 0.000 (0.006) loss 1.6015 (1.3668) lr 1.9921e-03 eta 2:03:00
epoch [8/50] batch [80/392] time 0.434 (0.438) data 0.000 (0.005) loss 0.7322 (1.2971) lr 1.9921e-03 eta 2:02:30
epoch [8/50] batch [100/392] time 0.438 (0.437) data 0.000 (0.004) loss 1.5906 (1.3004) lr 1.9921e-03 eta 2:02:07
epoch [8/50] batch [120/392] time 0.439 (0.437) data 0.000 (0.003) loss 2.4419 (1.2941) lr 1.9921e-03 eta 2:01:51
epoch [8/50] batch [140/392] time 0.429 (0.436) data 0.000 (0.003) loss 0.9254 (1.2934) lr 1.9921e-03 eta 2:01:35
epoch [8/50] batch [160/392] time 0.439 (0.436) data 0.000 (0.002) loss 1.0497 (1.3038) lr 1.9921e-03 eta 2:01:22
epoch [8/50] batch [180/392] time 0.438 (0.436) data 0.000 (0.002) loss 2.0583 (1.2899) lr 1.9921e-03 eta 2:01:11
epoch [8/50] batch [200/392] time 0.429 (0.436) data 0.000 (0.002) loss 0.9834 (1.2879) lr 1.9921e-03 eta 2:00:59
epoch [8/50] batch [220/392] time 0.430 (0.436) data 0.000 (0.002) loss 2.4778 (1.3035) lr 1.9921e-03 eta 2:00:47
epoch [8/50] batch [240/392] time 0.434 (0.435) data 0.000 (0.002) loss 0.3404 (1.2857) lr 1.9921e-03 eta 2:00:35
epoch [8/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.1198 (1.2603) lr 1.9921e-03 eta 2:00:25
epoch [8/50] batch [280/392] time 0.443 (0.435) data 0.000 (0.001) loss 1.4671 (1.2498) lr 1.9921e-03 eta 2:00:15
epoch [8/50] batch [300/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.1413 (1.2452) lr 1.9921e-03 eta 2:00:05
epoch [8/50] batch [320/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.2181 (1.2400) lr 1.9921e-03 eta 1:59:54
epoch [8/50] batch [340/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.4199 (1.2275) lr 1.9921e-03 eta 1:59:44
epoch [8/50] batch [360/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.3513 (1.2351) lr 1.9921e-03 eta 1:59:35
epoch [8/50] batch [380/392] time 0.432 (0.435) data 0.000 (0.001) loss 0.8614 (1.2423) lr 1.9921e-03 eta 1:59:23
epoch [9/50] batch [20/392] time 0.439 (0.452) data 0.000 (0.017) loss 2.6264 (1.3560) lr 1.9823e-03 eta 2:03:47
epoch [9/50] batch [40/392] time 0.432 (0.442) data 0.000 (0.009) loss 0.6026 (1.2579) lr 1.9823e-03 eta 2:01:07
epoch [9/50] batch [60/392] time 0.428 (0.439) data 0.000 (0.006) loss 2.0632 (1.2307) lr 1.9823e-03 eta 2:00:01
epoch [9/50] batch [80/392] time 0.432 (0.437) data 0.000 (0.004) loss 1.0563 (1.2205) lr 1.9823e-03 eta 1:59:25
epoch [9/50] batch [100/392] time 0.423 (0.437) data 0.000 (0.004) loss 0.7789 (1.1870) lr 1.9823e-03 eta 1:59:03
epoch [9/50] batch [120/392] time 0.437 (0.436) data 0.000 (0.003) loss 0.7699 (1.1871) lr 1.9823e-03 eta 1:58:45
epoch [9/50] batch [140/392] time 0.437 (0.436) data 0.000 (0.003) loss 3.5982 (1.2375) lr 1.9823e-03 eta 1:58:30
epoch [9/50] batch [160/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.5676 (1.2285) lr 1.9823e-03 eta 1:58:15
epoch [9/50] batch [180/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.6108 (1.2491) lr 1.9823e-03 eta 1:58:05
epoch [9/50] batch [200/392] time 0.439 (0.435) data 0.000 (0.002) loss 0.7113 (1.2417) lr 1.9823e-03 eta 1:57:53
epoch [9/50] batch [220/392] time 0.434 (0.435) data 0.000 (0.002) loss 0.4739 (1.2197) lr 1.9823e-03 eta 1:57:42
epoch [9/50] batch [240/392] time 0.427 (0.435) data 0.000 (0.002) loss 2.2530 (1.2395) lr 1.9823e-03 eta 1:57:32
epoch [9/50] batch [260/392] time 0.434 (0.435) data 0.000 (0.001) loss 0.8810 (1.2328) lr 1.9823e-03 eta 1:57:21
epoch [9/50] batch [280/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.9858 (1.2395) lr 1.9823e-03 eta 1:57:10
epoch [9/50] batch [300/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.1869 (1.2324) lr 1.9823e-03 eta 1:57:00
epoch [9/50] batch [320/392] time 0.430 (0.434) data 0.000 (0.001) loss 1.5348 (1.2424) lr 1.9823e-03 eta 1:56:51
epoch [9/50] batch [340/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.3802 (1.2320) lr 1.9823e-03 eta 1:56:41
epoch [9/50] batch [360/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.6922 (1.2257) lr 1.9823e-03 eta 1:56:31
epoch [9/50] batch [380/392] time 0.433 (0.434) data 0.000 (0.001) loss 1.5854 (1.2358) lr 1.9823e-03 eta 1:56:20
epoch [10/50] batch [20/392] time 0.423 (0.452) data 0.000 (0.018) loss 1.7628 (1.0785) lr 1.9686e-03 eta 2:00:59
epoch [10/50] batch [40/392] time 0.438 (0.442) data 0.000 (0.009) loss 1.1094 (1.2411) lr 1.9686e-03 eta 1:58:08
epoch [10/50] batch [60/392] time 0.439 (0.439) data 0.000 (0.006) loss 0.8613 (1.2731) lr 1.9686e-03 eta 1:57:07
epoch [10/50] batch [80/392] time 0.433 (0.437) data 0.000 (0.005) loss 2.3549 (1.2551) lr 1.9686e-03 eta 1:56:34
epoch [10/50] batch [100/392] time 0.437 (0.436) data 0.000 (0.004) loss 2.0291 (1.2379) lr 1.9686e-03 eta 1:56:08
epoch [10/50] batch [120/392] time 0.437 (0.436) data 0.000 (0.003) loss 0.2456 (1.1717) lr 1.9686e-03 eta 1:55:51
epoch [10/50] batch [140/392] time 0.440 (0.436) data 0.000 (0.003) loss 1.1833 (1.1531) lr 1.9686e-03 eta 1:55:40
epoch [10/50] batch [160/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.2766 (1.1711) lr 1.9686e-03 eta 1:55:28
epoch [10/50] batch [180/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.6802 (1.2045) lr 1.9686e-03 eta 1:55:18
epoch [10/50] batch [200/392] time 0.427 (0.435) data 0.000 (0.002) loss 2.1962 (1.2127) lr 1.9686e-03 eta 1:55:07
epoch [10/50] batch [220/392] time 0.432 (0.435) data 0.000 (0.002) loss 0.1928 (1.2075) lr 1.9686e-03 eta 1:54:56
epoch [10/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.8585 (1.1792) lr 1.9686e-03 eta 1:54:45
epoch [10/50] batch [260/392] time 0.433 (0.435) data 0.000 (0.002) loss 1.1221 (1.1873) lr 1.9686e-03 eta 1:54:34
epoch [10/50] batch [280/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.3367 (1.1949) lr 1.9686e-03 eta 1:54:24
epoch [10/50] batch [300/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.6671 (1.1988) lr 1.9686e-03 eta 1:54:14
epoch [10/50] batch [320/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.8527 (1.2071) lr 1.9686e-03 eta 1:54:05
epoch [10/50] batch [340/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.9039 (1.2232) lr 1.9686e-03 eta 1:53:55
epoch [10/50] batch [360/392] time 0.438 (0.434) data 0.000 (0.001) loss 0.1767 (1.2228) lr 1.9686e-03 eta 1:53:45
epoch [10/50] batch [380/392] time 0.433 (0.434) data 0.000 (0.001) loss 1.4604 (1.2146) lr 1.9686e-03 eta 1:53:35
epoch [11/50] batch [20/392] time 0.427 (0.450) data 0.000 (0.018) loss 2.6108 (1.3489) lr 1.9511e-03 eta 1:57:33
epoch [11/50] batch [40/392] time 0.434 (0.442) data 0.000 (0.009) loss 0.3559 (1.3576) lr 1.9511e-03 eta 1:55:06
epoch [11/50] batch [60/392] time 0.427 (0.439) data 0.000 (0.006) loss 2.0260 (1.2867) lr 1.9511e-03 eta 1:54:13
epoch [11/50] batch [80/392] time 0.424 (0.437) data 0.000 (0.005) loss 0.6557 (1.2597) lr 1.9511e-03 eta 1:53:41
epoch [11/50] batch [100/392] time 0.426 (0.436) data 0.000 (0.004) loss 1.1378 (1.2463) lr 1.9511e-03 eta 1:53:20
epoch [11/50] batch [120/392] time 0.432 (0.436) data 0.000 (0.003) loss 1.5560 (1.2938) lr 1.9511e-03 eta 1:53:04
epoch [11/50] batch [140/392] time 0.432 (0.436) data 0.000 (0.003) loss 0.4545 (1.2901) lr 1.9511e-03 eta 1:52:48
epoch [11/50] batch [160/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.2968 (1.2854) lr 1.9511e-03 eta 1:52:32
epoch [11/50] batch [180/392] time 0.427 (0.435) data 0.000 (0.002) loss 0.5550 (1.2538) lr 1.9511e-03 eta 1:52:21
epoch [11/50] batch [200/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.2947 (1.2236) lr 1.9511e-03 eta 1:52:10
epoch [11/50] batch [220/392] time 0.437 (0.435) data 0.000 (0.002) loss 1.8988 (1.2163) lr 1.9511e-03 eta 1:52:02
epoch [11/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.0798 (1.2279) lr 1.9511e-03 eta 1:51:51
epoch [11/50] batch [260/392] time 0.432 (0.435) data 0.000 (0.002) loss 0.2042 (1.2166) lr 1.9511e-03 eta 1:51:40
epoch [11/50] batch [280/392] time 0.427 (0.434) data 0.000 (0.001) loss 2.4077 (1.2222) lr 1.9511e-03 eta 1:51:30
epoch [11/50] batch [300/392] time 0.429 (0.434) data 0.000 (0.001) loss 1.0970 (1.2261) lr 1.9511e-03 eta 1:51:19
epoch [11/50] batch [320/392] time 0.427 (0.434) data 0.000 (0.001) loss 3.7123 (1.2267) lr 1.9511e-03 eta 1:51:09
epoch [11/50] batch [340/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.3840 (1.2195) lr 1.9511e-03 eta 1:50:59
epoch [11/50] batch [360/392] time 0.433 (0.434) data 0.000 (0.001) loss 1.7348 (1.2164) lr 1.9511e-03 eta 1:50:50
epoch [11/50] batch [380/392] time 0.422 (0.434) data 0.000 (0.001) loss 1.8257 (1.2041) lr 1.9511e-03 eta 1:50:39
epoch [12/50] batch [20/392] time 0.426 (0.452) data 0.000 (0.019) loss 1.6287 (1.3264) lr 1.9298e-03 eta 1:54:56
epoch [12/50] batch [40/392] time 0.438 (0.443) data 0.000 (0.009) loss 1.1515 (1.1903) lr 1.9298e-03 eta 1:52:31
epoch [12/50] batch [60/392] time 0.434 (0.440) data 0.000 (0.006) loss 3.2079 (1.2628) lr 1.9298e-03 eta 1:51:41
epoch [12/50] batch [80/392] time 0.434 (0.438) data 0.000 (0.005) loss 1.1359 (1.2916) lr 1.9298e-03 eta 1:51:07
epoch [12/50] batch [100/392] time 0.428 (0.438) data 0.000 (0.004) loss 1.1969 (1.3022) lr 1.9298e-03 eta 1:50:45
epoch [12/50] batch [120/392] time 0.429 (0.437) data 0.000 (0.003) loss 2.5144 (1.2656) lr 1.9298e-03 eta 1:50:25
epoch [12/50] batch [140/392] time 0.433 (0.436) data 0.000 (0.003) loss 0.3298 (1.2552) lr 1.9298e-03 eta 1:50:09
epoch [12/50] batch [160/392] time 0.432 (0.436) data 0.000 (0.002) loss 1.5400 (1.2310) lr 1.9298e-03 eta 1:49:56
epoch [12/50] batch [180/392] time 0.426 (0.436) data 0.000 (0.002) loss 0.2092 (1.2216) lr 1.9298e-03 eta 1:49:42
epoch [12/50] batch [200/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.3844 (1.2243) lr 1.9298e-03 eta 1:49:30
epoch [12/50] batch [220/392] time 0.437 (0.435) data 0.000 (0.002) loss 1.5565 (1.2294) lr 1.9298e-03 eta 1:49:20
epoch [12/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.1948 (1.2249) lr 1.9298e-03 eta 1:49:09
epoch [12/50] batch [260/392] time 0.424 (0.435) data 0.000 (0.002) loss 1.3618 (1.2171) lr 1.9298e-03 eta 1:48:59
epoch [12/50] batch [280/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.4880 (1.2235) lr 1.9298e-03 eta 1:48:49
epoch [12/50] batch [300/392] time 0.437 (0.435) data 0.000 (0.001) loss 1.1940 (1.2128) lr 1.9298e-03 eta 1:48:40
epoch [12/50] batch [320/392] time 0.427 (0.435) data 0.000 (0.001) loss 1.9340 (1.2007) lr 1.9298e-03 eta 1:48:32
epoch [12/50] batch [340/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.0254 (1.2039) lr 1.9298e-03 eta 1:48:22
epoch [12/50] batch [360/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.5293 (1.1892) lr 1.9298e-03 eta 1:48:13
epoch [12/50] batch [380/392] time 0.420 (0.435) data 0.000 (0.001) loss 1.5535 (1.1939) lr 1.9298e-03 eta 1:48:02
epoch [13/50] batch [20/392] time 0.434 (0.451) data 0.000 (0.018) loss 0.3926 (0.8835) lr 1.9048e-03 eta 1:51:56
epoch [13/50] batch [40/392] time 0.438 (0.443) data 0.000 (0.009) loss 1.6812 (1.0741) lr 1.9048e-03 eta 1:49:40
epoch [13/50] batch [60/392] time 0.437 (0.440) data 0.000 (0.006) loss 0.7728 (1.0278) lr 1.9048e-03 eta 1:48:44
epoch [13/50] batch [80/392] time 0.442 (0.438) data 0.000 (0.005) loss 1.6408 (1.0340) lr 1.9048e-03 eta 1:48:14
epoch [13/50] batch [100/392] time 0.428 (0.437) data 0.000 (0.004) loss 1.5473 (1.0529) lr 1.9048e-03 eta 1:47:50
epoch [13/50] batch [120/392] time 0.437 (0.437) data 0.000 (0.003) loss 1.1585 (1.0576) lr 1.9048e-03 eta 1:47:31
epoch [13/50] batch [140/392] time 0.437 (0.436) data 0.000 (0.003) loss 1.5234 (1.1156) lr 1.9048e-03 eta 1:47:17
epoch [13/50] batch [160/392] time 0.433 (0.436) data 0.000 (0.002) loss 2.5521 (1.1265) lr 1.9048e-03 eta 1:47:05
epoch [13/50] batch [180/392] time 0.428 (0.436) data 0.000 (0.002) loss 1.1103 (1.1531) lr 1.9048e-03 eta 1:46:52
epoch [13/50] batch [200/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.7957 (1.1793) lr 1.9048e-03 eta 1:46:41
epoch [13/50] batch [220/392] time 0.433 (0.435) data 0.000 (0.002) loss 0.8181 (1.1868) lr 1.9048e-03 eta 1:46:29
epoch [13/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.5068 (1.1805) lr 1.9048e-03 eta 1:46:19
epoch [13/50] batch [260/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.6308 (1.1834) lr 1.9048e-03 eta 1:46:08
epoch [13/50] batch [280/392] time 0.433 (0.435) data 0.000 (0.001) loss 0.3371 (1.1756) lr 1.9048e-03 eta 1:45:58
epoch [13/50] batch [300/392] time 0.440 (0.435) data 0.000 (0.001) loss 1.0550 (1.1704) lr 1.9048e-03 eta 1:45:48
epoch [13/50] batch [320/392] time 0.428 (0.435) data 0.000 (0.001) loss 0.1371 (1.1713) lr 1.9048e-03 eta 1:45:39
epoch [13/50] batch [340/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.4237 (1.1623) lr 1.9048e-03 eta 1:45:31
epoch [13/50] batch [360/392] time 0.434 (0.435) data 0.000 (0.001) loss 1.6596 (1.1622) lr 1.9048e-03 eta 1:45:20
epoch [13/50] batch [380/392] time 0.428 (0.435) data 0.000 (0.001) loss 1.1530 (1.1713) lr 1.9048e-03 eta 1:45:11
epoch [14/50] batch [20/392] time 0.429 (0.452) data 0.000 (0.018) loss 1.3228 (1.1989) lr 1.8763e-03 eta 1:49:00
epoch [14/50] batch [40/392] time 0.432 (0.442) data 0.000 (0.009) loss 0.0214 (1.0306) lr 1.8763e-03 eta 1:46:38
epoch [14/50] batch [60/392] time 0.433 (0.439) data 0.000 (0.006) loss 0.5545 (1.0492) lr 1.8763e-03 eta 1:45:41
epoch [14/50] batch [80/392] time 0.433 (0.438) data 0.000 (0.005) loss 1.6818 (1.1268) lr 1.8763e-03 eta 1:45:15
epoch [14/50] batch [100/392] time 0.422 (0.437) data 0.000 (0.004) loss 2.3280 (1.1472) lr 1.8763e-03 eta 1:44:50
epoch [14/50] batch [120/392] time 0.438 (0.436) data 0.000 (0.003) loss 1.7357 (1.1483) lr 1.8763e-03 eta 1:44:34
epoch [14/50] batch [140/392] time 0.439 (0.436) data 0.000 (0.003) loss 2.0473 (1.1666) lr 1.8763e-03 eta 1:44:20
epoch [14/50] batch [160/392] time 0.433 (0.436) data 0.000 (0.002) loss 0.8634 (1.1469) lr 1.8763e-03 eta 1:44:07
epoch [14/50] batch [180/392] time 0.437 (0.435) data 0.000 (0.002) loss 2.2462 (1.1545) lr 1.8763e-03 eta 1:43:55
epoch [14/50] batch [200/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.3807 (1.1496) lr 1.8763e-03 eta 1:43:42
epoch [14/50] batch [220/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.0172 (1.1328) lr 1.8763e-03 eta 1:43:30
epoch [14/50] batch [240/392] time 0.432 (0.435) data 0.000 (0.002) loss 1.3954 (1.1551) lr 1.8763e-03 eta 1:43:19
epoch [14/50] batch [260/392] time 0.441 (0.434) data 0.000 (0.002) loss 1.6341 (1.1618) lr 1.8763e-03 eta 1:43:07
epoch [14/50] batch [280/392] time 0.432 (0.434) data 0.000 (0.001) loss 1.8803 (1.1649) lr 1.8763e-03 eta 1:42:58
epoch [14/50] batch [300/392] time 0.432 (0.434) data 0.000 (0.001) loss 0.3195 (1.1790) lr 1.8763e-03 eta 1:42:47
epoch [14/50] batch [320/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.1574 (1.1786) lr 1.8763e-03 eta 1:42:39
epoch [14/50] batch [340/392] time 0.434 (0.434) data 0.000 (0.001) loss 0.4536 (1.1831) lr 1.8763e-03 eta 1:42:31
epoch [14/50] batch [360/392] time 0.439 (0.434) data 0.000 (0.001) loss 1.5827 (1.1954) lr 1.8763e-03 eta 1:42:22
epoch [14/50] batch [380/392] time 0.432 (0.434) data 0.000 (0.001) loss 0.4286 (1.1850) lr 1.8763e-03 eta 1:42:13
epoch [15/50] batch [20/392] time 0.437 (0.453) data 0.000 (0.018) loss 1.4418 (1.0721) lr 1.8443e-03 eta 1:46:25
epoch [15/50] batch [40/392] time 0.429 (0.442) data 0.000 (0.009) loss 1.1233 (1.2185) lr 1.8443e-03 eta 1:43:44
epoch [15/50] batch [60/392] time 0.423 (0.439) data 0.000 (0.006) loss 1.8714 (1.1936) lr 1.8443e-03 eta 1:42:47
epoch [15/50] batch [80/392] time 0.434 (0.438) data 0.000 (0.005) loss 2.3408 (1.2160) lr 1.8443e-03 eta 1:42:21
epoch [15/50] batch [100/392] time 0.437 (0.437) data 0.000 (0.004) loss 1.8609 (1.2504) lr 1.8443e-03 eta 1:42:00
epoch [15/50] batch [120/392] time 0.438 (0.436) data 0.000 (0.003) loss 1.0788 (1.2502) lr 1.8443e-03 eta 1:41:40
epoch [15/50] batch [140/392] time 0.433 (0.436) data 0.000 (0.003) loss 1.7202 (1.2275) lr 1.8443e-03 eta 1:41:26
epoch [15/50] batch [160/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.8527 (1.1958) lr 1.8443e-03 eta 1:41:14
epoch [15/50] batch [180/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.3044 (1.2027) lr 1.8443e-03 eta 1:41:02
epoch [15/50] batch [200/392] time 0.439 (0.435) data 0.000 (0.002) loss 1.2481 (1.1996) lr 1.8443e-03 eta 1:40:52
epoch [15/50] batch [220/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.2170 (1.1876) lr 1.8443e-03 eta 1:40:40
epoch [15/50] batch [240/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.1604 (1.1968) lr 1.8443e-03 eta 1:40:31
epoch [15/50] batch [260/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.2179 (1.1743) lr 1.8443e-03 eta 1:40:21
epoch [15/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.5781 (1.1712) lr 1.8443e-03 eta 1:40:11
epoch [15/50] batch [300/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.6189 (1.1671) lr 1.8443e-03 eta 1:40:00
epoch [15/50] batch [320/392] time 0.437 (0.434) data 0.000 (0.001) loss 2.4264 (1.1830) lr 1.8443e-03 eta 1:39:51
epoch [15/50] batch [340/392] time 0.438 (0.434) data 0.000 (0.001) loss 0.4406 (1.1834) lr 1.8443e-03 eta 1:39:42
epoch [15/50] batch [360/392] time 0.439 (0.434) data 0.000 (0.001) loss 0.6619 (1.1763) lr 1.8443e-03 eta 1:39:32
epoch [15/50] batch [380/392] time 0.432 (0.434) data 0.000 (0.001) loss 1.5837 (1.1780) lr 1.8443e-03 eta 1:39:23
epoch [16/50] batch [20/392] time 0.428 (0.454) data 0.000 (0.018) loss 1.6996 (1.4124) lr 1.8090e-03 eta 1:43:35
epoch [16/50] batch [40/392] time 0.438 (0.444) data 0.000 (0.009) loss 0.4247 (1.1579) lr 1.8090e-03 eta 1:41:18
epoch [16/50] batch [60/392] time 0.424 (0.440) data 0.000 (0.006) loss 0.4376 (1.1030) lr 1.8090e-03 eta 1:40:16
epoch [16/50] batch [80/392] time 0.433 (0.439) data 0.000 (0.005) loss 0.9845 (1.1601) lr 1.8090e-03 eta 1:39:42
epoch [16/50] batch [100/392] time 0.438 (0.438) data 0.000 (0.004) loss 0.9169 (1.2201) lr 1.8090e-03 eta 1:39:20
epoch [16/50] batch [120/392] time 0.438 (0.437) data 0.000 (0.003) loss 1.8583 (1.2212) lr 1.8090e-03 eta 1:39:02
epoch [16/50] batch [140/392] time 0.429 (0.437) data 0.000 (0.003) loss 1.1976 (1.2376) lr 1.8090e-03 eta 1:38:48
epoch [16/50] batch [160/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.6382 (1.1987) lr 1.8090e-03 eta 1:38:33
epoch [16/50] batch [180/392] time 0.437 (0.436) data 0.000 (0.002) loss 1.2567 (1.1883) lr 1.8090e-03 eta 1:38:20
epoch [16/50] batch [200/392] time 0.426 (0.436) data 0.000 (0.002) loss 1.5628 (1.2066) lr 1.8090e-03 eta 1:38:08
epoch [16/50] batch [220/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.1739 (1.2159) lr 1.8090e-03 eta 1:37:58
epoch [16/50] batch [240/392] time 0.436 (0.435) data 0.000 (0.002) loss 0.8146 (1.1945) lr 1.8090e-03 eta 1:37:47
epoch [16/50] batch [260/392] time 0.427 (0.435) data 0.000 (0.002) loss 0.6727 (1.1757) lr 1.8090e-03 eta 1:37:35
epoch [16/50] batch [280/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.2826 (1.1664) lr 1.8090e-03 eta 1:37:24
epoch [16/50] batch [300/392] time 0.436 (0.435) data 0.000 (0.001) loss 1.0610 (1.1563) lr 1.8090e-03 eta 1:37:14
epoch [16/50] batch [320/392] time 0.427 (0.435) data 0.000 (0.001) loss 0.5768 (1.1526) lr 1.8090e-03 eta 1:37:03
epoch [16/50] batch [340/392] time 0.422 (0.435) data 0.000 (0.001) loss 1.2423 (1.1657) lr 1.8090e-03 eta 1:36:54
epoch [16/50] batch [360/392] time 0.435 (0.434) data 0.000 (0.001) loss 0.5788 (1.1636) lr 1.8090e-03 eta 1:36:43
epoch [16/50] batch [380/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.3910 (1.1673) lr 1.8090e-03 eta 1:36:34
epoch [17/50] batch [20/392] time 0.437 (0.451) data 0.000 (0.018) loss 1.4308 (0.9556) lr 1.7705e-03 eta 1:40:06
epoch [17/50] batch [40/392] time 0.438 (0.442) data 0.000 (0.009) loss 1.4225 (0.9747) lr 1.7705e-03 eta 1:37:54
epoch [17/50] batch [60/392] time 0.428 (0.440) data 0.000 (0.006) loss 0.9617 (1.0614) lr 1.7705e-03 eta 1:37:11
epoch [17/50] batch [80/392] time 0.429 (0.438) data 0.000 (0.005) loss 1.3141 (1.1096) lr 1.7705e-03 eta 1:36:45
epoch [17/50] batch [100/392] time 0.429 (0.437) data 0.000 (0.004) loss 3.4088 (1.1574) lr 1.7705e-03 eta 1:36:25
epoch [17/50] batch [120/392] time 0.439 (0.437) data 0.000 (0.003) loss 0.7306 (1.1122) lr 1.7705e-03 eta 1:36:08
epoch [17/50] batch [140/392] time 0.439 (0.436) data 0.000 (0.003) loss 0.7330 (1.0945) lr 1.7705e-03 eta 1:35:56
epoch [17/50] batch [160/392] time 0.437 (0.436) data 0.000 (0.002) loss 1.3504 (1.1149) lr 1.7705e-03 eta 1:35:43
epoch [17/50] batch [180/392] time 0.424 (0.436) data 0.000 (0.002) loss 1.4153 (1.1127) lr 1.7705e-03 eta 1:35:31
epoch [17/50] batch [200/392] time 0.428 (0.436) data 0.000 (0.002) loss 0.6725 (1.1862) lr 1.7705e-03 eta 1:35:19
epoch [17/50] batch [220/392] time 0.431 (0.435) data 0.000 (0.002) loss 0.3180 (1.1873) lr 1.7705e-03 eta 1:35:06
epoch [17/50] batch [240/392] time 0.434 (0.435) data 0.000 (0.002) loss 1.7759 (1.1994) lr 1.7705e-03 eta 1:34:55
epoch [17/50] batch [260/392] time 0.427 (0.435) data 0.000 (0.002) loss 0.8519 (1.2000) lr 1.7705e-03 eta 1:34:44
epoch [17/50] batch [280/392] time 0.429 (0.435) data 0.000 (0.001) loss 1.3596 (1.2040) lr 1.7705e-03 eta 1:34:32
epoch [17/50] batch [300/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.6203 (1.2091) lr 1.7705e-03 eta 1:34:21
epoch [17/50] batch [320/392] time 0.438 (0.434) data 0.000 (0.001) loss 0.7510 (1.2153) lr 1.7705e-03 eta 1:34:11
epoch [17/50] batch [340/392] time 0.434 (0.434) data 0.000 (0.001) loss 2.9886 (1.2259) lr 1.7705e-03 eta 1:34:01
epoch [17/50] batch [360/392] time 0.429 (0.434) data 0.000 (0.001) loss 0.8273 (1.2373) lr 1.7705e-03 eta 1:33:52
epoch [17/50] batch [380/392] time 0.437 (0.434) data 0.000 (0.001) loss 2.3557 (1.2363) lr 1.7705e-03 eta 1:33:43
epoch [18/50] batch [20/392] time 0.437 (0.451) data 0.000 (0.017) loss 2.2980 (1.2991) lr 1.7290e-03 eta 1:36:59
epoch [18/50] batch [40/392] time 0.428 (0.441) data 0.000 (0.009) loss 2.2530 (1.2785) lr 1.7290e-03 eta 1:34:51
epoch [18/50] batch [60/392] time 0.438 (0.439) data 0.000 (0.006) loss 1.2442 (1.1994) lr 1.7290e-03 eta 1:34:13
epoch [18/50] batch [80/392] time 0.434 (0.438) data 0.000 (0.004) loss 0.5879 (1.1402) lr 1.7290e-03 eta 1:33:46
epoch [18/50] batch [100/392] time 0.423 (0.437) data 0.000 (0.004) loss 1.5316 (1.1526) lr 1.7290e-03 eta 1:33:23
epoch [18/50] batch [120/392] time 0.429 (0.436) data 0.000 (0.003) loss 0.6416 (1.1554) lr 1.7290e-03 eta 1:33:08
epoch [18/50] batch [140/392] time 0.434 (0.436) data 0.000 (0.003) loss 0.8612 (1.1474) lr 1.7290e-03 eta 1:32:53
epoch [18/50] batch [160/392] time 0.440 (0.435) data 0.000 (0.002) loss 0.6444 (1.1736) lr 1.7290e-03 eta 1:32:42
epoch [18/50] batch [180/392] time 0.436 (0.435) data 0.000 (0.002) loss 2.5709 (1.1513) lr 1.7290e-03 eta 1:32:31
epoch [18/50] batch [200/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.6462 (1.1531) lr 1.7290e-03 eta 1:32:20
epoch [18/50] batch [220/392] time 0.423 (0.435) data 0.000 (0.002) loss 1.3865 (1.1504) lr 1.7290e-03 eta 1:32:09
epoch [18/50] batch [240/392] time 0.428 (0.435) data 0.000 (0.002) loss 2.0741 (1.1490) lr 1.7290e-03 eta 1:32:00
epoch [18/50] batch [260/392] time 0.434 (0.435) data 0.000 (0.001) loss 0.4100 (1.1453) lr 1.7290e-03 eta 1:31:49
epoch [18/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.3518 (1.1454) lr 1.7290e-03 eta 1:31:39
epoch [18/50] batch [300/392] time 0.429 (0.435) data 0.000 (0.001) loss 1.1257 (1.1557) lr 1.7290e-03 eta 1:31:31
epoch [18/50] batch [320/392] time 0.427 (0.434) data 0.000 (0.001) loss 2.3748 (1.1582) lr 1.7290e-03 eta 1:31:20
epoch [18/50] batch [340/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.4126 (1.1509) lr 1.7290e-03 eta 1:31:11
epoch [18/50] batch [360/392] time 0.434 (0.434) data 0.000 (0.001) loss 0.5907 (1.1619) lr 1.7290e-03 eta 1:31:01
epoch [18/50] batch [380/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.0458 (1.1634) lr 1.7290e-03 eta 1:30:52
epoch [19/50] batch [20/392] time 0.437 (0.453) data 0.000 (0.018) loss 1.0654 (1.2592) lr 1.6845e-03 eta 1:34:32
epoch [19/50] batch [40/392] time 0.440 (0.443) data 0.000 (0.009) loss 0.2754 (1.1455) lr 1.6845e-03 eta 1:32:22
epoch [19/50] batch [60/392] time 0.438 (0.440) data 0.000 (0.006) loss 2.1966 (1.2358) lr 1.6845e-03 eta 1:31:35
epoch [19/50] batch [80/392] time 0.434 (0.439) data 0.000 (0.005) loss 1.2097 (1.2203) lr 1.6845e-03 eta 1:31:06
epoch [19/50] batch [100/392] time 0.438 (0.438) data 0.000 (0.004) loss 0.6432 (1.1879) lr 1.6845e-03 eta 1:30:49
epoch [19/50] batch [120/392] time 0.438 (0.437) data 0.000 (0.003) loss 0.6377 (1.1913) lr 1.6845e-03 eta 1:30:32
epoch [19/50] batch [140/392] time 0.437 (0.437) data 0.000 (0.003) loss 1.1669 (1.2312) lr 1.6845e-03 eta 1:30:18
epoch [19/50] batch [160/392] time 0.428 (0.436) data 0.000 (0.002) loss 0.6193 (1.2454) lr 1.6845e-03 eta 1:30:02
epoch [19/50] batch [180/392] time 0.434 (0.436) data 0.000 (0.002) loss 0.4414 (1.2454) lr 1.6845e-03 eta 1:29:51
epoch [19/50] batch [200/392] time 0.428 (0.436) data 0.000 (0.002) loss 1.1987 (1.2581) lr 1.6845e-03 eta 1:29:39
epoch [19/50] batch [220/392] time 0.427 (0.436) data 0.000 (0.002) loss 1.0163 (1.2708) lr 1.6845e-03 eta 1:29:28
epoch [19/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.3272 (1.2530) lr 1.6845e-03 eta 1:29:17
epoch [19/50] batch [260/392] time 0.426 (0.435) data 0.000 (0.002) loss 0.3109 (1.2449) lr 1.6845e-03 eta 1:29:07
epoch [19/50] batch [280/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.3076 (1.2426) lr 1.6845e-03 eta 1:28:58
epoch [19/50] batch [300/392] time 0.432 (0.435) data 0.000 (0.001) loss 0.1948 (1.2355) lr 1.6845e-03 eta 1:28:49
epoch [19/50] batch [320/392] time 0.427 (0.435) data 0.000 (0.001) loss 1.2936 (1.2385) lr 1.6845e-03 eta 1:28:39
epoch [19/50] batch [340/392] time 0.434 (0.435) data 0.000 (0.001) loss 0.8121 (1.2473) lr 1.6845e-03 eta 1:28:28
epoch [19/50] batch [360/392] time 0.429 (0.435) data 0.000 (0.001) loss 1.5167 (1.2474) lr 1.6845e-03 eta 1:28:18
epoch [19/50] batch [380/392] time 0.426 (0.435) data 0.000 (0.001) loss 2.5972 (1.2392) lr 1.6845e-03 eta 1:28:07
epoch [20/50] batch [20/392] time 0.433 (0.452) data 0.000 (0.018) loss 0.9857 (1.0864) lr 1.6374e-03 eta 1:31:20
epoch [20/50] batch [40/392] time 0.437 (0.443) data 0.000 (0.009) loss 1.3094 (1.0842) lr 1.6374e-03 eta 1:29:26
epoch [20/50] batch [60/392] time 0.428 (0.440) data 0.000 (0.006) loss 0.9463 (1.1789) lr 1.6374e-03 eta 1:28:39
epoch [20/50] batch [80/392] time 0.427 (0.439) data 0.000 (0.005) loss 0.2084 (1.1449) lr 1.6374e-03 eta 1:28:16
epoch [20/50] batch [100/392] time 0.438 (0.438) data 0.000 (0.004) loss 1.0369 (1.1952) lr 1.6374e-03 eta 1:27:53
epoch [20/50] batch [120/392] time 0.438 (0.437) data 0.000 (0.003) loss 0.4269 (1.1514) lr 1.6374e-03 eta 1:27:37
epoch [20/50] batch [140/392] time 0.428 (0.437) data 0.000 (0.003) loss 0.3373 (1.1587) lr 1.6374e-03 eta 1:27:23
epoch [20/50] batch [160/392] time 0.437 (0.436) data 0.000 (0.002) loss 0.7179 (1.1686) lr 1.6374e-03 eta 1:27:12
epoch [20/50] batch [180/392] time 0.436 (0.436) data 0.000 (0.002) loss 0.4690 (1.1492) lr 1.6374e-03 eta 1:26:59
epoch [20/50] batch [200/392] time 0.422 (0.436) data 0.000 (0.002) loss 0.4314 (1.1425) lr 1.6374e-03 eta 1:26:46
epoch [20/50] batch [220/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.2958 (1.1458) lr 1.6374e-03 eta 1:26:35
epoch [20/50] batch [240/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.4624 (1.1358) lr 1.6374e-03 eta 1:26:24
epoch [20/50] batch [260/392] time 0.424 (0.435) data 0.000 (0.002) loss 1.5444 (1.1351) lr 1.6374e-03 eta 1:26:13
epoch [20/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.0713 (1.1551) lr 1.6374e-03 eta 1:26:03
epoch [20/50] batch [300/392] time 0.424 (0.435) data 0.000 (0.001) loss 1.3925 (1.1587) lr 1.6374e-03 eta 1:25:53
epoch [20/50] batch [320/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.5405 (1.1636) lr 1.6374e-03 eta 1:25:43
epoch [20/50] batch [340/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.2529 (1.1605) lr 1.6374e-03 eta 1:25:34
epoch [20/50] batch [360/392] time 0.432 (0.435) data 0.000 (0.001) loss 2.6256 (1.1694) lr 1.6374e-03 eta 1:25:25
epoch [20/50] batch [380/392] time 0.436 (0.435) data 0.000 (0.001) loss 0.5309 (1.1628) lr 1.6374e-03 eta 1:25:15
epoch [21/50] batch [20/392] time 0.423 (0.451) data 0.000 (0.017) loss 1.6380 (1.2806) lr 1.5878e-03 eta 1:28:13
epoch [21/50] batch [40/392] time 0.437 (0.442) data 0.000 (0.009) loss 2.1729 (1.3254) lr 1.5878e-03 eta 1:26:22
epoch [21/50] batch [60/392] time 0.438 (0.439) data 0.000 (0.006) loss 0.2453 (1.2716) lr 1.5878e-03 eta 1:25:34
epoch [21/50] batch [80/392] time 0.433 (0.438) data 0.000 (0.004) loss 1.2887 (1.2012) lr 1.5878e-03 eta 1:25:11
epoch [21/50] batch [100/392] time 0.428 (0.437) data 0.000 (0.004) loss 1.7056 (1.2244) lr 1.5878e-03 eta 1:24:55
epoch [21/50] batch [120/392] time 0.427 (0.436) data 0.000 (0.003) loss 2.1931 (1.2259) lr 1.5878e-03 eta 1:24:39
epoch [21/50] batch [140/392] time 0.427 (0.436) data 0.000 (0.003) loss 0.9557 (1.2002) lr 1.5878e-03 eta 1:24:24
epoch [21/50] batch [160/392] time 0.437 (0.436) data 0.000 (0.002) loss 1.3664 (1.2037) lr 1.5878e-03 eta 1:24:13
epoch [21/50] batch [180/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.3000 (1.1761) lr 1.5878e-03 eta 1:24:01
epoch [21/50] batch [200/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.7743 (1.1580) lr 1.5878e-03 eta 1:23:50
epoch [21/50] batch [220/392] time 0.437 (0.435) data 0.000 (0.002) loss 1.5841 (1.1558) lr 1.5878e-03 eta 1:23:39
epoch [21/50] batch [240/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.5365 (1.1469) lr 1.5878e-03 eta 1:23:28
epoch [21/50] batch [260/392] time 0.422 (0.435) data 0.000 (0.001) loss 1.3029 (1.1341) lr 1.5878e-03 eta 1:23:17
epoch [21/50] batch [280/392] time 0.424 (0.434) data 0.000 (0.001) loss 0.8010 (1.1613) lr 1.5878e-03 eta 1:23:07
epoch [21/50] batch [300/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.5156 (1.1561) lr 1.5878e-03 eta 1:22:58
epoch [21/50] batch [320/392] time 0.427 (0.434) data 0.000 (0.001) loss 1.1055 (1.1415) lr 1.5878e-03 eta 1:22:48
epoch [21/50] batch [340/392] time 0.434 (0.434) data 0.000 (0.001) loss 1.4453 (1.1329) lr 1.5878e-03 eta 1:22:39
epoch [21/50] batch [360/392] time 0.423 (0.434) data 0.000 (0.001) loss 0.9191 (1.1321) lr 1.5878e-03 eta 1:22:29
epoch [21/50] batch [380/392] time 0.436 (0.434) data 0.000 (0.001) loss 1.3637 (1.1376) lr 1.5878e-03 eta 1:22:20
epoch [22/50] batch [20/392] time 0.437 (0.453) data 0.000 (0.018) loss 1.3528 (1.2139) lr 1.5358e-03 eta 1:25:36
epoch [22/50] batch [40/392] time 0.431 (0.443) data 0.000 (0.009) loss 1.4212 (1.0330) lr 1.5358e-03 eta 1:23:34
epoch [22/50] batch [60/392] time 0.423 (0.440) data 0.000 (0.006) loss 0.8459 (1.1089) lr 1.5358e-03 eta 1:22:50
epoch [22/50] batch [80/392] time 0.438 (0.438) data 0.000 (0.005) loss 1.1867 (1.0465) lr 1.5358e-03 eta 1:22:23
epoch [22/50] batch [100/392] time 0.435 (0.437) data 0.000 (0.004) loss 1.3525 (1.0784) lr 1.5358e-03 eta 1:22:03
epoch [22/50] batch [120/392] time 0.438 (0.436) data 0.000 (0.003) loss 0.2925 (1.0539) lr 1.5358e-03 eta 1:21:49
epoch [22/50] batch [140/392] time 0.433 (0.436) data 0.000 (0.003) loss 1.7056 (1.0579) lr 1.5358e-03 eta 1:21:36
epoch [22/50] batch [160/392] time 0.438 (0.436) data 0.000 (0.002) loss 1.6923 (1.0587) lr 1.5358e-03 eta 1:21:23
epoch [22/50] batch [180/392] time 0.422 (0.435) data 0.000 (0.002) loss 2.1411 (1.1206) lr 1.5358e-03 eta 1:21:09
epoch [22/50] batch [200/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.9470 (1.1156) lr 1.5358e-03 eta 1:20:58
epoch [22/50] batch [220/392] time 0.424 (0.435) data 0.000 (0.002) loss 1.4928 (1.1340) lr 1.5358e-03 eta 1:20:48
epoch [22/50] batch [240/392] time 0.439 (0.435) data 0.000 (0.002) loss 1.0690 (1.1074) lr 1.5358e-03 eta 1:20:38
epoch [22/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.1485 (1.1125) lr 1.5358e-03 eta 1:20:29
epoch [22/50] batch [280/392] time 0.429 (0.435) data 0.000 (0.001) loss 1.2217 (1.1242) lr 1.5358e-03 eta 1:20:19
epoch [22/50] batch [300/392] time 0.428 (0.435) data 0.000 (0.001) loss 2.7687 (1.1525) lr 1.5358e-03 eta 1:20:09
epoch [22/50] batch [320/392] time 0.428 (0.434) data 0.000 (0.001) loss 1.4404 (1.1503) lr 1.5358e-03 eta 1:20:00
epoch [22/50] batch [340/392] time 0.429 (0.434) data 0.000 (0.001) loss 1.2617 (1.1441) lr 1.5358e-03 eta 1:19:50
epoch [22/50] batch [360/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.1778 (1.1552) lr 1.5358e-03 eta 1:19:41
epoch [22/50] batch [380/392] time 0.422 (0.434) data 0.000 (0.001) loss 1.2328 (1.1626) lr 1.5358e-03 eta 1:19:31
epoch [23/50] batch [20/392] time 0.435 (0.453) data 0.000 (0.018) loss 0.3976 (1.0687) lr 1.4818e-03 eta 1:22:43
epoch [23/50] batch [40/392] time 0.428 (0.444) data 0.000 (0.009) loss 2.5192 (1.1525) lr 1.4818e-03 eta 1:20:50
epoch [23/50] batch [60/392] time 0.437 (0.440) data 0.000 (0.006) loss 0.6296 (1.1546) lr 1.4818e-03 eta 1:20:05
epoch [23/50] batch [80/392] time 0.437 (0.439) data 0.000 (0.005) loss 1.9256 (1.1942) lr 1.4818e-03 eta 1:19:40
epoch [23/50] batch [100/392] time 0.435 (0.438) data 0.000 (0.004) loss 1.5959 (1.2008) lr 1.4818e-03 eta 1:19:18
epoch [23/50] batch [120/392] time 0.437 (0.437) data 0.000 (0.003) loss 0.5514 (1.1713) lr 1.4818e-03 eta 1:19:03
epoch [23/50] batch [140/392] time 0.429 (0.436) data 0.000 (0.003) loss 0.5842 (1.1504) lr 1.4818e-03 eta 1:18:49
epoch [23/50] batch [160/392] time 0.438 (0.436) data 0.000 (0.002) loss 1.2376 (1.1477) lr 1.4818e-03 eta 1:18:38
epoch [23/50] batch [180/392] time 0.434 (0.436) data 0.000 (0.002) loss 0.6033 (1.1640) lr 1.4818e-03 eta 1:18:27
epoch [23/50] batch [200/392] time 0.427 (0.436) data 0.000 (0.002) loss 0.7642 (1.1826) lr 1.4818e-03 eta 1:18:15
epoch [23/50] batch [220/392] time 0.436 (0.436) data 0.000 (0.002) loss 1.0403 (1.1776) lr 1.4818e-03 eta 1:18:05
epoch [23/50] batch [240/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.7642 (1.2026) lr 1.4818e-03 eta 1:17:53
epoch [23/50] batch [260/392] time 0.428 (0.435) data 0.000 (0.002) loss 2.1139 (1.2155) lr 1.4818e-03 eta 1:17:43
epoch [23/50] batch [280/392] time 0.427 (0.435) data 0.000 (0.001) loss 1.1754 (1.2300) lr 1.4818e-03 eta 1:17:32
epoch [23/50] batch [300/392] time 0.427 (0.435) data 0.000 (0.001) loss 1.3297 (1.2160) lr 1.4818e-03 eta 1:17:22
epoch [23/50] batch [320/392] time 0.434 (0.435) data 0.000 (0.001) loss 0.9132 (1.2076) lr 1.4818e-03 eta 1:17:13
epoch [23/50] batch [340/392] time 0.423 (0.435) data 0.000 (0.001) loss 0.2707 (1.2011) lr 1.4818e-03 eta 1:17:03
epoch [23/50] batch [360/392] time 0.428 (0.435) data 0.000 (0.001) loss 1.3957 (1.2021) lr 1.4818e-03 eta 1:16:53
epoch [23/50] batch [380/392] time 0.425 (0.434) data 0.000 (0.001) loss 1.3131 (1.1918) lr 1.4818e-03 eta 1:16:43
epoch [24/50] batch [20/392] time 0.428 (0.452) data 0.000 (0.018) loss 0.8619 (1.2481) lr 1.4258e-03 eta 1:19:36
epoch [24/50] batch [40/392] time 0.437 (0.442) data 0.000 (0.009) loss 0.7133 (1.1797) lr 1.4258e-03 eta 1:17:45
epoch [24/50] batch [60/392] time 0.437 (0.439) data 0.000 (0.006) loss 1.8850 (1.2294) lr 1.4258e-03 eta 1:17:03
epoch [24/50] batch [80/392] time 0.437 (0.438) data 0.000 (0.005) loss 1.0600 (1.2053) lr 1.4258e-03 eta 1:16:38
epoch [24/50] batch [100/392] time 0.437 (0.437) data 0.000 (0.004) loss 1.0573 (1.1864) lr 1.4258e-03 eta 1:16:19
epoch [24/50] batch [120/392] time 0.437 (0.436) data 0.000 (0.003) loss 1.0928 (1.1503) lr 1.4258e-03 eta 1:16:03
epoch [24/50] batch [140/392] time 0.437 (0.436) data 0.000 (0.003) loss 2.3361 (1.1476) lr 1.4258e-03 eta 1:15:50
epoch [24/50] batch [160/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.6150 (1.1305) lr 1.4258e-03 eta 1:15:36
epoch [24/50] batch [180/392] time 0.438 (0.435) data 0.000 (0.002) loss 2.4720 (1.1424) lr 1.4258e-03 eta 1:15:26
epoch [24/50] batch [200/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.7846 (1.1353) lr 1.4258e-03 eta 1:15:16
epoch [24/50] batch [220/392] time 0.428 (0.435) data 0.000 (0.002) loss 2.4155 (1.1336) lr 1.4258e-03 eta 1:15:06
epoch [24/50] batch [240/392] time 0.433 (0.435) data 0.000 (0.002) loss 0.6462 (1.1318) lr 1.4258e-03 eta 1:14:56
epoch [24/50] batch [260/392] time 0.427 (0.435) data 0.000 (0.002) loss 2.0244 (1.1121) lr 1.4258e-03 eta 1:14:46
epoch [24/50] batch [280/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.1215 (1.1207) lr 1.4258e-03 eta 1:14:36
epoch [24/50] batch [300/392] time 0.428 (0.434) data 0.000 (0.001) loss 0.4805 (1.1307) lr 1.4258e-03 eta 1:14:27
epoch [24/50] batch [320/392] time 0.439 (0.434) data 0.000 (0.001) loss 1.2671 (1.1491) lr 1.4258e-03 eta 1:14:17
epoch [24/50] batch [340/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.5386 (1.1427) lr 1.4258e-03 eta 1:14:08
epoch [24/50] batch [360/392] time 0.439 (0.434) data 0.000 (0.001) loss 0.2143 (1.1620) lr 1.4258e-03 eta 1:13:58
epoch [24/50] batch [380/392] time 0.426 (0.434) data 0.000 (0.001) loss 1.1763 (1.1733) lr 1.4258e-03 eta 1:13:49
epoch [25/50] batch [20/392] time 0.433 (0.450) data 0.000 (0.018) loss 1.4991 (1.1747) lr 1.3681e-03 eta 1:16:21
epoch [25/50] batch [40/392] time 0.437 (0.442) data 0.000 (0.009) loss 1.5862 (1.1897) lr 1.3681e-03 eta 1:14:47
epoch [25/50] batch [60/392] time 0.427 (0.439) data 0.000 (0.006) loss 0.9320 (1.1691) lr 1.3681e-03 eta 1:14:08
epoch [25/50] batch [80/392] time 0.437 (0.438) data 0.000 (0.005) loss 0.3269 (1.1413) lr 1.3681e-03 eta 1:13:49
epoch [25/50] batch [100/392] time 0.438 (0.437) data 0.000 (0.004) loss 1.1561 (1.1392) lr 1.3681e-03 eta 1:13:32
epoch [25/50] batch [120/392] time 0.432 (0.436) data 0.000 (0.003) loss 0.3475 (1.1470) lr 1.3681e-03 eta 1:13:15
epoch [25/50] batch [140/392] time 0.438 (0.436) data 0.000 (0.003) loss 2.2218 (1.1166) lr 1.3681e-03 eta 1:13:02
epoch [25/50] batch [160/392] time 0.437 (0.436) data 0.000 (0.002) loss 1.6653 (1.1257) lr 1.3681e-03 eta 1:12:49
epoch [25/50] batch [180/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.8766 (1.1225) lr 1.3681e-03 eta 1:12:37
epoch [25/50] batch [200/392] time 0.439 (0.435) data 0.000 (0.002) loss 0.6747 (1.1029) lr 1.3681e-03 eta 1:12:28
epoch [25/50] batch [220/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.3633 (1.1178) lr 1.3681e-03 eta 1:12:16
epoch [25/50] batch [240/392] time 0.433 (0.435) data 0.000 (0.002) loss 0.8530 (1.1275) lr 1.3681e-03 eta 1:12:07
epoch [25/50] batch [260/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.0848 (1.1241) lr 1.3681e-03 eta 1:11:57
epoch [25/50] batch [280/392] time 0.428 (0.435) data 0.000 (0.001) loss 3.6259 (1.1245) lr 1.3681e-03 eta 1:11:47
epoch [25/50] batch [300/392] time 0.436 (0.434) data 0.000 (0.001) loss 1.2348 (1.1205) lr 1.3681e-03 eta 1:11:37
epoch [25/50] batch [320/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.6487 (1.1335) lr 1.3681e-03 eta 1:11:27
epoch [25/50] batch [340/392] time 0.426 (0.434) data 0.000 (0.001) loss 1.6188 (1.1433) lr 1.3681e-03 eta 1:11:18
epoch [25/50] batch [360/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.9346 (1.1479) lr 1.3681e-03 eta 1:11:08
epoch [25/50] batch [380/392] time 0.435 (0.434) data 0.000 (0.001) loss 0.7296 (1.1498) lr 1.3681e-03 eta 1:10:59
epoch [26/50] batch [20/392] time 0.432 (0.450) data 0.000 (0.017) loss 1.5977 (0.9575) lr 1.3090e-03 eta 1:13:19
epoch [26/50] batch [40/392] time 0.433 (0.441) data 0.000 (0.009) loss 1.7017 (1.1345) lr 1.3090e-03 eta 1:11:47
epoch [26/50] batch [60/392] time 0.438 (0.439) data 0.000 (0.006) loss 0.6916 (1.1827) lr 1.3090e-03 eta 1:11:13
epoch [26/50] batch [80/392] time 0.427 (0.437) data 0.000 (0.004) loss 1.2274 (1.1913) lr 1.3090e-03 eta 1:10:49
epoch [26/50] batch [100/392] time 0.433 (0.436) data 0.000 (0.004) loss 1.5386 (1.1962) lr 1.3090e-03 eta 1:10:33
epoch [26/50] batch [120/392] time 0.437 (0.436) data 0.000 (0.003) loss 0.5602 (1.2075) lr 1.3090e-03 eta 1:10:19
epoch [26/50] batch [140/392] time 0.439 (0.436) data 0.000 (0.003) loss 0.8587 (1.2012) lr 1.3090e-03 eta 1:10:07
epoch [26/50] batch [160/392] time 0.424 (0.435) data 0.000 (0.002) loss 0.7235 (1.1816) lr 1.3090e-03 eta 1:09:56
epoch [26/50] batch [180/392] time 0.434 (0.435) data 0.000 (0.002) loss 3.3605 (1.2059) lr 1.3090e-03 eta 1:09:46
epoch [26/50] batch [200/392] time 0.434 (0.435) data 0.000 (0.002) loss 0.5993 (1.1697) lr 1.3090e-03 eta 1:09:35
epoch [26/50] batch [220/392] time 0.439 (0.435) data 0.000 (0.002) loss 0.7798 (1.1688) lr 1.3090e-03 eta 1:09:26
epoch [26/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.2760 (1.1429) lr 1.3090e-03 eta 1:09:17
epoch [26/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.4436 (1.1614) lr 1.3090e-03 eta 1:09:08
epoch [26/50] batch [280/392] time 0.435 (0.435) data 0.000 (0.001) loss 1.0551 (1.1725) lr 1.3090e-03 eta 1:08:59
epoch [26/50] batch [300/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.9096 (1.1768) lr 1.3090e-03 eta 1:08:51
epoch [26/50] batch [320/392] time 0.439 (0.435) data 0.000 (0.001) loss 0.7656 (1.1727) lr 1.3090e-03 eta 1:08:42
epoch [26/50] batch [340/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.5125 (1.1756) lr 1.3090e-03 eta 1:08:33
epoch [26/50] batch [360/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.1665 (1.1668) lr 1.3090e-03 eta 1:08:24
epoch [26/50] batch [380/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.4173 (1.1572) lr 1.3090e-03 eta 1:08:15
epoch [27/50] batch [20/392] time 0.423 (0.450) data 0.000 (0.017) loss 2.8230 (1.3304) lr 1.2487e-03 eta 1:10:22
epoch [27/50] batch [40/392] time 0.439 (0.442) data 0.000 (0.009) loss 3.7826 (1.3552) lr 1.2487e-03 eta 1:09:00
epoch [27/50] batch [60/392] time 0.428 (0.439) data 0.000 (0.006) loss 0.5575 (1.2335) lr 1.2487e-03 eta 1:08:25
epoch [27/50] batch [80/392] time 0.426 (0.437) data 0.000 (0.004) loss 2.2314 (1.2152) lr 1.2487e-03 eta 1:08:00
epoch [27/50] batch [100/392] time 0.438 (0.436) data 0.000 (0.004) loss 0.6093 (1.2160) lr 1.2487e-03 eta 1:07:41
epoch [27/50] batch [120/392] time 0.428 (0.436) data 0.000 (0.003) loss 1.8218 (1.1820) lr 1.2487e-03 eta 1:07:28
epoch [27/50] batch [140/392] time 0.437 (0.435) data 0.000 (0.003) loss 2.1181 (1.2098) lr 1.2487e-03 eta 1:07:15
epoch [27/50] batch [160/392] time 0.436 (0.435) data 0.000 (0.002) loss 0.9388 (1.1980) lr 1.2487e-03 eta 1:07:02
epoch [27/50] batch [180/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.7219 (1.1621) lr 1.2487e-03 eta 1:06:51
epoch [27/50] batch [200/392] time 0.436 (0.435) data 0.000 (0.002) loss 0.9493 (1.1769) lr 1.2487e-03 eta 1:06:41
epoch [27/50] batch [220/392] time 0.432 (0.434) data 0.000 (0.002) loss 0.7931 (1.1786) lr 1.2487e-03 eta 1:06:32
epoch [27/50] batch [240/392] time 0.428 (0.434) data 0.000 (0.002) loss 0.6537 (1.1898) lr 1.2487e-03 eta 1:06:22
epoch [27/50] batch [260/392] time 0.439 (0.434) data 0.000 (0.001) loss 0.5336 (1.1881) lr 1.2487e-03 eta 1:06:12
epoch [27/50] batch [280/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.9491 (1.1929) lr 1.2487e-03 eta 1:06:03
epoch [27/50] batch [300/392] time 0.436 (0.434) data 0.000 (0.001) loss 0.7812 (1.1868) lr 1.2487e-03 eta 1:05:53
epoch [27/50] batch [320/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.2779 (1.1733) lr 1.2487e-03 eta 1:05:44
epoch [27/50] batch [340/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.5886 (1.1752) lr 1.2487e-03 eta 1:05:34
epoch [27/50] batch [360/392] time 0.436 (0.434) data 0.000 (0.001) loss 1.0011 (1.1755) lr 1.2487e-03 eta 1:05:26
epoch [27/50] batch [380/392] time 0.431 (0.434) data 0.000 (0.001) loss 2.3402 (1.1761) lr 1.2487e-03 eta 1:05:16
epoch [28/50] batch [20/392] time 0.433 (0.455) data 0.000 (0.021) loss 0.1568 (0.8742) lr 1.1874e-03 eta 1:08:15
epoch [28/50] batch [40/392] time 0.437 (0.444) data 0.000 (0.011) loss 1.3346 (1.1459) lr 1.1874e-03 eta 1:06:28
epoch [28/50] batch [60/392] time 0.433 (0.440) data 0.000 (0.007) loss 0.4733 (1.0736) lr 1.1874e-03 eta 1:05:43
epoch [28/50] batch [80/392] time 0.426 (0.438) data 0.000 (0.005) loss 0.7919 (1.1081) lr 1.1874e-03 eta 1:05:14
epoch [28/50] batch [100/392] time 0.437 (0.437) data 0.000 (0.004) loss 0.8625 (1.1271) lr 1.1874e-03 eta 1:04:57
epoch [28/50] batch [120/392] time 0.433 (0.437) data 0.000 (0.004) loss 1.1639 (1.1544) lr 1.1874e-03 eta 1:04:43
epoch [28/50] batch [140/392] time 0.423 (0.436) data 0.000 (0.003) loss 1.1569 (1.1493) lr 1.1874e-03 eta 1:04:31
epoch [28/50] batch [160/392] time 0.434 (0.436) data 0.000 (0.003) loss 1.0480 (1.1768) lr 1.1874e-03 eta 1:04:20
epoch [28/50] batch [180/392] time 0.437 (0.436) data 0.000 (0.002) loss 0.6028 (1.1333) lr 1.1874e-03 eta 1:04:08
epoch [28/50] batch [200/392] time 0.428 (0.435) data 0.000 (0.002) loss 2.5463 (1.1411) lr 1.1874e-03 eta 1:03:58
epoch [28/50] batch [220/392] time 0.438 (0.435) data 0.000 (0.002) loss 2.3052 (1.1509) lr 1.1874e-03 eta 1:03:48
epoch [28/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 2.0061 (1.1276) lr 1.1874e-03 eta 1:03:39
epoch [28/50] batch [260/392] time 0.434 (0.435) data 0.000 (0.002) loss 1.0401 (1.1197) lr 1.1874e-03 eta 1:03:28
epoch [28/50] batch [280/392] time 0.429 (0.435) data 0.000 (0.002) loss 0.8030 (1.1211) lr 1.1874e-03 eta 1:03:19
epoch [28/50] batch [300/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.3331 (1.1211) lr 1.1874e-03 eta 1:03:10
epoch [28/50] batch [320/392] time 0.435 (0.435) data 0.000 (0.001) loss 0.9201 (1.1253) lr 1.1874e-03 eta 1:03:00
epoch [28/50] batch [340/392] time 0.428 (0.435) data 0.000 (0.001) loss 0.8618 (1.1242) lr 1.1874e-03 eta 1:02:51
epoch [28/50] batch [360/392] time 0.429 (0.435) data 0.000 (0.001) loss 2.0198 (1.1206) lr 1.1874e-03 eta 1:02:42
epoch [28/50] batch [380/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.3060 (1.1332) lr 1.1874e-03 eta 1:02:33
epoch [29/50] batch [20/392] time 0.432 (0.452) data 0.000 (0.018) loss 1.1285 (1.0977) lr 1.1253e-03 eta 1:04:51
epoch [29/50] batch [40/392] time 0.438 (0.443) data 0.000 (0.009) loss 2.4160 (1.1680) lr 1.1253e-03 eta 1:03:23
epoch [29/50] batch [60/392] time 0.437 (0.440) data 0.000 (0.006) loss 0.9908 (1.0946) lr 1.1253e-03 eta 1:02:49
epoch [29/50] batch [80/392] time 0.427 (0.439) data 0.000 (0.005) loss 0.8585 (1.0979) lr 1.1253e-03 eta 1:02:31
epoch [29/50] batch [100/392] time 0.438 (0.438) data 0.000 (0.004) loss 1.0144 (1.0790) lr 1.1253e-03 eta 1:02:11
epoch [29/50] batch [120/392] time 0.424 (0.437) data 0.000 (0.003) loss 0.4849 (1.0723) lr 1.1253e-03 eta 1:01:55
epoch [29/50] batch [140/392] time 0.428 (0.436) data 0.000 (0.003) loss 1.0553 (1.0942) lr 1.1253e-03 eta 1:01:43
epoch [29/50] batch [160/392] time 0.435 (0.436) data 0.000 (0.002) loss 1.8244 (1.1161) lr 1.1253e-03 eta 1:01:31
epoch [29/50] batch [180/392] time 0.429 (0.436) data 0.000 (0.002) loss 1.3046 (1.1163) lr 1.1253e-03 eta 1:01:19
epoch [29/50] batch [200/392] time 0.438 (0.436) data 0.000 (0.002) loss 1.0614 (1.1175) lr 1.1253e-03 eta 1:01:08
epoch [29/50] batch [220/392] time 0.433 (0.435) data 0.000 (0.002) loss 1.8708 (1.1152) lr 1.1253e-03 eta 1:00:59
epoch [29/50] batch [240/392] time 0.426 (0.435) data 0.000 (0.002) loss 1.8322 (1.1169) lr 1.1253e-03 eta 1:00:48
epoch [29/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.8044 (1.1078) lr 1.1253e-03 eta 1:00:39
epoch [29/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.1748 (1.1123) lr 1.1253e-03 eta 1:00:29
epoch [29/50] batch [300/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.0845 (1.1100) lr 1.1253e-03 eta 1:00:20
epoch [29/50] batch [320/392] time 0.427 (0.435) data 0.000 (0.001) loss 0.8164 (1.0996) lr 1.1253e-03 eta 1:00:09
epoch [29/50] batch [340/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.2446 (1.1122) lr 1.1253e-03 eta 1:00:01
epoch [29/50] batch [360/392] time 0.427 (0.435) data 0.000 (0.001) loss 0.3764 (1.1221) lr 1.1253e-03 eta 0:59:51
epoch [29/50] batch [380/392] time 0.428 (0.435) data 0.000 (0.001) loss 2.5822 (1.1222) lr 1.1253e-03 eta 0:59:42
epoch [30/50] batch [20/392] time 0.438 (0.451) data 0.000 (0.018) loss 1.4186 (1.1390) lr 1.0628e-03 eta 1:01:44
epoch [30/50] batch [40/392] time 0.440 (0.443) data 0.000 (0.009) loss 0.7682 (1.2491) lr 1.0628e-03 eta 1:00:25
epoch [30/50] batch [60/392] time 0.437 (0.439) data 0.000 (0.006) loss 2.0864 (1.2468) lr 1.0628e-03 eta 0:59:50
epoch [30/50] batch [80/392] time 0.437 (0.438) data 0.000 (0.005) loss 0.5929 (1.2541) lr 1.0628e-03 eta 0:59:29
epoch [30/50] batch [100/392] time 0.437 (0.437) data 0.000 (0.004) loss 0.9039 (1.2798) lr 1.0628e-03 eta 0:59:14
epoch [30/50] batch [120/392] time 0.428 (0.436) data 0.000 (0.003) loss 0.3295 (1.3067) lr 1.0628e-03 eta 0:59:00
epoch [30/50] batch [140/392] time 0.438 (0.436) data 0.000 (0.003) loss 1.8988 (1.2916) lr 1.0628e-03 eta 0:58:48
epoch [30/50] batch [160/392] time 0.427 (0.436) data 0.000 (0.002) loss 2.0005 (1.2646) lr 1.0628e-03 eta 0:58:37
epoch [30/50] batch [180/392] time 0.427 (0.436) data 0.000 (0.002) loss 0.6775 (1.2433) lr 1.0628e-03 eta 0:58:26
epoch [30/50] batch [200/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.4190 (1.2372) lr 1.0628e-03 eta 0:58:17
epoch [30/50] batch [220/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.5098 (1.2087) lr 1.0628e-03 eta 0:58:08
epoch [30/50] batch [240/392] time 0.423 (0.435) data 0.000 (0.002) loss 1.1331 (1.2088) lr 1.0628e-03 eta 0:57:58
epoch [30/50] batch [260/392] time 0.429 (0.435) data 0.000 (0.002) loss 0.7834 (1.1941) lr 1.0628e-03 eta 0:57:48
epoch [30/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.5874 (1.1772) lr 1.0628e-03 eta 0:57:38
epoch [30/50] batch [300/392] time 0.432 (0.435) data 0.000 (0.001) loss 1.3134 (1.1686) lr 1.0628e-03 eta 0:57:29
epoch [30/50] batch [320/392] time 0.434 (0.435) data 0.000 (0.001) loss 0.3590 (1.1593) lr 1.0628e-03 eta 0:57:19
epoch [30/50] batch [340/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.2866 (1.1631) lr 1.0628e-03 eta 0:57:10
epoch [30/50] batch [360/392] time 0.436 (0.435) data 0.000 (0.001) loss 0.4723 (1.1639) lr 1.0628e-03 eta 0:57:01
epoch [30/50] batch [380/392] time 0.430 (0.435) data 0.000 (0.001) loss 1.4638 (1.1626) lr 1.0628e-03 eta 0:56:51
epoch [31/50] batch [20/392] time 0.438 (0.451) data 0.000 (0.018) loss 0.4635 (1.4048) lr 1.0000e-03 eta 0:58:47
epoch [31/50] batch [40/392] time 0.437 (0.442) data 0.000 (0.009) loss 0.5350 (1.1512) lr 1.0000e-03 eta 0:57:27
epoch [31/50] batch [60/392] time 0.437 (0.439) data 0.000 (0.006) loss 0.7013 (1.1386) lr 1.0000e-03 eta 0:56:55
epoch [31/50] batch [80/392] time 0.438 (0.438) data 0.000 (0.005) loss 0.3401 (1.1951) lr 1.0000e-03 eta 0:56:37
epoch [31/50] batch [100/392] time 0.427 (0.437) data 0.000 (0.004) loss 1.2655 (1.1826) lr 1.0000e-03 eta 0:56:21
epoch [31/50] batch [120/392] time 0.432 (0.436) data 0.000 (0.003) loss 2.1692 (1.1777) lr 1.0000e-03 eta 0:56:06
epoch [31/50] batch [140/392] time 0.436 (0.436) data 0.000 (0.003) loss 0.4037 (1.1469) lr 1.0000e-03 eta 0:55:53
epoch [31/50] batch [160/392] time 0.432 (0.435) data 0.000 (0.002) loss 0.7101 (1.1450) lr 1.0000e-03 eta 0:55:42
epoch [31/50] batch [180/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.8454 (1.1475) lr 1.0000e-03 eta 0:55:30
epoch [31/50] batch [200/392] time 0.432 (0.435) data 0.000 (0.002) loss 0.3619 (1.1311) lr 1.0000e-03 eta 0:55:21
epoch [31/50] batch [220/392] time 0.433 (0.435) data 0.000 (0.002) loss 0.8117 (1.1358) lr 1.0000e-03 eta 0:55:12
epoch [31/50] batch [240/392] time 0.426 (0.434) data 0.000 (0.002) loss 0.1451 (1.1105) lr 1.0000e-03 eta 0:55:01
epoch [31/50] batch [260/392] time 0.436 (0.434) data 0.000 (0.002) loss 0.2204 (1.1093) lr 1.0000e-03 eta 0:54:52
epoch [31/50] batch [280/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.5825 (1.1372) lr 1.0000e-03 eta 0:54:41
epoch [31/50] batch [300/392] time 0.433 (0.434) data 0.000 (0.001) loss 0.1701 (1.1222) lr 1.0000e-03 eta 0:54:32
epoch [31/50] batch [320/392] time 0.432 (0.434) data 0.000 (0.001) loss 1.2255 (1.1117) lr 1.0000e-03 eta 0:54:23
epoch [31/50] batch [340/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.0491 (1.1153) lr 1.0000e-03 eta 0:54:14
epoch [31/50] batch [360/392] time 0.435 (0.434) data 0.000 (0.001) loss 0.5646 (1.1149) lr 1.0000e-03 eta 0:54:05
epoch [31/50] batch [380/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.7676 (1.1122) lr 1.0000e-03 eta 0:53:56
epoch [32/50] batch [20/392] time 0.421 (0.451) data 0.000 (0.017) loss 1.9380 (1.1568) lr 9.3721e-04 eta 0:55:46
epoch [32/50] batch [40/392] time 0.426 (0.442) data 0.000 (0.009) loss 1.4220 (1.0720) lr 9.3721e-04 eta 0:54:32
epoch [32/50] batch [60/392] time 0.433 (0.439) data 0.000 (0.006) loss 1.1001 (1.0515) lr 9.3721e-04 eta 0:54:00
epoch [32/50] batch [80/392] time 0.427 (0.437) data 0.000 (0.004) loss 2.6602 (1.1065) lr 9.3721e-04 eta 0:53:39
epoch [32/50] batch [100/392] time 0.437 (0.436) data 0.000 (0.004) loss 0.6901 (1.1106) lr 9.3721e-04 eta 0:53:24
epoch [32/50] batch [120/392] time 0.433 (0.436) data 0.000 (0.003) loss 0.6073 (1.0949) lr 9.3721e-04 eta 0:53:11
epoch [32/50] batch [140/392] time 0.438 (0.435) data 0.000 (0.003) loss 1.3734 (1.0934) lr 9.3721e-04 eta 0:53:01
epoch [32/50] batch [160/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.6516 (1.0909) lr 9.3721e-04 eta 0:52:51
epoch [32/50] batch [180/392] time 0.433 (0.435) data 0.000 (0.002) loss 1.7837 (1.1013) lr 9.3721e-04 eta 0:52:42
epoch [32/50] batch [200/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.5125 (1.1109) lr 9.3721e-04 eta 0:52:33
epoch [32/50] batch [220/392] time 0.427 (0.435) data 0.000 (0.002) loss 2.3118 (1.1192) lr 9.3721e-04 eta 0:52:23
epoch [32/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.5271 (1.1146) lr 9.3721e-04 eta 0:52:13
epoch [32/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.001) loss 2.0822 (1.1187) lr 9.3721e-04 eta 0:52:04
epoch [32/50] batch [280/392] time 0.428 (0.435) data 0.000 (0.001) loss 3.0996 (1.1362) lr 9.3721e-04 eta 0:51:55
epoch [32/50] batch [300/392] time 0.434 (0.435) data 0.000 (0.001) loss 1.3598 (1.1323) lr 9.3721e-04 eta 0:51:45
epoch [32/50] batch [320/392] time 0.438 (0.434) data 0.000 (0.001) loss 0.8609 (1.1404) lr 9.3721e-04 eta 0:51:36
epoch [32/50] batch [340/392] time 0.432 (0.435) data 0.000 (0.001) loss 1.5501 (1.1419) lr 9.3721e-04 eta 0:51:28
epoch [32/50] batch [360/392] time 0.424 (0.434) data 0.000 (0.001) loss 0.2771 (1.1325) lr 9.3721e-04 eta 0:51:18
epoch [32/50] batch [380/392] time 0.425 (0.434) data 0.000 (0.001) loss 2.4152 (1.1378) lr 9.3721e-04 eta 0:51:09
epoch [33/50] batch [20/392] time 0.426 (0.449) data 0.000 (0.018) loss 2.0379 (1.0197) lr 8.7467e-04 eta 0:52:42
epoch [33/50] batch [40/392] time 0.437 (0.442) data 0.000 (0.009) loss 1.8561 (1.0797) lr 8.7467e-04 eta 0:51:39
epoch [33/50] batch [60/392] time 0.437 (0.439) data 0.000 (0.006) loss 1.8548 (1.0820) lr 8.7467e-04 eta 0:51:11
epoch [33/50] batch [80/392] time 0.437 (0.438) data 0.000 (0.005) loss 0.4857 (1.0373) lr 8.7467e-04 eta 0:50:53
epoch [33/50] batch [100/392] time 0.427 (0.437) data 0.000 (0.004) loss 0.3887 (1.0018) lr 8.7467e-04 eta 0:50:38
epoch [33/50] batch [120/392] time 0.432 (0.436) data 0.000 (0.003) loss 1.9133 (1.0450) lr 8.7467e-04 eta 0:50:24
epoch [33/50] batch [140/392] time 0.436 (0.436) data 0.000 (0.003) loss 0.8547 (1.0319) lr 8.7467e-04 eta 0:50:12
epoch [33/50] batch [160/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.2183 (1.0326) lr 8.7467e-04 eta 0:50:00
epoch [33/50] batch [180/392] time 0.436 (0.435) data 0.000 (0.002) loss 2.0077 (1.0664) lr 8.7467e-04 eta 0:49:50
epoch [33/50] batch [200/392] time 0.427 (0.435) data 0.000 (0.002) loss 0.3260 (1.0790) lr 8.7467e-04 eta 0:49:40
epoch [33/50] batch [220/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.4075 (1.0741) lr 8.7467e-04 eta 0:49:31
epoch [33/50] batch [240/392] time 0.434 (0.435) data 0.000 (0.002) loss 0.5132 (1.0793) lr 8.7467e-04 eta 0:49:22
epoch [33/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.4409 (1.0786) lr 8.7467e-04 eta 0:49:12
epoch [33/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.2375 (1.0679) lr 8.7467e-04 eta 0:49:04
epoch [33/50] batch [300/392] time 0.434 (0.434) data 0.000 (0.001) loss 1.0079 (1.0878) lr 8.7467e-04 eta 0:48:55
epoch [33/50] batch [320/392] time 0.433 (0.434) data 0.000 (0.001) loss 2.1669 (1.0905) lr 8.7467e-04 eta 0:48:45
epoch [33/50] batch [340/392] time 0.438 (0.434) data 0.000 (0.001) loss 2.0053 (1.1032) lr 8.7467e-04 eta 0:48:37
epoch [33/50] batch [360/392] time 0.435 (0.434) data 0.000 (0.001) loss 0.6177 (1.0983) lr 8.7467e-04 eta 0:48:27
epoch [33/50] batch [380/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.8251 (1.1012) lr 8.7467e-04 eta 0:48:19
epoch [34/50] batch [20/392] time 0.428 (0.452) data 0.000 (0.017) loss 0.5677 (1.1729) lr 8.1262e-04 eta 0:50:04
epoch [34/50] batch [40/392] time 0.438 (0.443) data 0.000 (0.009) loss 1.5216 (1.1249) lr 8.1262e-04 eta 0:48:53
epoch [34/50] batch [60/392] time 0.439 (0.439) data 0.000 (0.006) loss 0.7647 (1.0900) lr 8.1262e-04 eta 0:48:22
epoch [34/50] batch [80/392] time 0.438 (0.438) data 0.000 (0.004) loss 2.2994 (1.1318) lr 8.1262e-04 eta 0:48:06
epoch [34/50] batch [100/392] time 0.434 (0.438) data 0.000 (0.004) loss 1.5371 (1.1919) lr 8.1262e-04 eta 0:47:51
epoch [34/50] batch [120/392] time 0.437 (0.437) data 0.000 (0.003) loss 1.8513 (1.2007) lr 8.1262e-04 eta 0:47:39
epoch [34/50] batch [140/392] time 0.429 (0.437) data 0.000 (0.003) loss 0.6705 (1.1779) lr 8.1262e-04 eta 0:47:27
epoch [34/50] batch [160/392] time 0.428 (0.436) data 0.000 (0.002) loss 1.8964 (1.1709) lr 8.1262e-04 eta 0:47:17
epoch [34/50] batch [180/392] time 0.428 (0.436) data 0.000 (0.002) loss 2.3077 (1.1628) lr 8.1262e-04 eta 0:47:06
epoch [34/50] batch [200/392] time 0.437 (0.436) data 0.000 (0.002) loss 0.9052 (1.1604) lr 8.1262e-04 eta 0:46:56
epoch [34/50] batch [220/392] time 0.429 (0.436) data 0.000 (0.002) loss 0.3339 (1.1440) lr 8.1262e-04 eta 0:46:46
epoch [34/50] batch [240/392] time 0.429 (0.435) data 0.000 (0.002) loss 1.0072 (1.1398) lr 8.1262e-04 eta 0:46:36
epoch [34/50] batch [260/392] time 0.429 (0.435) data 0.000 (0.001) loss 0.4565 (1.1298) lr 8.1262e-04 eta 0:46:26
epoch [34/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 2.4745 (1.1262) lr 8.1262e-04 eta 0:46:17
epoch [34/50] batch [300/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.9919 (1.1222) lr 8.1262e-04 eta 0:46:09
epoch [34/50] batch [320/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.7075 (1.1002) lr 8.1262e-04 eta 0:45:59
epoch [34/50] batch [340/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.8321 (1.1073) lr 8.1262e-04 eta 0:45:50
epoch [34/50] batch [360/392] time 0.436 (0.435) data 0.000 (0.001) loss 1.0542 (1.0993) lr 8.1262e-04 eta 0:45:40
epoch [34/50] batch [380/392] time 0.436 (0.435) data 0.000 (0.001) loss 1.0735 (1.1091) lr 8.1262e-04 eta 0:45:31
epoch [35/50] batch [20/392] time 0.432 (0.450) data 0.000 (0.017) loss 3.0032 (0.9650) lr 7.5131e-04 eta 0:46:55
epoch [35/50] batch [40/392] time 0.433 (0.442) data 0.000 (0.009) loss 0.1962 (0.9684) lr 7.5131e-04 eta 0:45:52
epoch [35/50] batch [60/392] time 0.437 (0.439) data 0.000 (0.006) loss 1.4503 (1.0378) lr 7.5131e-04 eta 0:45:24
epoch [35/50] batch [80/392] time 0.437 (0.438) data 0.000 (0.004) loss 0.2859 (1.1537) lr 7.5131e-04 eta 0:45:09
epoch [35/50] batch [100/392] time 0.437 (0.437) data 0.000 (0.004) loss 1.1627 (1.1362) lr 7.5131e-04 eta 0:44:56
epoch [35/50] batch [120/392] time 0.438 (0.436) data 0.000 (0.003) loss 2.2256 (1.1495) lr 7.5131e-04 eta 0:44:44
epoch [35/50] batch [140/392] time 0.427 (0.436) data 0.000 (0.003) loss 0.8282 (1.1148) lr 7.5131e-04 eta 0:44:33
epoch [35/50] batch [160/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.7009 (1.1413) lr 7.5131e-04 eta 0:44:23
epoch [35/50] batch [180/392] time 0.436 (0.436) data 0.000 (0.002) loss 1.4791 (1.1435) lr 7.5131e-04 eta 0:44:13
epoch [35/50] batch [200/392] time 0.434 (0.435) data 0.000 (0.002) loss 0.1852 (1.1266) lr 7.5131e-04 eta 0:44:03
epoch [35/50] batch [220/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.7429 (1.1254) lr 7.5131e-04 eta 0:43:53
epoch [35/50] batch [240/392] time 0.436 (0.435) data 0.000 (0.002) loss 0.5992 (1.1413) lr 7.5131e-04 eta 0:43:43
epoch [35/50] batch [260/392] time 0.436 (0.435) data 0.000 (0.001) loss 0.9764 (1.1343) lr 7.5131e-04 eta 0:43:34
epoch [35/50] batch [280/392] time 0.435 (0.435) data 0.000 (0.001) loss 1.7413 (1.1434) lr 7.5131e-04 eta 0:43:25
epoch [35/50] batch [300/392] time 0.436 (0.435) data 0.000 (0.001) loss 0.3211 (1.1297) lr 7.5131e-04 eta 0:43:16
epoch [35/50] batch [320/392] time 0.428 (0.435) data 0.000 (0.001) loss 1.0218 (1.1451) lr 7.5131e-04 eta 0:43:06
epoch [35/50] batch [340/392] time 0.432 (0.435) data 0.000 (0.001) loss 0.9350 (1.1561) lr 7.5131e-04 eta 0:42:57
epoch [35/50] batch [360/392] time 0.429 (0.434) data 0.000 (0.001) loss 0.8875 (1.1563) lr 7.5131e-04 eta 0:42:48
epoch [35/50] batch [380/392] time 0.432 (0.434) data 0.000 (0.001) loss 0.2398 (1.1585) lr 7.5131e-04 eta 0:42:39
epoch [36/50] batch [20/392] time 0.434 (0.451) data 0.000 (0.018) loss 1.3612 (1.1500) lr 6.9098e-04 eta 0:44:01
epoch [36/50] batch [40/392] time 0.437 (0.443) data 0.000 (0.009) loss 1.1164 (1.2325) lr 6.9098e-04 eta 0:43:04
epoch [36/50] batch [60/392] time 0.437 (0.439) data 0.000 (0.006) loss 1.4547 (1.2061) lr 6.9098e-04 eta 0:42:36
epoch [36/50] batch [80/392] time 0.437 (0.438) data 0.000 (0.005) loss 0.1304 (1.1998) lr 6.9098e-04 eta 0:42:21
epoch [36/50] batch [100/392] time 0.439 (0.437) data 0.000 (0.004) loss 1.1797 (1.1737) lr 6.9098e-04 eta 0:42:08
epoch [36/50] batch [120/392] time 0.436 (0.437) data 0.000 (0.003) loss 1.1850 (1.1573) lr 6.9098e-04 eta 0:41:57
epoch [36/50] batch [140/392] time 0.438 (0.437) data 0.000 (0.003) loss 1.6868 (1.1406) lr 6.9098e-04 eta 0:41:46
epoch [36/50] batch [160/392] time 0.436 (0.436) data 0.000 (0.002) loss 1.2742 (1.1455) lr 6.9098e-04 eta 0:41:35
epoch [36/50] batch [180/392] time 0.437 (0.436) data 0.000 (0.002) loss 1.6566 (1.1729) lr 6.9098e-04 eta 0:41:25
epoch [36/50] batch [200/392] time 0.439 (0.436) data 0.000 (0.002) loss 1.0863 (1.1592) lr 6.9098e-04 eta 0:41:14
epoch [36/50] batch [220/392] time 0.424 (0.435) data 0.000 (0.002) loss 0.1021 (1.1591) lr 6.9098e-04 eta 0:41:04
epoch [36/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.9972 (1.1686) lr 6.9098e-04 eta 0:40:55
epoch [36/50] batch [260/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.9987 (1.1870) lr 6.9098e-04 eta 0:40:45
epoch [36/50] batch [280/392] time 0.437 (0.435) data 0.000 (0.001) loss 1.5505 (1.2073) lr 6.9098e-04 eta 0:40:36
epoch [36/50] batch [300/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.6019 (1.2068) lr 6.9098e-04 eta 0:40:26
epoch [36/50] batch [320/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.9570 (1.1916) lr 6.9098e-04 eta 0:40:17
epoch [36/50] batch [340/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.7042 (1.1814) lr 6.9098e-04 eta 0:40:08
epoch [36/50] batch [360/392] time 0.432 (0.435) data 0.000 (0.001) loss 0.4452 (1.1767) lr 6.9098e-04 eta 0:39:59
epoch [36/50] batch [380/392] time 0.426 (0.435) data 0.000 (0.001) loss 0.7706 (1.1707) lr 6.9098e-04 eta 0:39:49
epoch [37/50] batch [20/392] time 0.427 (0.451) data 0.000 (0.018) loss 0.4900 (1.1034) lr 6.3188e-04 eta 0:41:05
epoch [37/50] batch [40/392] time 0.427 (0.441) data 0.000 (0.009) loss 1.6521 (1.0972) lr 6.3188e-04 eta 0:40:05
epoch [37/50] batch [60/392] time 0.433 (0.439) data 0.000 (0.006) loss 0.1789 (1.0843) lr 6.3188e-04 eta 0:39:40
epoch [37/50] batch [80/392] time 0.432 (0.437) data 0.000 (0.005) loss 0.9817 (1.0715) lr 6.3188e-04 eta 0:39:23
epoch [37/50] batch [100/392] time 0.438 (0.436) data 0.000 (0.004) loss 2.6128 (1.0710) lr 6.3188e-04 eta 0:39:10
epoch [37/50] batch [120/392] time 0.437 (0.436) data 0.000 (0.003) loss 1.6129 (1.1083) lr 6.3188e-04 eta 0:38:59
epoch [37/50] batch [140/392] time 0.432 (0.435) data 0.000 (0.003) loss 0.9092 (1.0861) lr 6.3188e-04 eta 0:38:48
epoch [37/50] batch [160/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.5228 (1.0854) lr 6.3188e-04 eta 0:38:37
epoch [37/50] batch [180/392] time 0.438 (0.435) data 0.000 (0.002) loss 2.0467 (1.1180) lr 6.3188e-04 eta 0:38:27
epoch [37/50] batch [200/392] time 0.432 (0.435) data 0.000 (0.002) loss 1.9656 (1.1449) lr 6.3188e-04 eta 0:38:18
epoch [37/50] batch [220/392] time 0.439 (0.435) data 0.000 (0.002) loss 1.9876 (1.1403) lr 6.3188e-04 eta 0:38:08
epoch [37/50] batch [240/392] time 0.438 (0.434) data 0.000 (0.002) loss 1.0436 (1.1571) lr 6.3188e-04 eta 0:38:00
epoch [37/50] batch [260/392] time 0.438 (0.434) data 0.000 (0.002) loss 0.1912 (1.1486) lr 6.3188e-04 eta 0:37:51
epoch [37/50] batch [280/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.2348 (1.1113) lr 6.3188e-04 eta 0:37:42
epoch [37/50] batch [300/392] time 0.438 (0.434) data 0.000 (0.001) loss 0.9934 (1.0996) lr 6.3188e-04 eta 0:37:33
epoch [37/50] batch [320/392] time 0.425 (0.434) data 0.000 (0.001) loss 1.6998 (1.0994) lr 6.3188e-04 eta 0:37:24
epoch [37/50] batch [340/392] time 0.428 (0.434) data 0.000 (0.001) loss 1.0426 (1.1058) lr 6.3188e-04 eta 0:37:15
epoch [37/50] batch [360/392] time 0.439 (0.434) data 0.000 (0.001) loss 0.9597 (1.0961) lr 6.3188e-04 eta 0:37:07
epoch [37/50] batch [380/392] time 0.433 (0.434) data 0.000 (0.001) loss 0.4215 (1.1020) lr 6.3188e-04 eta 0:36:58
epoch [38/50] batch [20/392] time 0.428 (0.452) data 0.000 (0.019) loss 1.3046 (1.0015) lr 5.7422e-04 eta 0:38:13
epoch [38/50] batch [40/392] time 0.435 (0.443) data 0.000 (0.009) loss 0.7731 (0.9759) lr 5.7422e-04 eta 0:37:17
epoch [38/50] batch [60/392] time 0.424 (0.439) data 0.000 (0.006) loss 0.8819 (1.1187) lr 5.7422e-04 eta 0:36:52
epoch [38/50] batch [80/392] time 0.434 (0.438) data 0.000 (0.005) loss 2.2570 (1.1366) lr 5.7422e-04 eta 0:36:36
epoch [38/50] batch [100/392] time 0.434 (0.437) data 0.000 (0.004) loss 0.9427 (1.1293) lr 5.7422e-04 eta 0:36:23
epoch [38/50] batch [120/392] time 0.437 (0.437) data 0.000 (0.003) loss 1.6610 (1.1252) lr 5.7422e-04 eta 0:36:13
epoch [38/50] batch [140/392] time 0.433 (0.436) data 0.000 (0.003) loss 0.8146 (1.1015) lr 5.7422e-04 eta 0:36:02
epoch [38/50] batch [160/392] time 0.437 (0.436) data 0.000 (0.002) loss 1.3496 (1.0964) lr 5.7422e-04 eta 0:35:51
epoch [38/50] batch [180/392] time 0.439 (0.436) data 0.000 (0.002) loss 0.8924 (1.0709) lr 5.7422e-04 eta 0:35:41
epoch [38/50] batch [200/392] time 0.429 (0.436) data 0.000 (0.002) loss 1.6373 (1.0750) lr 5.7422e-04 eta 0:35:32
epoch [38/50] batch [220/392] time 0.440 (0.435) data 0.000 (0.002) loss 0.8298 (1.0584) lr 5.7422e-04 eta 0:35:23
epoch [38/50] batch [240/392] time 0.429 (0.435) data 0.000 (0.002) loss 1.5361 (1.0709) lr 5.7422e-04 eta 0:35:13
epoch [38/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.3537 (1.0815) lr 5.7422e-04 eta 0:35:04
epoch [38/50] batch [280/392] time 0.429 (0.435) data 0.000 (0.001) loss 0.5791 (1.0828) lr 5.7422e-04 eta 0:34:55
epoch [38/50] batch [300/392] time 0.427 (0.435) data 0.000 (0.001) loss 1.6503 (1.0838) lr 5.7422e-04 eta 0:34:45
epoch [38/50] batch [320/392] time 0.434 (0.435) data 0.000 (0.001) loss 0.3819 (1.0777) lr 5.7422e-04 eta 0:34:36
epoch [38/50] batch [340/392] time 0.434 (0.435) data 0.000 (0.001) loss 0.9973 (1.0695) lr 5.7422e-04 eta 0:34:27
epoch [38/50] batch [360/392] time 0.429 (0.435) data 0.000 (0.001) loss 2.8931 (1.0984) lr 5.7422e-04 eta 0:34:18
epoch [38/50] batch [380/392] time 0.427 (0.435) data 0.000 (0.001) loss 0.3743 (1.0999) lr 5.7422e-04 eta 0:34:09
epoch [39/50] batch [20/392] time 0.434 (0.454) data 0.000 (0.019) loss 1.2568 (1.1581) lr 5.1825e-04 eta 0:35:25
epoch [39/50] batch [40/392] time 0.438 (0.444) data 0.000 (0.010) loss 0.3507 (1.2263) lr 5.1825e-04 eta 0:34:32
epoch [39/50] batch [60/392] time 0.438 (0.441) data 0.000 (0.007) loss 2.5301 (1.1897) lr 5.1825e-04 eta 0:34:09
epoch [39/50] batch [80/392] time 0.429 (0.439) data 0.000 (0.005) loss 0.9868 (1.2128) lr 5.1825e-04 eta 0:33:51
epoch [39/50] batch [100/392] time 0.429 (0.438) data 0.000 (0.004) loss 2.1782 (1.2024) lr 5.1825e-04 eta 0:33:38
epoch [39/50] batch [120/392] time 0.428 (0.438) data 0.000 (0.003) loss 0.4249 (1.1954) lr 5.1825e-04 eta 0:33:26
epoch [39/50] batch [140/392] time 0.428 (0.437) data 0.000 (0.003) loss 0.6362 (1.1785) lr 5.1825e-04 eta 0:33:15
epoch [39/50] batch [160/392] time 0.438 (0.437) data 0.000 (0.003) loss 2.1488 (1.1860) lr 5.1825e-04 eta 0:33:05
epoch [39/50] batch [180/392] time 0.439 (0.437) data 0.000 (0.002) loss 0.6816 (1.1735) lr 5.1825e-04 eta 0:32:55
epoch [39/50] batch [200/392] time 0.427 (0.436) data 0.000 (0.002) loss 0.5697 (1.1656) lr 5.1825e-04 eta 0:32:45
epoch [39/50] batch [220/392] time 0.438 (0.436) data 0.000 (0.002) loss 1.3495 (1.1614) lr 5.1825e-04 eta 0:32:35
epoch [39/50] batch [240/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.5843 (1.1616) lr 5.1825e-04 eta 0:32:26
epoch [39/50] batch [260/392] time 0.429 (0.436) data 0.000 (0.002) loss 0.2900 (1.1548) lr 5.1825e-04 eta 0:32:17
epoch [39/50] batch [280/392] time 0.427 (0.436) data 0.000 (0.002) loss 0.4231 (1.1469) lr 5.1825e-04 eta 0:32:08
epoch [39/50] batch [300/392] time 0.428 (0.436) data 0.000 (0.001) loss 0.8553 (1.1449) lr 5.1825e-04 eta 0:31:58
epoch [39/50] batch [320/392] time 0.434 (0.436) data 0.000 (0.001) loss 0.9967 (1.1298) lr 5.1825e-04 eta 0:31:49
epoch [39/50] batch [340/392] time 0.433 (0.436) data 0.000 (0.001) loss 1.5976 (1.1252) lr 5.1825e-04 eta 0:31:40
epoch [39/50] batch [360/392] time 0.437 (0.435) data 0.000 (0.001) loss 1.4996 (1.1405) lr 5.1825e-04 eta 0:31:31
epoch [39/50] batch [380/392] time 0.426 (0.435) data 0.000 (0.001) loss 0.9020 (1.1474) lr 5.1825e-04 eta 0:31:22
epoch [40/50] batch [20/392] time 0.427 (0.450) data 0.000 (0.017) loss 2.5228 (0.9907) lr 4.6417e-04 eta 0:32:11
epoch [40/50] batch [40/392] time 0.438 (0.442) data 0.000 (0.009) loss 0.1697 (0.9617) lr 4.6417e-04 eta 0:31:27
epoch [40/50] batch [60/392] time 0.438 (0.439) data 0.000 (0.006) loss 0.8301 (0.9833) lr 4.6417e-04 eta 0:31:06
epoch [40/50] batch [80/392] time 0.427 (0.437) data 0.000 (0.004) loss 1.3357 (0.9990) lr 4.6417e-04 eta 0:30:50
epoch [40/50] batch [100/392] time 0.437 (0.436) data 0.000 (0.004) loss 1.2413 (1.0322) lr 4.6417e-04 eta 0:30:37
epoch [40/50] batch [120/392] time 0.437 (0.436) data 0.000 (0.003) loss 0.9777 (1.0206) lr 4.6417e-04 eta 0:30:26
epoch [40/50] batch [140/392] time 0.431 (0.435) data 0.000 (0.003) loss 0.4364 (1.0187) lr 4.6417e-04 eta 0:30:16
epoch [40/50] batch [160/392] time 0.426 (0.435) data 0.000 (0.002) loss 0.6058 (1.0555) lr 4.6417e-04 eta 0:30:06
epoch [40/50] batch [180/392] time 0.433 (0.435) data 0.000 (0.002) loss 0.7958 (1.0397) lr 4.6417e-04 eta 0:29:56
epoch [40/50] batch [200/392] time 0.432 (0.435) data 0.000 (0.002) loss 0.0969 (1.0492) lr 4.6417e-04 eta 0:29:47
epoch [40/50] batch [220/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.4524 (1.0398) lr 4.6417e-04 eta 0:29:38
epoch [40/50] batch [240/392] time 0.422 (0.435) data 0.000 (0.002) loss 2.2879 (1.0590) lr 4.6417e-04 eta 0:29:29
epoch [40/50] batch [260/392] time 0.433 (0.434) data 0.000 (0.002) loss 1.2679 (1.0645) lr 4.6417e-04 eta 0:29:20
epoch [40/50] batch [280/392] time 0.426 (0.434) data 0.000 (0.001) loss 0.4047 (1.0717) lr 4.6417e-04 eta 0:29:10
epoch [40/50] batch [300/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.5035 (1.0555) lr 4.6417e-04 eta 0:29:01
epoch [40/50] batch [320/392] time 0.427 (0.434) data 0.000 (0.001) loss 1.0203 (1.0628) lr 4.6417e-04 eta 0:28:52
epoch [40/50] batch [340/392] time 0.432 (0.434) data 0.000 (0.001) loss 0.2966 (1.0651) lr 4.6417e-04 eta 0:28:43
epoch [40/50] batch [360/392] time 0.432 (0.434) data 0.000 (0.001) loss 1.6377 (1.0657) lr 4.6417e-04 eta 0:28:34
epoch [40/50] batch [380/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.8159 (1.0797) lr 4.6417e-04 eta 0:28:25
epoch [41/50] batch [20/392] time 0.436 (0.451) data 0.000 (0.019) loss 0.6391 (1.0784) lr 4.1221e-04 eta 0:29:18
epoch [41/50] batch [40/392] time 0.433 (0.442) data 0.000 (0.009) loss 0.3695 (1.1140) lr 4.1221e-04 eta 0:28:34
epoch [41/50] batch [60/392] time 0.427 (0.439) data 0.000 (0.006) loss 0.5969 (1.0953) lr 4.1221e-04 eta 0:28:12
epoch [41/50] batch [80/392] time 0.438 (0.437) data 0.000 (0.005) loss 2.3109 (1.1790) lr 4.1221e-04 eta 0:27:59
epoch [41/50] batch [100/392] time 0.438 (0.437) data 0.000 (0.004) loss 0.8743 (1.1728) lr 4.1221e-04 eta 0:27:47
epoch [41/50] batch [120/392] time 0.428 (0.436) data 0.000 (0.003) loss 1.3702 (1.1395) lr 4.1221e-04 eta 0:27:37
epoch [41/50] batch [140/392] time 0.438 (0.436) data 0.000 (0.003) loss 0.8192 (1.1272) lr 4.1221e-04 eta 0:27:26
epoch [41/50] batch [160/392] time 0.437 (0.435) data 0.000 (0.002) loss 1.6666 (1.1382) lr 4.1221e-04 eta 0:27:16
epoch [41/50] batch [180/392] time 0.437 (0.435) data 0.000 (0.002) loss 1.2018 (1.1279) lr 4.1221e-04 eta 0:27:07
epoch [41/50] batch [200/392] time 0.430 (0.435) data 0.000 (0.002) loss 0.4957 (1.1080) lr 4.1221e-04 eta 0:26:57
epoch [41/50] batch [220/392] time 0.433 (0.435) data 0.000 (0.002) loss 0.7418 (1.1103) lr 4.1221e-04 eta 0:26:48
epoch [41/50] batch [240/392] time 0.427 (0.435) data 0.000 (0.002) loss 2.0103 (1.1111) lr 4.1221e-04 eta 0:26:40
epoch [41/50] batch [260/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.6832 (1.1205) lr 4.1221e-04 eta 0:26:31
epoch [41/50] batch [280/392] time 0.435 (0.435) data 0.000 (0.001) loss 0.1950 (1.1163) lr 4.1221e-04 eta 0:26:22
epoch [41/50] batch [300/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.4729 (1.1257) lr 4.1221e-04 eta 0:26:13
epoch [41/50] batch [320/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.8845 (1.1157) lr 4.1221e-04 eta 0:26:04
epoch [41/50] batch [340/392] time 0.431 (0.435) data 0.000 (0.001) loss 2.4233 (1.1263) lr 4.1221e-04 eta 0:25:55
epoch [41/50] batch [360/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.1316 (1.1346) lr 4.1221e-04 eta 0:25:46
epoch [41/50] batch [380/392] time 0.436 (0.434) data 0.000 (0.001) loss 0.7157 (1.1309) lr 4.1221e-04 eta 0:25:37
epoch [42/50] batch [20/392] time 0.439 (0.452) data 0.000 (0.017) loss 1.3425 (1.0119) lr 3.6258e-04 eta 0:26:26
epoch [42/50] batch [40/392] time 0.433 (0.443) data 0.000 (0.009) loss 0.7903 (1.0746) lr 3.6258e-04 eta 0:25:45
epoch [42/50] batch [60/392] time 0.438 (0.440) data 0.000 (0.006) loss 2.2889 (1.0905) lr 3.6258e-04 eta 0:25:25
epoch [42/50] batch [80/392] time 0.439 (0.438) data 0.000 (0.004) loss 2.7411 (1.1588) lr 3.6258e-04 eta 0:25:11
epoch [42/50] batch [100/392] time 0.438 (0.437) data 0.000 (0.004) loss 0.4057 (1.1381) lr 3.6258e-04 eta 0:24:59
epoch [42/50] batch [120/392] time 0.437 (0.437) data 0.000 (0.003) loss 0.3643 (1.1190) lr 3.6258e-04 eta 0:24:48
epoch [42/50] batch [140/392] time 0.428 (0.436) data 0.000 (0.003) loss 0.6457 (1.1503) lr 3.6258e-04 eta 0:24:38
epoch [42/50] batch [160/392] time 0.430 (0.436) data 0.000 (0.002) loss 0.9821 (1.1582) lr 3.6258e-04 eta 0:24:28
epoch [42/50] batch [180/392] time 0.429 (0.436) data 0.000 (0.002) loss 0.6900 (1.1416) lr 3.6258e-04 eta 0:24:18
epoch [42/50] batch [200/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.8142 (1.1319) lr 3.6258e-04 eta 0:24:09
epoch [42/50] batch [220/392] time 0.437 (0.435) data 0.000 (0.002) loss 1.8387 (1.1527) lr 3.6258e-04 eta 0:24:00
epoch [42/50] batch [240/392] time 0.429 (0.435) data 0.000 (0.002) loss 1.3995 (1.1550) lr 3.6258e-04 eta 0:23:51
epoch [42/50] batch [260/392] time 0.427 (0.435) data 0.000 (0.001) loss 1.2816 (1.1406) lr 3.6258e-04 eta 0:23:42
epoch [42/50] batch [280/392] time 0.437 (0.435) data 0.000 (0.001) loss 1.5434 (1.1429) lr 3.6258e-04 eta 0:23:33
epoch [42/50] batch [300/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.6561 (1.1396) lr 3.6258e-04 eta 0:23:23
epoch [42/50] batch [320/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.4027 (1.1431) lr 3.6258e-04 eta 0:23:14
epoch [42/50] batch [340/392] time 0.427 (0.435) data 0.000 (0.001) loss 0.9342 (1.1394) lr 3.6258e-04 eta 0:23:06
epoch [42/50] batch [360/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.7171 (1.1392) lr 3.6258e-04 eta 0:22:56
epoch [42/50] batch [380/392] time 0.435 (0.435) data 0.000 (0.001) loss 0.8657 (1.1338) lr 3.6258e-04 eta 0:22:48
epoch [43/50] batch [20/392] time 0.433 (0.451) data 0.000 (0.018) loss 1.8216 (1.2259) lr 3.1545e-04 eta 0:23:25
epoch [43/50] batch [40/392] time 0.438 (0.443) data 0.000 (0.009) loss 0.3033 (1.1692) lr 3.1545e-04 eta 0:22:50
epoch [43/50] batch [60/392] time 0.438 (0.440) data 0.000 (0.006) loss 3.2505 (1.1150) lr 3.1545e-04 eta 0:22:32
epoch [43/50] batch [80/392] time 0.429 (0.438) data 0.000 (0.005) loss 0.9422 (1.1049) lr 3.1545e-04 eta 0:22:17
epoch [43/50] batch [100/392] time 0.433 (0.437) data 0.000 (0.004) loss 0.6604 (1.1102) lr 3.1545e-04 eta 0:22:06
epoch [43/50] batch [120/392] time 0.438 (0.436) data 0.000 (0.003) loss 3.1738 (1.0983) lr 3.1545e-04 eta 0:21:55
epoch [43/50] batch [140/392] time 0.433 (0.436) data 0.000 (0.003) loss 1.9627 (1.1254) lr 3.1545e-04 eta 0:21:45
epoch [43/50] batch [160/392] time 0.433 (0.436) data 0.000 (0.002) loss 1.8272 (1.1260) lr 3.1545e-04 eta 0:21:36
epoch [43/50] batch [180/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.8349 (1.1227) lr 3.1545e-04 eta 0:21:26
epoch [43/50] batch [200/392] time 0.432 (0.435) data 0.000 (0.002) loss 1.8539 (1.1241) lr 3.1545e-04 eta 0:21:17
epoch [43/50] batch [220/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.4298 (1.1362) lr 3.1545e-04 eta 0:21:08
epoch [43/50] batch [240/392] time 0.432 (0.435) data 0.000 (0.002) loss 1.1589 (1.1129) lr 3.1545e-04 eta 0:20:58
epoch [43/50] batch [260/392] time 0.436 (0.435) data 0.000 (0.002) loss 0.5518 (1.0865) lr 3.1545e-04 eta 0:20:49
epoch [43/50] batch [280/392] time 0.436 (0.434) data 0.000 (0.001) loss 1.1323 (1.1075) lr 3.1545e-04 eta 0:20:40
epoch [43/50] batch [300/392] time 0.433 (0.434) data 0.000 (0.001) loss 0.6997 (1.1084) lr 3.1545e-04 eta 0:20:31
epoch [43/50] batch [320/392] time 0.434 (0.434) data 0.000 (0.001) loss 1.0943 (1.1168) lr 3.1545e-04 eta 0:20:23
epoch [43/50] batch [340/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.1677 (1.1293) lr 3.1545e-04 eta 0:20:14
epoch [43/50] batch [360/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.5396 (1.1292) lr 3.1545e-04 eta 0:20:05
epoch [43/50] batch [380/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.7466 (1.1287) lr 3.1545e-04 eta 0:19:56
epoch [44/50] batch [20/392] time 0.439 (0.451) data 0.000 (0.018) loss 0.9115 (1.2467) lr 2.7103e-04 eta 0:20:29
epoch [44/50] batch [40/392] time 0.439 (0.443) data 0.000 (0.009) loss 0.5986 (1.1963) lr 2.7103e-04 eta 0:19:56
epoch [44/50] batch [60/392] time 0.438 (0.440) data 0.000 (0.006) loss 0.1948 (1.2346) lr 2.7103e-04 eta 0:19:41
epoch [44/50] batch [80/392] time 0.435 (0.438) data 0.000 (0.005) loss 2.3199 (1.2529) lr 2.7103e-04 eta 0:19:27
epoch [44/50] batch [100/392] time 0.434 (0.438) data 0.000 (0.004) loss 2.8695 (1.2855) lr 2.7103e-04 eta 0:19:17
epoch [44/50] batch [120/392] time 0.438 (0.437) data 0.000 (0.003) loss 2.4191 (1.2563) lr 2.7103e-04 eta 0:19:06
epoch [44/50] batch [140/392] time 0.433 (0.437) data 0.000 (0.003) loss 1.5459 (1.2613) lr 2.7103e-04 eta 0:18:56
epoch [44/50] batch [160/392] time 0.437 (0.436) data 0.000 (0.002) loss 0.5920 (1.2216) lr 2.7103e-04 eta 0:18:46
epoch [44/50] batch [180/392] time 0.432 (0.436) data 0.000 (0.002) loss 0.8225 (1.2078) lr 2.7103e-04 eta 0:18:36
epoch [44/50] batch [200/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.8596 (1.1893) lr 2.7103e-04 eta 0:18:27
epoch [44/50] batch [220/392] time 0.434 (0.435) data 0.000 (0.002) loss 1.2454 (1.1903) lr 2.7103e-04 eta 0:18:18
epoch [44/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.6163 (1.1861) lr 2.7103e-04 eta 0:18:09
epoch [44/50] batch [260/392] time 0.442 (0.435) data 0.000 (0.002) loss 0.2570 (1.1768) lr 2.7103e-04 eta 0:18:00
epoch [44/50] batch [280/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.9320 (1.1783) lr 2.7103e-04 eta 0:17:51
epoch [44/50] batch [300/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.0503 (1.1971) lr 2.7103e-04 eta 0:17:41
epoch [44/50] batch [320/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.8837 (1.1773) lr 2.7103e-04 eta 0:17:33
epoch [44/50] batch [340/392] time 0.436 (0.434) data 0.000 (0.001) loss 2.0344 (1.1700) lr 2.7103e-04 eta 0:17:24
epoch [44/50] batch [360/392] time 0.429 (0.434) data 0.000 (0.001) loss 1.2478 (1.1632) lr 2.7103e-04 eta 0:17:15
epoch [44/50] batch [380/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.7176 (1.1623) lr 2.7103e-04 eta 0:17:06
epoch [45/50] batch [20/392] time 0.429 (0.452) data 0.000 (0.018) loss 0.9104 (1.1940) lr 2.2949e-04 eta 0:17:33
epoch [45/50] batch [40/392] time 0.434 (0.442) data 0.000 (0.009) loss 0.4734 (1.1729) lr 2.2949e-04 eta 0:17:02
epoch [45/50] batch [60/392] time 0.434 (0.440) data 0.000 (0.006) loss 1.5563 (1.1689) lr 2.2949e-04 eta 0:16:47
epoch [45/50] batch [80/392] time 0.429 (0.438) data 0.000 (0.005) loss 0.6134 (1.1801) lr 2.2949e-04 eta 0:16:35
epoch [45/50] batch [100/392] time 0.428 (0.437) data 0.000 (0.004) loss 0.4215 (1.1380) lr 2.2949e-04 eta 0:16:24
epoch [45/50] batch [120/392] time 0.435 (0.437) data 0.000 (0.003) loss 1.6469 (1.1508) lr 2.2949e-04 eta 0:16:14
epoch [45/50] batch [140/392] time 0.421 (0.436) data 0.000 (0.003) loss 0.7374 (1.1860) lr 2.2949e-04 eta 0:16:04
epoch [45/50] batch [160/392] time 0.427 (0.436) data 0.000 (0.002) loss 0.9372 (1.1581) lr 2.2949e-04 eta 0:15:55
epoch [45/50] batch [180/392] time 0.434 (0.435) data 0.000 (0.002) loss 0.7826 (1.1495) lr 2.2949e-04 eta 0:15:45
epoch [45/50] batch [200/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.4297 (1.1923) lr 2.2949e-04 eta 0:15:36
epoch [45/50] batch [220/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.9961 (1.1909) lr 2.2949e-04 eta 0:15:27
epoch [45/50] batch [240/392] time 0.425 (0.435) data 0.000 (0.002) loss 0.7064 (1.1884) lr 2.2949e-04 eta 0:15:18
epoch [45/50] batch [260/392] time 0.429 (0.435) data 0.000 (0.002) loss 0.6782 (1.1809) lr 2.2949e-04 eta 0:15:09
epoch [45/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.4775 (1.1692) lr 2.2949e-04 eta 0:15:00
epoch [45/50] batch [300/392] time 0.429 (0.435) data 0.000 (0.001) loss 0.8665 (1.1951) lr 2.2949e-04 eta 0:14:51
epoch [45/50] batch [320/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.7390 (1.1980) lr 2.2949e-04 eta 0:14:42
epoch [45/50] batch [340/392] time 0.437 (0.435) data 0.000 (0.001) loss 1.0184 (1.1830) lr 2.2949e-04 eta 0:14:34
epoch [45/50] batch [360/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.5987 (1.1713) lr 2.2949e-04 eta 0:14:25
epoch [45/50] batch [380/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.1476 (1.1640) lr 2.2949e-04 eta 0:14:16
epoch [46/50] batch [20/392] time 0.433 (0.450) data 0.000 (0.018) loss 0.4673 (1.0131) lr 1.9098e-04 eta 0:14:33
epoch [46/50] batch [40/392] time 0.433 (0.442) data 0.000 (0.009) loss 1.5008 (1.0471) lr 1.9098e-04 eta 0:14:08
epoch [46/50] batch [60/392] time 0.437 (0.439) data 0.000 (0.006) loss 1.7451 (1.1223) lr 1.9098e-04 eta 0:13:54
epoch [46/50] batch [80/392] time 0.438 (0.438) data 0.000 (0.005) loss 0.5324 (1.1375) lr 1.9098e-04 eta 0:13:42
epoch [46/50] batch [100/392] time 0.427 (0.437) data 0.000 (0.004) loss 0.6356 (1.1057) lr 1.9098e-04 eta 0:13:32
epoch [46/50] batch [120/392] time 0.436 (0.436) data 0.000 (0.003) loss 1.5825 (1.1192) lr 1.9098e-04 eta 0:13:22
epoch [46/50] batch [140/392] time 0.433 (0.436) data 0.000 (0.003) loss 0.8533 (1.0927) lr 1.9098e-04 eta 0:13:12
epoch [46/50] batch [160/392] time 0.427 (0.435) data 0.000 (0.002) loss 0.4130 (1.1134) lr 1.9098e-04 eta 0:13:03
epoch [46/50] batch [180/392] time 0.438 (0.435) data 0.001 (0.002) loss 3.1307 (1.1285) lr 1.9098e-04 eta 0:12:54
epoch [46/50] batch [200/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.6598 (1.1200) lr 1.9098e-04 eta 0:12:45
epoch [46/50] batch [220/392] time 0.439 (0.435) data 0.000 (0.002) loss 2.2184 (1.1180) lr 1.9098e-04 eta 0:12:36
epoch [46/50] batch [240/392] time 0.434 (0.435) data 0.000 (0.002) loss 0.7173 (1.0985) lr 1.9098e-04 eta 0:12:27
epoch [46/50] batch [260/392] time 0.436 (0.435) data 0.000 (0.001) loss 1.4372 (1.0770) lr 1.9098e-04 eta 0:12:19
epoch [46/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.1241 (1.0898) lr 1.9098e-04 eta 0:12:10
epoch [46/50] batch [300/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.5696 (1.0934) lr 1.9098e-04 eta 0:12:01
epoch [46/50] batch [320/392] time 0.439 (0.435) data 0.000 (0.001) loss 0.3571 (1.0916) lr 1.9098e-04 eta 0:11:52
epoch [46/50] batch [340/392] time 0.433 (0.434) data 0.000 (0.001) loss 1.6970 (1.0886) lr 1.9098e-04 eta 0:11:43
epoch [46/50] batch [360/392] time 0.432 (0.434) data 0.000 (0.001) loss 1.1364 (1.0823) lr 1.9098e-04 eta 0:11:35
epoch [46/50] batch [380/392] time 0.426 (0.434) data 0.000 (0.001) loss 2.0696 (1.0965) lr 1.9098e-04 eta 0:11:26
epoch [47/50] batch [20/392] time 0.434 (0.451) data 0.000 (0.017) loss 0.7152 (0.8212) lr 1.5567e-04 eta 0:11:37
epoch [47/50] batch [40/392] time 0.431 (0.442) data 0.000 (0.009) loss 1.3260 (1.0080) lr 1.5567e-04 eta 0:11:14
epoch [47/50] batch [60/392] time 0.433 (0.439) data 0.000 (0.006) loss 0.1638 (0.9867) lr 1.5567e-04 eta 0:11:01
epoch [47/50] batch [80/392] time 0.438 (0.437) data 0.000 (0.004) loss 1.2534 (1.0454) lr 1.5567e-04 eta 0:10:50
epoch [47/50] batch [100/392] time 0.426 (0.436) data 0.000 (0.004) loss 1.3558 (1.0374) lr 1.5567e-04 eta 0:10:40
epoch [47/50] batch [120/392] time 0.422 (0.436) data 0.000 (0.003) loss 0.2600 (1.0173) lr 1.5567e-04 eta 0:10:30
epoch [47/50] batch [140/392] time 0.428 (0.435) data 0.000 (0.003) loss 1.2331 (1.0541) lr 1.5567e-04 eta 0:10:21
epoch [47/50] batch [160/392] time 0.433 (0.435) data 0.000 (0.002) loss 0.8427 (1.0475) lr 1.5567e-04 eta 0:10:12
epoch [47/50] batch [180/392] time 0.426 (0.435) data 0.000 (0.002) loss 1.4685 (1.0299) lr 1.5567e-04 eta 0:10:03
epoch [47/50] batch [200/392] time 0.437 (0.435) data 0.000 (0.002) loss 1.4396 (1.0420) lr 1.5567e-04 eta 0:09:54
epoch [47/50] batch [220/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.6980 (1.0476) lr 1.5567e-04 eta 0:09:45
epoch [47/50] batch [240/392] time 0.433 (0.434) data 0.000 (0.002) loss 1.0965 (1.0447) lr 1.5567e-04 eta 0:09:36
epoch [47/50] batch [260/392] time 0.434 (0.434) data 0.000 (0.001) loss 2.2650 (1.0753) lr 1.5567e-04 eta 0:09:27
epoch [47/50] batch [280/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.5450 (1.0730) lr 1.5567e-04 eta 0:09:19
epoch [47/50] batch [300/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.6584 (1.0628) lr 1.5567e-04 eta 0:09:10
epoch [47/50] batch [320/392] time 0.436 (0.434) data 0.000 (0.001) loss 0.3762 (1.0559) lr 1.5567e-04 eta 0:09:01
epoch [47/50] batch [340/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.6707 (1.0592) lr 1.5567e-04 eta 0:08:52
epoch [47/50] batch [360/392] time 0.436 (0.434) data 0.000 (0.001) loss 2.2414 (1.0804) lr 1.5567e-04 eta 0:08:44
epoch [47/50] batch [380/392] time 0.436 (0.434) data 0.000 (0.001) loss 1.8278 (1.0800) lr 1.5567e-04 eta 0:08:35
epoch [48/50] batch [20/392] time 0.437 (0.451) data 0.000 (0.019) loss 0.6566 (1.0750) lr 1.2369e-04 eta 0:08:41
epoch [48/50] batch [40/392] time 0.437 (0.442) data 0.000 (0.009) loss 0.5321 (1.0637) lr 1.2369e-04 eta 0:08:21
epoch [48/50] batch [60/392] time 0.441 (0.439) data 0.003 (0.006) loss 1.1764 (1.0868) lr 1.2369e-04 eta 0:08:09
epoch [48/50] batch [80/392] time 0.437 (0.437) data 0.000 (0.005) loss 1.8707 (1.0874) lr 1.2369e-04 eta 0:07:59
epoch [48/50] batch [100/392] time 0.436 (0.437) data 0.000 (0.004) loss 2.3057 (1.1033) lr 1.2369e-04 eta 0:07:49
epoch [48/50] batch [120/392] time 0.432 (0.436) data 0.000 (0.003) loss 2.1723 (1.0627) lr 1.2369e-04 eta 0:07:40
epoch [48/50] batch [140/392] time 0.438 (0.436) data 0.000 (0.003) loss 0.6804 (1.0772) lr 1.2369e-04 eta 0:07:31
epoch [48/50] batch [160/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.4884 (1.0686) lr 1.2369e-04 eta 0:07:22
epoch [48/50] batch [180/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.7972 (1.0409) lr 1.2369e-04 eta 0:07:13
epoch [48/50] batch [200/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.0737 (1.0415) lr 1.2369e-04 eta 0:07:04
epoch [48/50] batch [220/392] time 0.430 (0.435) data 0.000 (0.002) loss 0.6796 (1.0161) lr 1.2369e-04 eta 0:06:55
epoch [48/50] batch [240/392] time 0.432 (0.434) data 0.000 (0.002) loss 0.6313 (1.0355) lr 1.2369e-04 eta 0:06:46
epoch [48/50] batch [260/392] time 0.433 (0.434) data 0.000 (0.002) loss 1.2932 (1.0086) lr 1.2369e-04 eta 0:06:37
epoch [48/50] batch [280/392] time 0.440 (0.434) data 0.000 (0.001) loss 0.8052 (1.0107) lr 1.2369e-04 eta 0:06:29
epoch [48/50] batch [300/392] time 0.433 (0.434) data 0.000 (0.001) loss 0.3855 (1.0142) lr 1.2369e-04 eta 0:06:20
epoch [48/50] batch [320/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.8990 (1.0155) lr 1.2369e-04 eta 0:06:11
epoch [48/50] batch [340/392] time 0.433 (0.434) data 0.000 (0.001) loss 0.9601 (1.0270) lr 1.2369e-04 eta 0:06:02
epoch [48/50] batch [360/392] time 0.430 (0.434) data 0.000 (0.001) loss 0.8242 (1.0326) lr 1.2369e-04 eta 0:05:54
epoch [48/50] batch [380/392] time 0.432 (0.434) data 0.000 (0.001) loss 0.3320 (1.0323) lr 1.2369e-04 eta 0:05:45
epoch [49/50] batch [20/392] time 0.423 (0.450) data 0.000 (0.017) loss 0.9619 (1.0446) lr 9.5173e-05 eta 0:05:43
epoch [49/50] batch [40/392] time 0.433 (0.441) data 0.000 (0.009) loss 0.4851 (1.0202) lr 9.5173e-05 eta 0:05:28
epoch [49/50] batch [60/392] time 0.437 (0.439) data 0.000 (0.006) loss 0.7053 (1.0785) lr 9.5173e-05 eta 0:05:17
epoch [49/50] batch [80/392] time 0.428 (0.437) data 0.000 (0.004) loss 1.1910 (1.0895) lr 9.5173e-05 eta 0:05:07
epoch [49/50] batch [100/392] time 0.439 (0.436) data 0.000 (0.004) loss 0.8310 (1.0188) lr 9.5173e-05 eta 0:04:58
epoch [49/50] batch [120/392] time 0.427 (0.436) data 0.000 (0.003) loss 1.3297 (1.0292) lr 9.5173e-05 eta 0:04:49
epoch [49/50] batch [140/392] time 0.440 (0.436) data 0.000 (0.003) loss 1.6512 (1.0348) lr 9.5173e-05 eta 0:04:40
epoch [49/50] batch [160/392] time 0.437 (0.436) data 0.000 (0.002) loss 1.3013 (1.0151) lr 9.5173e-05 eta 0:04:31
epoch [49/50] batch [180/392] time 0.434 (0.435) data 0.000 (0.002) loss 0.1214 (1.0021) lr 9.5173e-05 eta 0:04:22
epoch [49/50] batch [200/392] time 0.423 (0.435) data 0.000 (0.002) loss 0.8671 (1.0106) lr 9.5173e-05 eta 0:04:14
epoch [49/50] batch [220/392] time 0.436 (0.435) data 0.000 (0.002) loss 2.1831 (1.0083) lr 9.5173e-05 eta 0:04:05
epoch [49/50] batch [240/392] time 0.432 (0.435) data 0.000 (0.002) loss 1.7100 (0.9976) lr 9.5173e-05 eta 0:03:56
epoch [49/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.2398 (0.9953) lr 9.5173e-05 eta 0:03:47
epoch [49/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.1763 (1.0093) lr 9.5173e-05 eta 0:03:38
epoch [49/50] batch [300/392] time 0.433 (0.434) data 0.000 (0.001) loss 0.5494 (1.0036) lr 9.5173e-05 eta 0:03:30
epoch [49/50] batch [320/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.9570 (1.0106) lr 9.5173e-05 eta 0:03:21
epoch [49/50] batch [340/392] time 0.427 (0.434) data 0.000 (0.001) loss 1.9244 (1.0146) lr 9.5173e-05 eta 0:03:12
epoch [49/50] batch [360/392] time 0.432 (0.434) data 0.000 (0.001) loss 0.8783 (1.0112) lr 9.5173e-05 eta 0:03:04
epoch [49/50] batch [380/392] time 0.436 (0.434) data 0.000 (0.001) loss 2.0575 (1.0145) lr 9.5173e-05 eta 0:02:55
epoch [50/50] batch [20/392] time 0.433 (0.453) data 0.000 (0.019) loss 1.1845 (1.1474) lr 7.0224e-05 eta 0:02:48
epoch [50/50] batch [40/392] time 0.427 (0.443) data 0.000 (0.010) loss 0.4189 (1.0902) lr 7.0224e-05 eta 0:02:35
epoch [50/50] batch [60/392] time 0.439 (0.440) data 0.000 (0.006) loss 0.6620 (0.9651) lr 7.0224e-05 eta 0:02:26
epoch [50/50] batch [80/392] time 0.429 (0.438) data 0.000 (0.005) loss 1.0931 (0.9603) lr 7.0224e-05 eta 0:02:16
epoch [50/50] batch [100/392] time 0.438 (0.437) data 0.000 (0.004) loss 1.0900 (1.0240) lr 7.0224e-05 eta 0:02:07
epoch [50/50] batch [120/392] time 0.432 (0.437) data 0.000 (0.003) loss 1.4900 (1.0253) lr 7.0224e-05 eta 0:01:58
epoch [50/50] batch [140/392] time 0.438 (0.436) data 0.000 (0.003) loss 1.3793 (1.0345) lr 7.0224e-05 eta 0:01:49
epoch [50/50] batch [160/392] time 0.438 (0.436) data 0.000 (0.003) loss 5.3857 (1.0445) lr 7.0224e-05 eta 0:01:41
epoch [50/50] batch [180/392] time 0.436 (0.436) data 0.000 (0.002) loss 1.5528 (1.0515) lr 7.0224e-05 eta 0:01:32
epoch [50/50] batch [200/392] time 0.437 (0.436) data 0.000 (0.002) loss 1.0356 (1.0820) lr 7.0224e-05 eta 0:01:23
epoch [50/50] batch [220/392] time 0.434 (0.435) data 0.000 (0.002) loss 1.0266 (1.0830) lr 7.0224e-05 eta 0:01:14
epoch [50/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.1986 (1.0704) lr 7.0224e-05 eta 0:01:06
epoch [50/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.002) loss 2.8738 (1.0810) lr 7.0224e-05 eta 0:00:57
epoch [50/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.2294 (1.0929) lr 7.0224e-05 eta 0:00:48
epoch [50/50] batch [300/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.2830 (1.0768) lr 7.0224e-05 eta 0:00:40
epoch [50/50] batch [320/392] time 0.439 (0.435) data 0.000 (0.001) loss 0.4942 (1.0828) lr 7.0224e-05 eta 0:00:31
epoch [50/50] batch [340/392] time 0.434 (0.435) data 0.000 (0.001) loss 1.8400 (1.0903) lr 7.0224e-05 eta 0:00:22
epoch [50/50] batch [360/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.6886 (1.0833) lr 7.0224e-05 eta 0:00:13
epoch [50/50] batch [380/392] time 0.426 (0.435) data 0.000 (0.001) loss 0.2038 (1.0966) lr 7.0224e-05 eta 0:00:05
Checkpoint saved to output/base2new/train_base/stanford_cars/vit_b16_ep50_c4_BZ4_ProDA/seed2/prompt_learner/model.pth.tar-50
Finish training
Deploy the last-epoch model
Evaluate on the *test* set
=> result
* total: 4,006
* correct: 3,256
* accuracy: 81.28%
* error: 18.72%
* macro_f1: 81.22%
Elapsed: 2:23:22
