***************
** Arguments **
***************
backbone: 
config_file: configs/trainers/ProDA/vit_b16_ep50_c4_BZ4_ProDA.yaml
dataset_config_file: configs/datasets/stanford_cars.yaml
eval_only: False
head: 
load_epoch: None
model_dir: 
no_train: False
opts: ['DATASET.NUM_SHOTS', '16', 'DATASET.SUBSAMPLE_CLASSES', 'base']
output_dir: output/base2new/train_base/stanford_cars/vit_b16_ep50_c4_BZ4_ProDA/seed1
resume: 
root: /mnt/hdd/DATA
seed: 1
source_domains: None
target_domains: None
trainer: ProDA
transforms: None
************
** Config **
************
DATALOADER:
  K_TRANSFORMS: 1
  NUM_WORKERS: 8
  RETURN_IMG0: False
  TEST:
    BATCH_SIZE: 100
    SAMPLER: SequentialSampler
  TRAIN_U:
    BATCH_SIZE: 32
    N_DOMAIN: 0
    N_INS: 16
    SAME_AS_X: True
    SAMPLER: RandomSampler
  TRAIN_X:
    BATCH_SIZE: 4
    N_DOMAIN: 0
    N_INS: 16
    SAMPLER: RandomSampler
DATASET:
  ALL_AS_UNLABELED: False
  CIFAR_C_LEVEL: 1
  CIFAR_C_TYPE: 
  NAME: StanfordCars
  NUM_LABELED: -1
  NUM_SHOTS: 16
  ROOT: /mnt/hdd/DATA
  SOURCE_DOMAINS: ()
  STL10_FOLD: -1
  SUBSAMPLE_CLASSES: base
  TARGET_DOMAINS: ()
  VAL_PERCENT: 0.1
INPUT:
  COLORJITTER_B: 0.4
  COLORJITTER_C: 0.4
  COLORJITTER_H: 0.1
  COLORJITTER_S: 0.4
  CROP_PADDING: 4
  CUTOUT_LEN: 16
  CUTOUT_N: 1
  GB_K: 21
  GB_P: 0.5
  GN_MEAN: 0.0
  GN_STD: 0.15
  INTERPOLATION: bicubic
  NO_TRANSFORM: False
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  RANDAUGMENT_M: 10
  RANDAUGMENT_N: 2
  RGS_P: 0.2
  RRCROP_SCALE: (0.08, 1.0)
  SIZE: (224, 224)
  TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize')
MODEL:
  BACKBONE:
    NAME: ViT-B/16
    PRETRAINED: True
  HEAD:
    ACTIVATION: relu
    BN: True
    DROPOUT: 0.0
    HIDDEN_LAYERS: ()
    NAME: 
  INIT_WEIGHTS: 
OPTIM:
  ADAM_BETA1: 0.9
  ADAM_BETA2: 0.999
  BASE_LR_MULT: 0.1
  GAMMA: 0.1
  LR: 0.002
  LR_SCHEDULER: cosine
  MAX_EPOCH: 50
  MOMENTUM: 0.9
  NAME: sgd
  NEW_LAYERS: ()
  RMSPROP_ALPHA: 0.99
  SGD_DAMPNING: 0
  SGD_NESTEROV: False
  STAGED_LR: False
  STEPSIZE: (-1,)
  WARMUP_CONS_LR: 1e-05
  WARMUP_EPOCH: 5
  WARMUP_MIN_LR: 1e-05
  WARMUP_RECOUNT: True
  WARMUP_TYPE: constant
  WEIGHT_DECAY: 0.0005
OUTPUT_DIR: output/base2new/train_base/stanford_cars/vit_b16_ep50_c4_BZ4_ProDA/seed1
RESUME: 
SEED: 1
TEST:
  COMPUTE_CMAT: False
  EVALUATOR: Classification
  FINAL_MODEL: last_step
  NO_TEST: False
  PER_CLASS_RESULT: False
  SPLIT: test
TRAIN:
  CHECKPOINT_FREQ: 0
  COUNT_ITER: train_x
  PRINT_FREQ: 20
TRAINER:
  CDAC:
    CLASS_LR_MULTI: 10
    P_THRESH: 0.95
    RAMPUP_COEF: 30
    RAMPUP_ITRS: 1000
    STRONG_TRANSFORMS: ()
    TOPK_MATCH: 5
  COCOOP:
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  COOP:
    CLASS_TOKEN_POSITION: end
    CSC: False
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  CROSSGRAD:
    ALPHA_D: 0.5
    ALPHA_F: 0.5
    EPS_D: 1.0
    EPS_F: 1.0
  DAEL:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DAELDG:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DDAIG:
    ALPHA: 0.5
    CLAMP: False
    CLAMP_MAX: 1.0
    CLAMP_MIN: -1.0
    G_ARCH: 
    LMDA: 0.3
    WARMUP: 0
  DOMAINMIX:
    ALPHA: 1.0
    BETA: 1.0
    TYPE: crossdomain
  ENTMIN:
    LMDA: 0.001
  FIXMATCH:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 1.0
  IVLP:
    CTX_INIT: a photo of a
    N_CTX_TEXT: 2
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_TEXT: 9
    PROMPT_DEPTH_VISION: 9
  M3SDA:
    LMDA: 0.5
    N_STEP_F: 4
  MAPLE:
    CTX_INIT: a photo of a
    N_CTX: 4
    PREC: fp16
    PROMPT_DEPTH: 9
  MCD:
    N_STEP_F: 4
  MEANTEACHER:
    EMA_ALPHA: 0.999
    RAMPUP: 5
    WEIGHT_U: 1.0
  MIXMATCH:
    MIXUP_BETA: 0.75
    RAMPUP: 20000
    TEMP: 2.0
    WEIGHT_U: 100.0
  MME:
    LMDA: 0.1
  NAME: ProDA
  ProDA:
    N_CTX: 4
    N_PROMPT: 32
    PREC: fp16
  SE:
    CONF_THRE: 0.95
    EMA_ALPHA: 0.999
    RAMPUP: 300
  VPT:
    CTX_INIT: a photo of a
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_VISION: 1
USE_CUDA: True
VERBOSE: True
VERSION: 1
Collecting env info ...
** System info **
PyTorch version: 2.2.1+cu121
Is debug build: False
CUDA used to build PyTorch: 12.1
ROCM used to build PyTorch: N/A

OS: Debian GNU/Linux 12 (bookworm) (x86_64)
GCC version: (Debian 12.2.0-14) 12.2.0
Clang version: Could not collect
CMake version: Could not collect
Libc version: glibc-2.36

Python version: 3.11.2 (main, Mar 13 2023, 12:18:29) [GCC 12.2.0] (64-bit runtime)
Python platform: Linux-6.5.13-3-pve-x86_64-with-glibc2.36
Is CUDA available: True
CUDA runtime version: 11.8.89
CUDA_MODULE_LOADING set to: LAZY
GPU models and configuration: 
GPU 0: NVIDIA A800 80GB PCIe
GPU 1: NVIDIA A800 80GB PCIe

Nvidia driver version: 525.147.05
cuDNN version: Could not collect
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True

CPU:
Architecture:                       x86_64
CPU op-mode(s):                     32-bit, 64-bit
Address sizes:                      46 bits physical, 57 bits virtual
Byte Order:                         Little Endian
CPU(s):                             64
On-line CPU(s) list:                18,20,22,23,25-27,29,31,32,34,37,46-49
Off-line CPU(s) list:               0-17,19,21,24,28,30,33,35,36,38-45,50-63
Vendor ID:                          GenuineIntel
Model name:                         Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz
CPU family:                         6
Model:                              106
Thread(s) per core:                 2
Core(s) per socket:                 16
Socket(s):                          2
Stepping:                           6
CPU(s) scaling MHz:                 98%
CPU max MHz:                        3500.0000
CPU min MHz:                        800.0000
BogoMIPS:                           5800.00
Flags:                              fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect wbnoinvd dtherm ida arat pln pts vnmi avx512vbmi umip pku ospke avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid fsrm md_clear pconfig flush_l1d arch_capabilities
Virtualization:                     VT-x
L1d cache:                          1.5 MiB (32 instances)
L1i cache:                          1 MiB (32 instances)
L2 cache:                           40 MiB (32 instances)
L3 cache:                           48 MiB (2 instances)
NUMA node(s):                       2
NUMA node0 CPU(s):                  0-15,32-47
NUMA node1 CPU(s):                  16-31,48-63
Vulnerability Gather data sampling: Vulnerable: No microcode
Vulnerability Itlb multihit:        Not affected
Vulnerability L1tf:                 Not affected
Vulnerability Mds:                  Not affected
Vulnerability Meltdown:             Not affected
Vulnerability Mmio stale data:      Mitigation; Clear CPU buffers; SMT vulnerable
Vulnerability Retbleed:             Not affected
Vulnerability Spec rstack overflow: Not affected
Vulnerability Spec store bypass:    Mitigation; Speculative Store Bypass disabled via prctl
Vulnerability Spectre v1:           Mitigation; usercopy/swapgs barriers and __user pointer sanitization
Vulnerability Spectre v2:           Mitigation; Enhanced / Automatic IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
Vulnerability Srbds:                Not affected
Vulnerability Tsx async abort:      Not affected

Versions of relevant libraries:
[pip3] flake8==3.7.9
[pip3] flake8==3.7.9
[pip3] numpy==1.26.4
[pip3] torch==2.2.1
[pip3] torchaudio==2.2.1
[pip3] torchvision==0.17.1
[pip3] triton==2.2.0
[conda] Could not collect
        Pillow (10.2.0)

Loading trainer: ProDA
Loading dataset: StanfordCars
Reading split from /mnt/hdd/DATA/stanford_cars/split_zhou_StanfordCars.json
Loading preprocessed few-shot data from /mnt/hdd/DATA/stanford_cars/split_fewshot/shot_16_shuffled-seed_1.pkl
SUBSAMPLE BASE CLASSES!
Building transform_train
+ random resized crop (size=(224, 224), scale=(0.08, 1.0))
+ random flip
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
Building transform_test
+ resize the smaller edge to 224
+ 224x224 center crop
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
---------  ------------
Dataset    StanfordCars
# classes  98
# train_x  1,568
# val      392
# test     3,991
---------  ------------
Loading CLIP (backbone: ViT-B/16)
Building custom CLIP
Turning off gradients in both the image and the text encoder
Parameters to be updated: {'prompt_learner.ctx'}
Loading evaluator: Classification
No checkpoint found, train from scratch
Initialize tensorboard (log_dir=output/base2new/train_base/stanford_cars/vit_b16_ep50_c4_BZ4_ProDA/seed1/tensorboard)
epoch [1/50] batch [20/392] time 0.423 (0.563) data 0.000 (0.034) loss 0.8373 (1.5204) lr 1.0000e-05 eta 3:03:50
epoch [1/50] batch [40/392] time 0.434 (0.496) data 0.000 (0.017) loss 1.1107 (1.4516) lr 1.0000e-05 eta 2:41:35
epoch [1/50] batch [60/392] time 0.424 (0.474) data 0.000 (0.011) loss 1.1770 (1.4184) lr 1.0000e-05 eta 2:34:20
epoch [1/50] batch [80/392] time 0.424 (0.463) data 0.000 (0.009) loss 1.0933 (1.4311) lr 1.0000e-05 eta 2:30:32
epoch [1/50] batch [100/392] time 0.435 (0.457) data 0.000 (0.007) loss 2.2725 (1.4472) lr 1.0000e-05 eta 2:28:24
epoch [1/50] batch [120/392] time 0.437 (0.453) data 0.000 (0.006) loss 1.6488 (1.4318) lr 1.0000e-05 eta 2:26:55
epoch [1/50] batch [140/392] time 0.425 (0.449) data 0.000 (0.005) loss 1.1665 (1.4476) lr 1.0000e-05 eta 2:25:44
epoch [1/50] batch [160/392] time 0.431 (0.447) data 0.000 (0.004) loss 2.5240 (1.4580) lr 1.0000e-05 eta 2:24:54
epoch [1/50] batch [180/392] time 0.425 (0.446) data 0.000 (0.004) loss 0.5962 (1.4545) lr 1.0000e-05 eta 2:24:11
epoch [1/50] batch [200/392] time 0.436 (0.444) data 0.000 (0.004) loss 3.1952 (1.4635) lr 1.0000e-05 eta 2:23:37
epoch [1/50] batch [220/392] time 0.428 (0.443) data 0.000 (0.003) loss 1.8430 (1.4525) lr 1.0000e-05 eta 2:23:06
epoch [1/50] batch [240/392] time 0.437 (0.442) data 0.000 (0.003) loss 1.7107 (1.4464) lr 1.0000e-05 eta 2:22:40
epoch [1/50] batch [260/392] time 0.423 (0.441) data 0.000 (0.003) loss 1.9084 (1.4600) lr 1.0000e-05 eta 2:22:15
epoch [1/50] batch [280/392] time 0.427 (0.441) data 0.000 (0.003) loss 0.2472 (1.4341) lr 1.0000e-05 eta 2:21:56
epoch [1/50] batch [300/392] time 0.437 (0.440) data 0.000 (0.002) loss 1.0748 (1.4390) lr 1.0000e-05 eta 2:21:37
epoch [1/50] batch [320/392] time 0.436 (0.440) data 0.000 (0.002) loss 0.9242 (1.4465) lr 1.0000e-05 eta 2:21:21
epoch [1/50] batch [340/392] time 0.438 (0.439) data 0.000 (0.002) loss 1.4828 (1.4382) lr 1.0000e-05 eta 2:21:03
epoch [1/50] batch [360/392] time 0.422 (0.439) data 0.000 (0.002) loss 0.3422 (1.4321) lr 1.0000e-05 eta 2:20:46
epoch [1/50] batch [380/392] time 0.432 (0.439) data 0.000 (0.002) loss 1.3706 (1.4215) lr 1.0000e-05 eta 2:20:32
epoch [2/50] batch [20/392] time 0.438 (0.452) data 0.000 (0.019) loss 2.0280 (1.5024) lr 1.0000e-05 eta 2:24:39
epoch [2/50] batch [40/392] time 0.434 (0.443) data 0.000 (0.010) loss 2.1567 (1.4782) lr 1.0000e-05 eta 2:21:36
epoch [2/50] batch [60/392] time 0.433 (0.440) data 0.000 (0.006) loss 1.7110 (1.5400) lr 1.0000e-05 eta 2:20:22
epoch [2/50] batch [80/392] time 0.432 (0.438) data 0.000 (0.005) loss 0.7121 (1.4452) lr 1.0000e-05 eta 2:19:44
epoch [2/50] batch [100/392] time 0.428 (0.437) data 0.000 (0.004) loss 3.0996 (1.4598) lr 1.0000e-05 eta 2:19:19
epoch [2/50] batch [120/392] time 0.439 (0.437) data 0.000 (0.003) loss 2.6491 (1.4447) lr 1.0000e-05 eta 2:19:00
epoch [2/50] batch [140/392] time 0.427 (0.436) data 0.000 (0.003) loss 0.2635 (1.4012) lr 1.0000e-05 eta 2:18:37
epoch [2/50] batch [160/392] time 0.434 (0.436) data 0.000 (0.003) loss 0.4850 (1.4073) lr 1.0000e-05 eta 2:18:23
epoch [2/50] batch [180/392] time 0.437 (0.436) data 0.000 (0.002) loss 1.0730 (1.3892) lr 1.0000e-05 eta 2:18:10
epoch [2/50] batch [200/392] time 0.428 (0.436) data 0.000 (0.002) loss 1.5157 (1.3860) lr 1.0000e-05 eta 2:17:57
epoch [2/50] batch [220/392] time 0.430 (0.435) data 0.000 (0.002) loss 1.1971 (1.3867) lr 1.0000e-05 eta 2:17:46
epoch [2/50] batch [240/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.6851 (1.3786) lr 1.0000e-05 eta 2:17:35
epoch [2/50] batch [260/392] time 0.434 (0.435) data 0.000 (0.002) loss 2.5296 (1.3757) lr 1.0000e-05 eta 2:17:23
epoch [2/50] batch [280/392] time 0.437 (0.435) data 0.000 (0.002) loss 3.7247 (1.3799) lr 1.0000e-05 eta 2:17:13
epoch [2/50] batch [300/392] time 0.432 (0.435) data 0.000 (0.001) loss 0.4933 (1.3563) lr 1.0000e-05 eta 2:17:05
epoch [2/50] batch [320/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.0048 (1.3619) lr 1.0000e-05 eta 2:16:55
epoch [2/50] batch [340/392] time 0.439 (0.435) data 0.000 (0.001) loss 0.2567 (1.3557) lr 1.0000e-05 eta 2:16:44
epoch [2/50] batch [360/392] time 0.427 (0.435) data 0.000 (0.001) loss 1.4273 (1.3605) lr 1.0000e-05 eta 2:16:32
epoch [2/50] batch [380/392] time 0.426 (0.435) data 0.000 (0.001) loss 1.1206 (1.3524) lr 1.0000e-05 eta 2:16:21
epoch [3/50] batch [20/392] time 0.422 (0.451) data 0.000 (0.018) loss 0.5681 (1.3245) lr 1.0000e-05 eta 2:21:20
epoch [3/50] batch [40/392] time 0.434 (0.442) data 0.000 (0.009) loss 0.8747 (1.3556) lr 1.0000e-05 eta 2:18:20
epoch [3/50] batch [60/392] time 0.428 (0.439) data 0.000 (0.006) loss 0.6847 (1.4341) lr 1.0000e-05 eta 2:17:18
epoch [3/50] batch [80/392] time 0.438 (0.438) data 0.000 (0.005) loss 1.9216 (1.3675) lr 1.0000e-05 eta 2:16:47
epoch [3/50] batch [100/392] time 0.438 (0.437) data 0.000 (0.004) loss 1.1710 (1.3344) lr 1.0000e-05 eta 2:16:27
epoch [3/50] batch [120/392] time 0.438 (0.437) data 0.000 (0.003) loss 2.4689 (1.3163) lr 1.0000e-05 eta 2:16:06
epoch [3/50] batch [140/392] time 0.434 (0.436) data 0.000 (0.003) loss 1.7874 (1.2910) lr 1.0000e-05 eta 2:15:49
epoch [3/50] batch [160/392] time 0.428 (0.436) data 0.000 (0.002) loss 0.8591 (1.3151) lr 1.0000e-05 eta 2:15:36
epoch [3/50] batch [180/392] time 0.437 (0.436) data 0.000 (0.002) loss 3.0061 (1.3114) lr 1.0000e-05 eta 2:15:23
epoch [3/50] batch [200/392] time 0.433 (0.436) data 0.000 (0.002) loss 3.2043 (1.3094) lr 1.0000e-05 eta 2:15:12
epoch [3/50] batch [220/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.7032 (1.3148) lr 1.0000e-05 eta 2:14:58
epoch [3/50] batch [240/392] time 0.437 (0.435) data 0.000 (0.002) loss 1.2644 (1.3048) lr 1.0000e-05 eta 2:14:46
epoch [3/50] batch [260/392] time 0.439 (0.435) data 0.000 (0.002) loss 0.2759 (1.3211) lr 1.0000e-05 eta 2:14:35
epoch [3/50] batch [280/392] time 0.428 (0.435) data 0.000 (0.001) loss 1.1433 (1.3189) lr 1.0000e-05 eta 2:14:24
epoch [3/50] batch [300/392] time 0.434 (0.435) data 0.000 (0.001) loss 0.6649 (1.3113) lr 1.0000e-05 eta 2:14:13
epoch [3/50] batch [320/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.0778 (1.3115) lr 1.0000e-05 eta 2:14:02
epoch [3/50] batch [340/392] time 0.428 (0.435) data 0.000 (0.001) loss 2.1018 (1.3142) lr 1.0000e-05 eta 2:13:52
epoch [3/50] batch [360/392] time 0.434 (0.435) data 0.000 (0.001) loss 1.0066 (1.3239) lr 1.0000e-05 eta 2:13:41
epoch [3/50] batch [380/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.8240 (1.3133) lr 1.0000e-05 eta 2:13:32
epoch [4/50] batch [20/392] time 0.427 (0.450) data 0.000 (0.017) loss 0.8752 (1.6329) lr 1.0000e-05 eta 2:18:03
epoch [4/50] batch [40/392] time 0.429 (0.441) data 0.000 (0.009) loss 0.8138 (1.4612) lr 1.0000e-05 eta 2:15:12
epoch [4/50] batch [60/392] time 0.438 (0.439) data 0.000 (0.006) loss 1.7346 (1.4664) lr 1.0000e-05 eta 2:14:16
epoch [4/50] batch [80/392] time 0.437 (0.437) data 0.000 (0.004) loss 0.6081 (1.4415) lr 1.0000e-05 eta 2:13:45
epoch [4/50] batch [100/392] time 0.432 (0.437) data 0.000 (0.004) loss 0.9223 (1.4415) lr 1.0000e-05 eta 2:13:21
epoch [4/50] batch [120/392] time 0.433 (0.436) data 0.000 (0.003) loss 0.7832 (1.4536) lr 1.0000e-05 eta 2:13:04
epoch [4/50] batch [140/392] time 0.434 (0.436) data 0.000 (0.003) loss 0.7611 (1.4662) lr 1.0000e-05 eta 2:12:51
epoch [4/50] batch [160/392] time 0.437 (0.436) data 0.000 (0.002) loss 0.4399 (1.4315) lr 1.0000e-05 eta 2:12:37
epoch [4/50] batch [180/392] time 0.436 (0.435) data 0.000 (0.002) loss 2.0572 (1.4236) lr 1.0000e-05 eta 2:12:21
epoch [4/50] batch [200/392] time 0.432 (0.435) data 0.000 (0.002) loss 1.2724 (1.4063) lr 1.0000e-05 eta 2:12:09
epoch [4/50] batch [220/392] time 0.434 (0.435) data 0.000 (0.002) loss 1.0240 (1.3808) lr 1.0000e-05 eta 2:11:57
epoch [4/50] batch [240/392] time 0.437 (0.435) data 0.000 (0.002) loss 2.2544 (1.4093) lr 1.0000e-05 eta 2:11:44
epoch [4/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.3585 (1.4014) lr 1.0000e-05 eta 2:11:34
epoch [4/50] batch [280/392] time 0.434 (0.434) data 0.000 (0.001) loss 1.1355 (1.3812) lr 1.0000e-05 eta 2:11:20
epoch [4/50] batch [300/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.2902 (1.3788) lr 1.0000e-05 eta 2:11:10
epoch [4/50] batch [320/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.3942 (1.3765) lr 1.0000e-05 eta 2:11:01
epoch [4/50] batch [340/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.8675 (1.3815) lr 1.0000e-05 eta 2:10:51
epoch [4/50] batch [360/392] time 0.438 (0.434) data 0.000 (0.001) loss 2.8897 (1.3857) lr 1.0000e-05 eta 2:10:43
epoch [4/50] batch [380/392] time 0.438 (0.434) data 0.000 (0.001) loss 0.9450 (1.3783) lr 1.0000e-05 eta 2:10:34
epoch [5/50] batch [20/392] time 0.440 (0.451) data 0.000 (0.017) loss 1.4478 (1.5630) lr 1.0000e-05 eta 2:15:18
epoch [5/50] batch [40/392] time 0.434 (0.443) data 0.000 (0.009) loss 0.9591 (1.3786) lr 1.0000e-05 eta 2:12:50
epoch [5/50] batch [60/392] time 0.434 (0.440) data 0.000 (0.006) loss 1.1264 (1.3437) lr 1.0000e-05 eta 2:11:50
epoch [5/50] batch [80/392] time 0.433 (0.439) data 0.000 (0.004) loss 3.6411 (1.3607) lr 1.0000e-05 eta 2:11:24
epoch [5/50] batch [100/392] time 0.435 (0.438) data 0.000 (0.004) loss 1.0051 (1.3742) lr 1.0000e-05 eta 2:10:55
epoch [5/50] batch [120/392] time 0.434 (0.437) data 0.000 (0.003) loss 1.3453 (1.3381) lr 1.0000e-05 eta 2:10:35
epoch [5/50] batch [140/392] time 0.429 (0.437) data 0.000 (0.003) loss 0.8676 (1.3303) lr 1.0000e-05 eta 2:10:20
epoch [5/50] batch [160/392] time 0.429 (0.437) data 0.000 (0.002) loss 1.3185 (1.3426) lr 1.0000e-05 eta 2:10:07
epoch [5/50] batch [180/392] time 0.438 (0.437) data 0.000 (0.002) loss 1.7258 (1.3428) lr 1.0000e-05 eta 2:09:53
epoch [5/50] batch [200/392] time 0.439 (0.436) data 0.000 (0.002) loss 1.3413 (1.3385) lr 1.0000e-05 eta 2:09:39
epoch [5/50] batch [220/392] time 0.439 (0.436) data 0.000 (0.002) loss 1.7620 (1.3369) lr 1.0000e-05 eta 2:09:25
epoch [5/50] batch [240/392] time 0.434 (0.436) data 0.000 (0.002) loss 0.1958 (1.3403) lr 1.0000e-05 eta 2:09:14
epoch [5/50] batch [260/392] time 0.434 (0.436) data 0.000 (0.001) loss 0.7848 (1.3307) lr 1.0000e-05 eta 2:09:02
epoch [5/50] batch [280/392] time 0.432 (0.436) data 0.000 (0.001) loss 0.3465 (1.3263) lr 1.0000e-05 eta 2:08:52
epoch [5/50] batch [300/392] time 0.428 (0.435) data 0.000 (0.001) loss 0.6363 (1.3209) lr 1.0000e-05 eta 2:08:42
epoch [5/50] batch [320/392] time 0.436 (0.435) data 0.000 (0.001) loss 1.5810 (1.3184) lr 1.0000e-05 eta 2:08:32
epoch [5/50] batch [340/392] time 0.427 (0.435) data 0.000 (0.001) loss 1.6766 (1.3142) lr 1.0000e-05 eta 2:08:21
epoch [5/50] batch [360/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.5084 (1.3144) lr 1.0000e-05 eta 2:08:10
epoch [5/50] batch [380/392] time 0.437 (0.435) data 0.000 (0.001) loss 1.6504 (1.3158) lr 1.0000e-05 eta 2:07:59
epoch [6/50] batch [20/392] time 0.427 (0.452) data 0.000 (0.018) loss 0.7448 (1.2936) lr 2.0000e-03 eta 2:12:39
epoch [6/50] batch [40/392] time 0.427 (0.443) data 0.000 (0.009) loss 0.3872 (1.2977) lr 2.0000e-03 eta 2:09:53
epoch [6/50] batch [60/392] time 0.438 (0.440) data 0.000 (0.006) loss 1.4465 (1.2848) lr 2.0000e-03 eta 2:08:50
epoch [6/50] batch [80/392] time 0.423 (0.438) data 0.000 (0.005) loss 1.7701 (1.2786) lr 2.0000e-03 eta 2:08:05
epoch [6/50] batch [100/392] time 0.438 (0.437) data 0.000 (0.004) loss 0.2499 (1.2703) lr 2.0000e-03 eta 2:07:43
epoch [6/50] batch [120/392] time 0.439 (0.436) data 0.000 (0.003) loss 0.9577 (1.2511) lr 2.0000e-03 eta 2:07:26
epoch [6/50] batch [140/392] time 0.434 (0.436) data 0.000 (0.003) loss 1.6445 (1.2757) lr 2.0000e-03 eta 2:07:11
epoch [6/50] batch [160/392] time 0.428 (0.436) data 0.000 (0.002) loss 1.5718 (1.3048) lr 2.0000e-03 eta 2:06:57
epoch [6/50] batch [180/392] time 0.434 (0.436) data 0.000 (0.002) loss 2.4577 (1.2980) lr 2.0000e-03 eta 2:06:45
epoch [6/50] batch [200/392] time 0.434 (0.435) data 0.000 (0.002) loss 1.3068 (1.2857) lr 2.0000e-03 eta 2:06:33
epoch [6/50] batch [220/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.4759 (1.3004) lr 2.0000e-03 eta 2:06:22
epoch [6/50] batch [240/392] time 0.439 (0.435) data 0.000 (0.002) loss 0.0555 (1.3173) lr 2.0000e-03 eta 2:06:12
epoch [6/50] batch [260/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.6210 (1.3067) lr 2.0000e-03 eta 2:06:02
epoch [6/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 2.3684 (1.3103) lr 2.0000e-03 eta 2:05:53
epoch [6/50] batch [300/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.7811 (1.3174) lr 2.0000e-03 eta 2:05:44
epoch [6/50] batch [320/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.6295 (1.3237) lr 2.0000e-03 eta 2:05:34
epoch [6/50] batch [340/392] time 0.429 (0.435) data 0.000 (0.001) loss 0.0476 (1.3292) lr 2.0000e-03 eta 2:05:25
epoch [6/50] batch [360/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.3043 (1.3380) lr 2.0000e-03 eta 2:05:15
epoch [6/50] batch [380/392] time 0.428 (0.435) data 0.000 (0.001) loss 0.9515 (1.3304) lr 2.0000e-03 eta 2:05:04
epoch [7/50] batch [20/392] time 0.438 (0.451) data 0.000 (0.017) loss 1.3374 (1.2667) lr 1.9980e-03 eta 2:09:35
epoch [7/50] batch [40/392] time 0.439 (0.442) data 0.000 (0.008) loss 1.6323 (1.3520) lr 1.9980e-03 eta 2:06:53
epoch [7/50] batch [60/392] time 0.438 (0.440) data 0.000 (0.006) loss 1.4146 (1.3715) lr 1.9980e-03 eta 2:06:03
epoch [7/50] batch [80/392] time 0.438 (0.438) data 0.000 (0.004) loss 1.9723 (1.3688) lr 1.9980e-03 eta 2:05:25
epoch [7/50] batch [100/392] time 0.423 (0.437) data 0.000 (0.003) loss 2.2477 (1.3726) lr 1.9980e-03 eta 2:04:59
epoch [7/50] batch [120/392] time 0.438 (0.437) data 0.000 (0.003) loss 1.4790 (1.3478) lr 1.9980e-03 eta 2:04:37
epoch [7/50] batch [140/392] time 0.433 (0.436) data 0.000 (0.003) loss 0.6532 (1.2848) lr 1.9980e-03 eta 2:04:20
epoch [7/50] batch [160/392] time 0.436 (0.436) data 0.000 (0.002) loss 0.9838 (1.2491) lr 1.9980e-03 eta 2:04:05
epoch [7/50] batch [180/392] time 0.434 (0.435) data 0.000 (0.002) loss 0.5476 (1.2082) lr 1.9980e-03 eta 2:03:51
epoch [7/50] batch [200/392] time 0.428 (0.435) data 0.000 (0.002) loss 2.8085 (1.2107) lr 1.9980e-03 eta 2:03:41
epoch [7/50] batch [220/392] time 0.437 (0.435) data 0.000 (0.002) loss 1.4340 (1.2051) lr 1.9980e-03 eta 2:03:33
epoch [7/50] batch [240/392] time 0.436 (0.435) data 0.000 (0.002) loss 1.4998 (1.1871) lr 1.9980e-03 eta 2:03:22
epoch [7/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.9574 (1.2031) lr 1.9980e-03 eta 2:03:11
epoch [7/50] batch [280/392] time 0.436 (0.435) data 0.000 (0.001) loss 1.3103 (1.2109) lr 1.9980e-03 eta 2:03:02
epoch [7/50] batch [300/392] time 0.421 (0.435) data 0.000 (0.001) loss 0.6668 (1.2069) lr 1.9980e-03 eta 2:02:49
epoch [7/50] batch [320/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.5209 (1.2089) lr 1.9980e-03 eta 2:02:38
epoch [7/50] batch [340/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.6880 (1.1981) lr 1.9980e-03 eta 2:02:28
epoch [7/50] batch [360/392] time 0.423 (0.434) data 0.000 (0.001) loss 0.6034 (1.1932) lr 1.9980e-03 eta 2:02:16
epoch [7/50] batch [380/392] time 0.425 (0.434) data 0.000 (0.001) loss 0.8497 (1.1893) lr 1.9980e-03 eta 2:02:06
epoch [8/50] batch [20/392] time 0.433 (0.450) data 0.000 (0.017) loss 0.4048 (1.1962) lr 1.9921e-03 eta 2:06:20
epoch [8/50] batch [40/392] time 0.437 (0.441) data 0.000 (0.009) loss 0.8887 (1.1436) lr 1.9921e-03 eta 2:03:42
epoch [8/50] batch [60/392] time 0.429 (0.439) data 0.000 (0.006) loss 1.1143 (1.1549) lr 1.9921e-03 eta 2:02:48
epoch [8/50] batch [80/392] time 0.439 (0.437) data 0.000 (0.004) loss 1.5112 (1.1150) lr 1.9921e-03 eta 2:02:15
epoch [8/50] batch [100/392] time 0.433 (0.436) data 0.000 (0.003) loss 0.7935 (1.1177) lr 1.9921e-03 eta 2:01:53
epoch [8/50] batch [120/392] time 0.433 (0.436) data 0.000 (0.003) loss 2.2868 (1.1319) lr 1.9921e-03 eta 2:01:39
epoch [8/50] batch [140/392] time 0.428 (0.436) data 0.000 (0.003) loss 0.8991 (1.1368) lr 1.9921e-03 eta 2:01:25
epoch [8/50] batch [160/392] time 0.422 (0.436) data 0.000 (0.002) loss 1.1648 (1.1371) lr 1.9921e-03 eta 2:01:12
epoch [8/50] batch [180/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.3228 (1.1122) lr 1.9921e-03 eta 2:00:58
epoch [8/50] batch [200/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.4287 (1.0952) lr 1.9921e-03 eta 2:00:46
epoch [8/50] batch [220/392] time 0.433 (0.435) data 0.000 (0.002) loss 0.9942 (1.1082) lr 1.9921e-03 eta 2:00:35
epoch [8/50] batch [240/392] time 0.423 (0.435) data 0.000 (0.002) loss 0.3242 (1.1050) lr 1.9921e-03 eta 2:00:22
epoch [8/50] batch [260/392] time 0.440 (0.435) data 0.000 (0.001) loss 2.1077 (1.1217) lr 1.9921e-03 eta 2:00:13
epoch [8/50] batch [280/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.4514 (1.1306) lr 1.9921e-03 eta 2:00:03
epoch [8/50] batch [300/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.7228 (1.1429) lr 1.9921e-03 eta 1:59:55
epoch [8/50] batch [320/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.2625 (1.1458) lr 1.9921e-03 eta 1:59:47
epoch [8/50] batch [340/392] time 0.427 (0.435) data 0.000 (0.001) loss 2.0183 (1.1462) lr 1.9921e-03 eta 1:59:37
epoch [8/50] batch [360/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.1954 (1.1534) lr 1.9921e-03 eta 1:59:28
epoch [8/50] batch [380/392] time 0.435 (0.434) data 0.000 (0.001) loss 0.8114 (1.1452) lr 1.9921e-03 eta 1:59:18
epoch [9/50] batch [20/392] time 0.439 (0.453) data 0.000 (0.018) loss 1.7760 (1.3672) lr 1.9823e-03 eta 2:04:12
epoch [9/50] batch [40/392] time 0.424 (0.444) data 0.000 (0.009) loss 0.5604 (1.3207) lr 1.9823e-03 eta 2:01:25
epoch [9/50] batch [60/392] time 0.434 (0.440) data 0.000 (0.006) loss 0.6404 (1.2893) lr 1.9823e-03 eta 2:00:20
epoch [9/50] batch [80/392] time 0.427 (0.439) data 0.000 (0.005) loss 0.4425 (1.1787) lr 1.9823e-03 eta 1:59:45
epoch [9/50] batch [100/392] time 0.434 (0.438) data 0.000 (0.004) loss 0.2085 (1.2022) lr 1.9823e-03 eta 1:59:20
epoch [9/50] batch [120/392] time 0.433 (0.437) data 0.000 (0.003) loss 1.5338 (1.2009) lr 1.9823e-03 eta 1:59:00
epoch [9/50] batch [140/392] time 0.437 (0.436) data 0.000 (0.003) loss 0.5724 (1.1621) lr 1.9823e-03 eta 1:58:44
epoch [9/50] batch [160/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.7250 (1.1686) lr 1.9823e-03 eta 1:58:29
epoch [9/50] batch [180/392] time 0.434 (0.436) data 0.000 (0.002) loss 0.8898 (1.1529) lr 1.9823e-03 eta 1:58:17
epoch [9/50] batch [200/392] time 0.428 (0.436) data 0.000 (0.002) loss 1.4812 (1.1487) lr 1.9823e-03 eta 1:58:04
epoch [9/50] batch [220/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.3786 (1.1162) lr 1.9823e-03 eta 1:57:50
epoch [9/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.4896 (1.1147) lr 1.9823e-03 eta 1:57:40
epoch [9/50] batch [260/392] time 0.434 (0.435) data 0.000 (0.002) loss 2.2541 (1.1223) lr 1.9823e-03 eta 1:57:30
epoch [9/50] batch [280/392] time 0.428 (0.435) data 0.000 (0.001) loss 1.5530 (1.1257) lr 1.9823e-03 eta 1:57:19
epoch [9/50] batch [300/392] time 0.437 (0.435) data 0.000 (0.001) loss 1.0149 (1.1372) lr 1.9823e-03 eta 1:57:07
epoch [9/50] batch [320/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.1088 (1.1317) lr 1.9823e-03 eta 1:56:59
epoch [9/50] batch [340/392] time 0.439 (0.435) data 0.000 (0.001) loss 0.8107 (1.1346) lr 1.9823e-03 eta 1:56:50
epoch [9/50] batch [360/392] time 0.440 (0.435) data 0.000 (0.001) loss 2.2246 (1.1413) lr 1.9823e-03 eta 1:56:42
epoch [9/50] batch [380/392] time 0.422 (0.435) data 0.000 (0.001) loss 1.1773 (1.1479) lr 1.9823e-03 eta 1:56:31
epoch [10/50] batch [20/392] time 0.437 (0.451) data 0.000 (0.017) loss 0.6728 (1.2230) lr 1.9686e-03 eta 2:00:37
epoch [10/50] batch [40/392] time 0.434 (0.443) data 0.000 (0.009) loss 1.4750 (1.1487) lr 1.9686e-03 eta 1:58:15
epoch [10/50] batch [60/392] time 0.433 (0.439) data 0.000 (0.006) loss 2.3240 (1.1673) lr 1.9686e-03 eta 1:57:15
epoch [10/50] batch [80/392] time 0.435 (0.438) data 0.000 (0.004) loss 0.7399 (1.2194) lr 1.9686e-03 eta 1:56:45
epoch [10/50] batch [100/392] time 0.429 (0.437) data 0.000 (0.004) loss 2.0297 (1.2130) lr 1.9686e-03 eta 1:56:25
epoch [10/50] batch [120/392] time 0.428 (0.437) data 0.000 (0.003) loss 1.4384 (1.2315) lr 1.9686e-03 eta 1:56:11
epoch [10/50] batch [140/392] time 0.439 (0.437) data 0.000 (0.003) loss 0.7585 (1.2333) lr 1.9686e-03 eta 1:55:58
epoch [10/50] batch [160/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.8107 (1.1905) lr 1.9686e-03 eta 1:55:44
epoch [10/50] batch [180/392] time 0.429 (0.436) data 0.000 (0.002) loss 0.6846 (1.1696) lr 1.9686e-03 eta 1:55:34
epoch [10/50] batch [200/392] time 0.437 (0.436) data 0.000 (0.002) loss 2.4312 (1.1695) lr 1.9686e-03 eta 1:55:23
epoch [10/50] batch [220/392] time 0.439 (0.436) data 0.000 (0.002) loss 0.3600 (1.1622) lr 1.9686e-03 eta 1:55:10
epoch [10/50] batch [240/392] time 0.433 (0.436) data 0.000 (0.002) loss 1.2635 (1.1488) lr 1.9686e-03 eta 1:54:58
epoch [10/50] batch [260/392] time 0.439 (0.436) data 0.000 (0.002) loss 0.3275 (1.1516) lr 1.9686e-03 eta 1:54:48
epoch [10/50] batch [280/392] time 0.434 (0.436) data 0.000 (0.001) loss 1.2945 (1.1383) lr 1.9686e-03 eta 1:54:38
epoch [10/50] batch [300/392] time 0.439 (0.435) data 0.000 (0.001) loss 0.0640 (1.1327) lr 1.9686e-03 eta 1:54:27
epoch [10/50] batch [320/392] time 0.435 (0.435) data 0.000 (0.001) loss 1.7160 (1.1319) lr 1.9686e-03 eta 1:54:17
epoch [10/50] batch [340/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.3811 (1.1498) lr 1.9686e-03 eta 1:54:08
epoch [10/50] batch [360/392] time 0.428 (0.435) data 0.000 (0.001) loss 0.6987 (1.1413) lr 1.9686e-03 eta 1:53:57
epoch [10/50] batch [380/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.0900 (1.1347) lr 1.9686e-03 eta 1:53:48
epoch [11/50] batch [20/392] time 0.434 (0.452) data 0.000 (0.017) loss 1.6516 (1.0554) lr 1.9511e-03 eta 1:57:53
epoch [11/50] batch [40/392] time 0.439 (0.443) data 0.000 (0.009) loss 0.8887 (1.0363) lr 1.9511e-03 eta 1:55:32
epoch [11/50] batch [60/392] time 0.429 (0.440) data 0.000 (0.006) loss 1.4859 (1.1158) lr 1.9511e-03 eta 1:54:38
epoch [11/50] batch [80/392] time 0.440 (0.439) data 0.000 (0.004) loss 1.7004 (1.1103) lr 1.9511e-03 eta 1:54:04
epoch [11/50] batch [100/392] time 0.439 (0.438) data 0.000 (0.004) loss 0.6672 (1.1040) lr 1.9511e-03 eta 1:53:42
epoch [11/50] batch [120/392] time 0.438 (0.437) data 0.000 (0.003) loss 0.5245 (1.1219) lr 1.9511e-03 eta 1:53:24
epoch [11/50] batch [140/392] time 0.434 (0.437) data 0.000 (0.003) loss 1.2960 (1.1114) lr 1.9511e-03 eta 1:53:08
epoch [11/50] batch [160/392] time 0.428 (0.436) data 0.000 (0.002) loss 1.3731 (1.1114) lr 1.9511e-03 eta 1:52:53
epoch [11/50] batch [180/392] time 0.439 (0.436) data 0.000 (0.002) loss 1.0123 (1.1185) lr 1.9511e-03 eta 1:52:42
epoch [11/50] batch [200/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.2381 (1.1168) lr 1.9511e-03 eta 1:52:31
epoch [11/50] batch [220/392] time 0.428 (0.436) data 0.000 (0.002) loss 0.5232 (1.0870) lr 1.9511e-03 eta 1:52:20
epoch [11/50] batch [240/392] time 0.440 (0.436) data 0.000 (0.002) loss 2.6056 (1.0743) lr 1.9511e-03 eta 1:52:10
epoch [11/50] batch [260/392] time 0.441 (0.436) data 0.000 (0.001) loss 0.4249 (1.0735) lr 1.9511e-03 eta 1:52:02
epoch [11/50] batch [280/392] time 0.433 (0.436) data 0.000 (0.001) loss 0.5932 (1.0610) lr 1.9511e-03 eta 1:51:51
epoch [11/50] batch [300/392] time 0.427 (0.436) data 0.000 (0.001) loss 2.3183 (1.0824) lr 1.9511e-03 eta 1:51:41
epoch [11/50] batch [320/392] time 0.423 (0.436) data 0.000 (0.001) loss 1.4081 (1.0745) lr 1.9511e-03 eta 1:51:29
epoch [11/50] batch [340/392] time 0.438 (0.435) data 0.000 (0.001) loss 2.7647 (1.0912) lr 1.9511e-03 eta 1:51:19
epoch [11/50] batch [360/392] time 0.433 (0.435) data 0.000 (0.001) loss 0.2232 (1.0896) lr 1.9511e-03 eta 1:51:08
epoch [11/50] batch [380/392] time 0.431 (0.435) data 0.000 (0.001) loss 0.9044 (1.0988) lr 1.9511e-03 eta 1:50:57
epoch [12/50] batch [20/392] time 0.437 (0.451) data 0.000 (0.018) loss 1.3221 (1.0451) lr 1.9298e-03 eta 1:54:44
epoch [12/50] batch [40/392] time 0.427 (0.441) data 0.000 (0.009) loss 1.2849 (1.0367) lr 1.9298e-03 eta 1:52:06
epoch [12/50] batch [60/392] time 0.438 (0.439) data 0.000 (0.006) loss 0.9180 (0.9778) lr 1.9298e-03 eta 1:51:18
epoch [12/50] batch [80/392] time 0.437 (0.437) data 0.000 (0.005) loss 0.4275 (0.9360) lr 1.9298e-03 eta 1:50:49
epoch [12/50] batch [100/392] time 0.437 (0.436) data 0.000 (0.004) loss 1.2934 (1.0095) lr 1.9298e-03 eta 1:50:29
epoch [12/50] batch [120/392] time 0.437 (0.436) data 0.000 (0.003) loss 0.6551 (1.0203) lr 1.9298e-03 eta 1:50:09
epoch [12/50] batch [140/392] time 0.437 (0.435) data 0.000 (0.003) loss 1.6310 (1.0505) lr 1.9298e-03 eta 1:49:56
epoch [12/50] batch [160/392] time 0.426 (0.435) data 0.000 (0.002) loss 0.5168 (1.0461) lr 1.9298e-03 eta 1:49:45
epoch [12/50] batch [180/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.6273 (1.0492) lr 1.9298e-03 eta 1:49:33
epoch [12/50] batch [200/392] time 0.423 (0.435) data 0.000 (0.002) loss 1.0183 (1.0620) lr 1.9298e-03 eta 1:49:20
epoch [12/50] batch [220/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.4668 (1.0586) lr 1.9298e-03 eta 1:49:08
epoch [12/50] batch [240/392] time 0.434 (0.434) data 0.000 (0.002) loss 1.0268 (1.0549) lr 1.9298e-03 eta 1:48:57
epoch [12/50] batch [260/392] time 0.432 (0.434) data 0.000 (0.002) loss 0.8474 (1.0447) lr 1.9298e-03 eta 1:48:47
epoch [12/50] batch [280/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.9187 (1.0724) lr 1.9298e-03 eta 1:48:38
epoch [12/50] batch [300/392] time 0.428 (0.434) data 0.000 (0.001) loss 0.4440 (1.0713) lr 1.9298e-03 eta 1:48:28
epoch [12/50] batch [320/392] time 0.427 (0.434) data 0.000 (0.001) loss 1.4221 (1.0731) lr 1.9298e-03 eta 1:48:19
epoch [12/50] batch [340/392] time 0.434 (0.434) data 0.000 (0.001) loss 0.6093 (1.0865) lr 1.9298e-03 eta 1:48:10
epoch [12/50] batch [360/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.2763 (1.0891) lr 1.9298e-03 eta 1:48:01
epoch [12/50] batch [380/392] time 0.432 (0.434) data 0.000 (0.001) loss 1.9822 (1.1006) lr 1.9298e-03 eta 1:47:50
epoch [13/50] batch [20/392] time 0.433 (0.452) data 0.000 (0.018) loss 0.8832 (1.1252) lr 1.9048e-03 eta 1:52:02
epoch [13/50] batch [40/392] time 0.433 (0.443) data 0.000 (0.009) loss 1.3494 (1.1930) lr 1.9048e-03 eta 1:49:34
epoch [13/50] batch [60/392] time 0.429 (0.440) data 0.000 (0.006) loss 0.8216 (1.2466) lr 1.9048e-03 eta 1:48:41
epoch [13/50] batch [80/392] time 0.434 (0.438) data 0.000 (0.005) loss 1.2844 (1.2072) lr 1.9048e-03 eta 1:48:08
epoch [13/50] batch [100/392] time 0.436 (0.437) data 0.000 (0.004) loss 0.4906 (1.1822) lr 1.9048e-03 eta 1:47:50
epoch [13/50] batch [120/392] time 0.437 (0.437) data 0.000 (0.003) loss 1.3186 (1.1548) lr 1.9048e-03 eta 1:47:33
epoch [13/50] batch [140/392] time 0.439 (0.436) data 0.000 (0.003) loss 1.6004 (1.1274) lr 1.9048e-03 eta 1:47:18
epoch [13/50] batch [160/392] time 0.433 (0.436) data 0.000 (0.002) loss 1.0619 (1.1400) lr 1.9048e-03 eta 1:47:06
epoch [13/50] batch [180/392] time 0.428 (0.436) data 0.000 (0.002) loss 1.8573 (1.1208) lr 1.9048e-03 eta 1:46:56
epoch [13/50] batch [200/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.3142 (1.0974) lr 1.9048e-03 eta 1:46:45
epoch [13/50] batch [220/392] time 0.435 (0.436) data 0.000 (0.002) loss 1.4760 (1.0875) lr 1.9048e-03 eta 1:46:34
epoch [13/50] batch [240/392] time 0.439 (0.436) data 0.000 (0.002) loss 0.0652 (1.0935) lr 1.9048e-03 eta 1:46:24
epoch [13/50] batch [260/392] time 0.425 (0.436) data 0.000 (0.002) loss 0.9671 (1.0923) lr 1.9048e-03 eta 1:46:16
epoch [13/50] batch [280/392] time 0.438 (0.436) data 0.000 (0.001) loss 0.9596 (1.0840) lr 1.9048e-03 eta 1:46:05
epoch [13/50] batch [300/392] time 0.428 (0.435) data 0.000 (0.001) loss 0.0321 (1.0906) lr 1.9048e-03 eta 1:45:54
epoch [13/50] batch [320/392] time 0.434 (0.435) data 0.000 (0.001) loss 1.3523 (1.1136) lr 1.9048e-03 eta 1:45:45
epoch [13/50] batch [340/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.3367 (1.1203) lr 1.9048e-03 eta 1:45:36
epoch [13/50] batch [360/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.6145 (1.1327) lr 1.9048e-03 eta 1:45:25
epoch [13/50] batch [380/392] time 0.432 (0.435) data 0.000 (0.001) loss 0.4447 (1.1289) lr 1.9048e-03 eta 1:45:15
epoch [14/50] batch [20/392] time 0.423 (0.452) data 0.000 (0.017) loss 0.7642 (1.0438) lr 1.8763e-03 eta 1:49:01
epoch [14/50] batch [40/392] time 0.428 (0.443) data 0.000 (0.009) loss 0.9595 (1.0882) lr 1.8763e-03 eta 1:46:46
epoch [14/50] batch [60/392] time 0.434 (0.440) data 0.000 (0.006) loss 1.9301 (1.1956) lr 1.8763e-03 eta 1:46:00
epoch [14/50] batch [80/392] time 0.428 (0.439) data 0.000 (0.004) loss 1.1878 (1.1886) lr 1.8763e-03 eta 1:45:31
epoch [14/50] batch [100/392] time 0.439 (0.438) data 0.000 (0.004) loss 1.7023 (1.1597) lr 1.8763e-03 eta 1:45:09
epoch [14/50] batch [120/392] time 0.429 (0.437) data 0.000 (0.003) loss 1.0114 (1.1580) lr 1.8763e-03 eta 1:44:51
epoch [14/50] batch [140/392] time 0.429 (0.437) data 0.000 (0.003) loss 1.2689 (1.1582) lr 1.8763e-03 eta 1:44:34
epoch [14/50] batch [160/392] time 0.428 (0.437) data 0.000 (0.002) loss 1.3387 (1.1684) lr 1.8763e-03 eta 1:44:22
epoch [14/50] batch [180/392] time 0.428 (0.436) data 0.000 (0.002) loss 1.5190 (1.1474) lr 1.8763e-03 eta 1:44:08
epoch [14/50] batch [200/392] time 0.439 (0.436) data 0.000 (0.002) loss 0.5591 (1.1275) lr 1.8763e-03 eta 1:43:59
epoch [14/50] batch [220/392] time 0.439 (0.436) data 0.000 (0.002) loss 1.0946 (1.1303) lr 1.8763e-03 eta 1:43:49
epoch [14/50] batch [240/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.0360 (1.1073) lr 1.8763e-03 eta 1:43:39
epoch [14/50] batch [260/392] time 0.438 (0.436) data 0.000 (0.001) loss 2.4027 (1.1143) lr 1.8763e-03 eta 1:43:29
epoch [14/50] batch [280/392] time 0.435 (0.436) data 0.000 (0.001) loss 0.4854 (1.1144) lr 1.8763e-03 eta 1:43:19
epoch [14/50] batch [300/392] time 0.438 (0.436) data 0.000 (0.001) loss 0.4968 (1.1062) lr 1.8763e-03 eta 1:43:10
epoch [14/50] batch [320/392] time 0.427 (0.436) data 0.000 (0.001) loss 1.9920 (1.1120) lr 1.8763e-03 eta 1:43:00
epoch [14/50] batch [340/392] time 0.426 (0.436) data 0.000 (0.001) loss 0.2920 (1.0981) lr 1.8763e-03 eta 1:42:50
epoch [14/50] batch [360/392] time 0.426 (0.436) data 0.000 (0.001) loss 0.6202 (1.1012) lr 1.8763e-03 eta 1:42:40
epoch [14/50] batch [380/392] time 0.422 (0.435) data 0.000 (0.001) loss 0.9978 (1.1000) lr 1.8763e-03 eta 1:42:30
epoch [15/50] batch [20/392] time 0.423 (0.451) data 0.000 (0.018) loss 0.4524 (1.0145) lr 1.8443e-03 eta 1:45:57
epoch [15/50] batch [40/392] time 0.426 (0.442) data 0.000 (0.009) loss 1.2955 (1.0007) lr 1.8443e-03 eta 1:43:42
epoch [15/50] batch [60/392] time 0.423 (0.439) data 0.000 (0.006) loss 2.0740 (1.1101) lr 1.8443e-03 eta 1:42:47
epoch [15/50] batch [80/392] time 0.428 (0.438) data 0.000 (0.005) loss 1.1124 (1.1189) lr 1.8443e-03 eta 1:42:22
epoch [15/50] batch [100/392] time 0.438 (0.437) data 0.000 (0.004) loss 1.2202 (1.1443) lr 1.8443e-03 eta 1:42:03
epoch [15/50] batch [120/392] time 0.428 (0.436) data 0.000 (0.003) loss 0.5582 (1.1606) lr 1.8443e-03 eta 1:41:45
epoch [15/50] batch [140/392] time 0.433 (0.436) data 0.000 (0.003) loss 1.6349 (1.1365) lr 1.8443e-03 eta 1:41:31
epoch [15/50] batch [160/392] time 0.432 (0.436) data 0.000 (0.002) loss 1.8161 (1.1341) lr 1.8443e-03 eta 1:41:19
epoch [15/50] batch [180/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.6684 (1.1006) lr 1.8443e-03 eta 1:41:04
epoch [15/50] batch [200/392] time 0.437 (0.435) data 0.000 (0.002) loss 2.1639 (1.1134) lr 1.8443e-03 eta 1:40:53
epoch [15/50] batch [220/392] time 0.429 (0.435) data 0.000 (0.002) loss 1.7183 (1.1310) lr 1.8443e-03 eta 1:40:43
epoch [15/50] batch [240/392] time 0.437 (0.435) data 0.000 (0.002) loss 1.6705 (1.1336) lr 1.8443e-03 eta 1:40:32
epoch [15/50] batch [260/392] time 0.422 (0.435) data 0.000 (0.002) loss 1.1270 (1.1088) lr 1.8443e-03 eta 1:40:19
epoch [15/50] batch [280/392] time 0.433 (0.435) data 0.000 (0.001) loss 0.5691 (1.1206) lr 1.8443e-03 eta 1:40:10
epoch [15/50] batch [300/392] time 0.438 (0.434) data 0.000 (0.001) loss 0.5048 (1.1040) lr 1.8443e-03 eta 1:40:00
epoch [15/50] batch [320/392] time 0.426 (0.434) data 0.000 (0.001) loss 0.9966 (1.1152) lr 1.8443e-03 eta 1:39:50
epoch [15/50] batch [340/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.4667 (1.1215) lr 1.8443e-03 eta 1:39:40
epoch [15/50] batch [360/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.4397 (1.1182) lr 1.8443e-03 eta 1:39:30
epoch [15/50] batch [380/392] time 0.432 (0.434) data 0.000 (0.001) loss 2.5610 (1.1100) lr 1.8443e-03 eta 1:39:21
epoch [16/50] batch [20/392] time 0.423 (0.451) data 0.000 (0.017) loss 0.8014 (1.0876) lr 1.8090e-03 eta 1:42:56
epoch [16/50] batch [40/392] time 0.433 (0.442) data 0.000 (0.009) loss 0.9296 (1.1140) lr 1.8090e-03 eta 1:40:41
epoch [16/50] batch [60/392] time 0.437 (0.438) data 0.000 (0.006) loss 0.4159 (1.0339) lr 1.8090e-03 eta 1:39:48
epoch [16/50] batch [80/392] time 0.421 (0.437) data 0.000 (0.004) loss 2.9588 (1.1150) lr 1.8090e-03 eta 1:39:19
epoch [16/50] batch [100/392] time 0.433 (0.436) data 0.000 (0.004) loss 0.5275 (1.0785) lr 1.8090e-03 eta 1:38:56
epoch [16/50] batch [120/392] time 0.438 (0.435) data 0.000 (0.003) loss 1.4454 (1.0802) lr 1.8090e-03 eta 1:38:41
epoch [16/50] batch [140/392] time 0.433 (0.435) data 0.000 (0.003) loss 1.2920 (1.0698) lr 1.8090e-03 eta 1:38:29
epoch [16/50] batch [160/392] time 0.432 (0.435) data 0.000 (0.002) loss 0.5848 (1.0403) lr 1.8090e-03 eta 1:38:18
epoch [16/50] batch [180/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.5095 (1.0308) lr 1.8090e-03 eta 1:38:08
epoch [16/50] batch [200/392] time 0.437 (0.435) data 0.000 (0.002) loss 1.6598 (1.0506) lr 1.8090e-03 eta 1:37:57
epoch [16/50] batch [220/392] time 0.432 (0.435) data 0.000 (0.002) loss 1.5084 (1.0475) lr 1.8090e-03 eta 1:37:46
epoch [16/50] batch [240/392] time 0.432 (0.434) data 0.000 (0.002) loss 0.3325 (1.0578) lr 1.8090e-03 eta 1:37:36
epoch [16/50] batch [260/392] time 0.438 (0.434) data 0.000 (0.001) loss 0.8198 (1.0559) lr 1.8090e-03 eta 1:37:27
epoch [16/50] batch [280/392] time 0.428 (0.434) data 0.000 (0.001) loss 0.3327 (1.0491) lr 1.8090e-03 eta 1:37:17
epoch [16/50] batch [300/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.4048 (1.0492) lr 1.8090e-03 eta 1:37:07
epoch [16/50] batch [320/392] time 0.436 (0.434) data 0.000 (0.001) loss 0.1728 (1.0578) lr 1.8090e-03 eta 1:36:57
epoch [16/50] batch [340/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.9250 (1.0635) lr 1.8090e-03 eta 1:36:47
epoch [16/50] batch [360/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.4256 (1.0441) lr 1.8090e-03 eta 1:36:38
epoch [16/50] batch [380/392] time 0.425 (0.434) data 0.000 (0.001) loss 0.8159 (1.0420) lr 1.8090e-03 eta 1:36:28
epoch [17/50] batch [20/392] time 0.438 (0.453) data 0.000 (0.018) loss 0.9738 (1.1222) lr 1.7705e-03 eta 1:40:27
epoch [17/50] batch [40/392] time 0.433 (0.443) data 0.000 (0.009) loss 1.9307 (1.0453) lr 1.7705e-03 eta 1:38:09
epoch [17/50] batch [60/392] time 0.428 (0.440) data 0.000 (0.006) loss 1.1680 (1.1221) lr 1.7705e-03 eta 1:37:17
epoch [17/50] batch [80/392] time 0.440 (0.439) data 0.000 (0.005) loss 1.8446 (1.1369) lr 1.7705e-03 eta 1:36:50
epoch [17/50] batch [100/392] time 0.437 (0.438) data 0.000 (0.004) loss 0.6205 (1.1239) lr 1.7705e-03 eta 1:36:31
epoch [17/50] batch [120/392] time 0.437 (0.437) data 0.000 (0.003) loss 0.9299 (1.1414) lr 1.7705e-03 eta 1:36:14
epoch [17/50] batch [140/392] time 0.439 (0.437) data 0.000 (0.003) loss 3.9389 (1.1547) lr 1.7705e-03 eta 1:36:01
epoch [17/50] batch [160/392] time 0.434 (0.437) data 0.000 (0.002) loss 1.3775 (1.1556) lr 1.7705e-03 eta 1:35:48
epoch [17/50] batch [180/392] time 0.437 (0.436) data 0.000 (0.002) loss 1.1053 (1.1620) lr 1.7705e-03 eta 1:35:35
epoch [17/50] batch [200/392] time 0.437 (0.436) data 0.000 (0.002) loss 0.6421 (1.1626) lr 1.7705e-03 eta 1:35:23
epoch [17/50] batch [220/392] time 0.428 (0.436) data 0.000 (0.002) loss 0.5440 (1.1739) lr 1.7705e-03 eta 1:35:10
epoch [17/50] batch [240/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.7131 (1.1723) lr 1.7705e-03 eta 1:34:59
epoch [17/50] batch [260/392] time 0.428 (0.435) data 0.000 (0.002) loss 2.3326 (1.1564) lr 1.7705e-03 eta 1:34:48
epoch [17/50] batch [280/392] time 0.439 (0.435) data 0.000 (0.001) loss 0.8369 (1.1668) lr 1.7705e-03 eta 1:34:37
epoch [17/50] batch [300/392] time 0.433 (0.435) data 0.000 (0.001) loss 0.7790 (1.1682) lr 1.7705e-03 eta 1:34:27
epoch [17/50] batch [320/392] time 0.428 (0.435) data 0.000 (0.001) loss 1.9733 (1.1721) lr 1.7705e-03 eta 1:34:18
epoch [17/50] batch [340/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.5008 (1.1684) lr 1.7705e-03 eta 1:34:09
epoch [17/50] batch [360/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.6611 (1.1630) lr 1.7705e-03 eta 1:33:59
epoch [17/50] batch [380/392] time 0.426 (0.435) data 0.000 (0.001) loss 2.1625 (1.1645) lr 1.7705e-03 eta 1:33:49
epoch [18/50] batch [20/392] time 0.433 (0.452) data 0.000 (0.019) loss 1.7540 (1.2121) lr 1.7290e-03 eta 1:37:12
epoch [18/50] batch [40/392] time 0.428 (0.443) data 0.000 (0.009) loss 1.2591 (1.1618) lr 1.7290e-03 eta 1:35:10
epoch [18/50] batch [60/392] time 0.437 (0.440) data 0.000 (0.006) loss 0.5959 (1.1702) lr 1.7290e-03 eta 1:34:25
epoch [18/50] batch [80/392] time 0.438 (0.438) data 0.000 (0.005) loss 0.7238 (1.0976) lr 1.7290e-03 eta 1:33:55
epoch [18/50] batch [100/392] time 0.427 (0.437) data 0.000 (0.004) loss 0.8456 (1.0884) lr 1.7290e-03 eta 1:33:35
epoch [18/50] batch [120/392] time 0.435 (0.437) data 0.000 (0.003) loss 1.4710 (1.0822) lr 1.7290e-03 eta 1:33:20
epoch [18/50] batch [140/392] time 0.437 (0.437) data 0.000 (0.003) loss 0.8704 (1.0818) lr 1.7290e-03 eta 1:33:08
epoch [18/50] batch [160/392] time 0.439 (0.436) data 0.000 (0.002) loss 1.3862 (1.0926) lr 1.7290e-03 eta 1:32:55
epoch [18/50] batch [180/392] time 0.433 (0.436) data 0.000 (0.002) loss 1.6094 (1.0789) lr 1.7290e-03 eta 1:32:43
epoch [18/50] batch [200/392] time 0.427 (0.436) data 0.000 (0.002) loss 1.5718 (1.0771) lr 1.7290e-03 eta 1:32:31
epoch [18/50] batch [220/392] time 0.424 (0.436) data 0.000 (0.002) loss 1.5780 (1.0930) lr 1.7290e-03 eta 1:32:20
epoch [18/50] batch [240/392] time 0.438 (0.436) data 0.000 (0.002) loss 1.8304 (1.0781) lr 1.7290e-03 eta 1:32:10
epoch [18/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.4517 (1.0640) lr 1.7290e-03 eta 1:32:00
epoch [18/50] batch [280/392] time 0.426 (0.435) data 0.000 (0.001) loss 0.7716 (1.0724) lr 1.7290e-03 eta 1:31:47
epoch [18/50] batch [300/392] time 0.431 (0.435) data 0.000 (0.001) loss 3.0887 (1.0776) lr 1.7290e-03 eta 1:31:37
epoch [18/50] batch [320/392] time 0.423 (0.435) data 0.000 (0.001) loss 1.0014 (1.0754) lr 1.7290e-03 eta 1:31:28
epoch [18/50] batch [340/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.0109 (1.0673) lr 1.7290e-03 eta 1:31:18
epoch [18/50] batch [360/392] time 0.423 (0.435) data 0.000 (0.001) loss 2.6284 (1.0615) lr 1.7290e-03 eta 1:31:08
epoch [18/50] batch [380/392] time 0.425 (0.435) data 0.000 (0.001) loss 1.4605 (1.0620) lr 1.7290e-03 eta 1:30:57
epoch [19/50] batch [20/392] time 0.433 (0.453) data 0.000 (0.019) loss 0.8060 (0.8082) lr 1.6845e-03 eta 1:34:36
epoch [19/50] batch [40/392] time 0.430 (0.443) data 0.000 (0.010) loss 1.2008 (1.0407) lr 1.6845e-03 eta 1:32:24
epoch [19/50] batch [60/392] time 0.438 (0.441) data 0.000 (0.006) loss 0.3004 (0.9986) lr 1.6845e-03 eta 1:31:40
epoch [19/50] batch [80/392] time 0.438 (0.439) data 0.000 (0.005) loss 3.1922 (0.9497) lr 1.6845e-03 eta 1:31:09
epoch [19/50] batch [100/392] time 0.428 (0.438) data 0.000 (0.004) loss 0.8488 (0.9567) lr 1.6845e-03 eta 1:30:45
epoch [19/50] batch [120/392] time 0.427 (0.437) data 0.000 (0.003) loss 1.9414 (0.9464) lr 1.6845e-03 eta 1:30:25
epoch [19/50] batch [140/392] time 0.438 (0.436) data 0.000 (0.003) loss 0.2991 (0.9706) lr 1.6845e-03 eta 1:30:06
epoch [19/50] batch [160/392] time 0.438 (0.436) data 0.000 (0.003) loss 2.1262 (0.9915) lr 1.6845e-03 eta 1:29:55
epoch [19/50] batch [180/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.7643 (0.9876) lr 1.6845e-03 eta 1:29:42
epoch [19/50] batch [200/392] time 0.434 (0.435) data 0.000 (0.002) loss 1.4245 (0.9945) lr 1.6845e-03 eta 1:29:31
epoch [19/50] batch [220/392] time 0.427 (0.435) data 0.000 (0.002) loss 0.8313 (1.0181) lr 1.6845e-03 eta 1:29:21
epoch [19/50] batch [240/392] time 0.424 (0.435) data 0.000 (0.002) loss 0.6607 (1.0260) lr 1.6845e-03 eta 1:29:13
epoch [19/50] batch [260/392] time 0.432 (0.435) data 0.000 (0.002) loss 1.7878 (1.0309) lr 1.6845e-03 eta 1:29:02
epoch [19/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.7448 (1.0160) lr 1.6845e-03 eta 1:28:53
epoch [19/50] batch [300/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.3649 (1.0143) lr 1.6845e-03 eta 1:28:43
epoch [19/50] batch [320/392] time 0.428 (0.435) data 0.000 (0.001) loss 0.6218 (1.0269) lr 1.6845e-03 eta 1:28:33
epoch [19/50] batch [340/392] time 0.437 (0.435) data 0.000 (0.001) loss 1.6045 (1.0424) lr 1.6845e-03 eta 1:28:24
epoch [19/50] batch [360/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.7048 (1.0384) lr 1.6845e-03 eta 1:28:13
epoch [19/50] batch [380/392] time 0.432 (0.434) data 0.000 (0.001) loss 1.8671 (1.0429) lr 1.6845e-03 eta 1:28:04
epoch [20/50] batch [20/392] time 0.428 (0.450) data 0.000 (0.017) loss 1.4155 (0.9730) lr 1.6374e-03 eta 1:31:04
epoch [20/50] batch [40/392] time 0.437 (0.442) data 0.000 (0.009) loss 0.7429 (1.1295) lr 1.6374e-03 eta 1:29:19
epoch [20/50] batch [60/392] time 0.437 (0.439) data 0.000 (0.006) loss 0.4912 (1.0104) lr 1.6374e-03 eta 1:28:31
epoch [20/50] batch [80/392] time 0.429 (0.437) data 0.000 (0.004) loss 1.0538 (1.0434) lr 1.6374e-03 eta 1:27:59
epoch [20/50] batch [100/392] time 0.435 (0.437) data 0.000 (0.004) loss 0.6721 (1.0700) lr 1.6374e-03 eta 1:27:42
epoch [20/50] batch [120/392] time 0.429 (0.436) data 0.000 (0.003) loss 1.1878 (1.0582) lr 1.6374e-03 eta 1:27:25
epoch [20/50] batch [140/392] time 0.433 (0.436) data 0.000 (0.003) loss 1.0068 (1.0639) lr 1.6374e-03 eta 1:27:11
epoch [20/50] batch [160/392] time 0.440 (0.435) data 0.000 (0.002) loss 1.3988 (1.0681) lr 1.6374e-03 eta 1:27:01
epoch [20/50] batch [180/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.6541 (1.0577) lr 1.6374e-03 eta 1:26:51
epoch [20/50] batch [200/392] time 0.428 (0.435) data 0.000 (0.002) loss 2.2773 (1.0400) lr 1.6374e-03 eta 1:26:39
epoch [20/50] batch [220/392] time 0.435 (0.435) data 0.000 (0.002) loss 1.1042 (1.0470) lr 1.6374e-03 eta 1:26:30
epoch [20/50] batch [240/392] time 0.434 (0.435) data 0.000 (0.002) loss 1.9924 (1.0688) lr 1.6374e-03 eta 1:26:21
epoch [20/50] batch [260/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.4322 (1.0568) lr 1.6374e-03 eta 1:26:11
epoch [20/50] batch [280/392] time 0.428 (0.435) data 0.000 (0.001) loss 0.8068 (1.0581) lr 1.6374e-03 eta 1:26:03
epoch [20/50] batch [300/392] time 0.428 (0.435) data 0.000 (0.001) loss 1.0808 (1.0537) lr 1.6374e-03 eta 1:25:53
epoch [20/50] batch [320/392] time 0.434 (0.435) data 0.000 (0.001) loss 1.2880 (1.0524) lr 1.6374e-03 eta 1:25:44
epoch [20/50] batch [340/392] time 0.428 (0.435) data 0.000 (0.001) loss 2.0933 (1.0455) lr 1.6374e-03 eta 1:25:35
epoch [20/50] batch [360/392] time 0.438 (0.435) data 0.000 (0.001) loss 2.2127 (1.0480) lr 1.6374e-03 eta 1:25:25
epoch [20/50] batch [380/392] time 0.432 (0.435) data 0.000 (0.001) loss 0.6243 (1.0502) lr 1.6374e-03 eta 1:25:16
epoch [21/50] batch [20/392] time 0.438 (0.454) data 0.000 (0.019) loss 0.7325 (1.0359) lr 1.5878e-03 eta 1:28:49
epoch [21/50] batch [40/392] time 0.427 (0.444) data 0.000 (0.009) loss 0.4370 (1.0734) lr 1.5878e-03 eta 1:26:40
epoch [21/50] batch [60/392] time 0.434 (0.441) data 0.000 (0.006) loss 1.2962 (1.0651) lr 1.5878e-03 eta 1:25:56
epoch [21/50] batch [80/392] time 0.439 (0.439) data 0.000 (0.005) loss 1.9030 (1.0395) lr 1.5878e-03 eta 1:25:32
epoch [21/50] batch [100/392] time 0.429 (0.438) data 0.000 (0.004) loss 0.0498 (1.0292) lr 1.5878e-03 eta 1:25:11
epoch [21/50] batch [120/392] time 0.433 (0.438) data 0.000 (0.003) loss 1.6611 (1.0356) lr 1.5878e-03 eta 1:24:54
epoch [21/50] batch [140/392] time 0.434 (0.437) data 0.000 (0.003) loss 0.6591 (1.0490) lr 1.5878e-03 eta 1:24:41
epoch [21/50] batch [160/392] time 0.437 (0.437) data 0.000 (0.002) loss 0.7124 (1.0388) lr 1.5878e-03 eta 1:24:29
epoch [21/50] batch [180/392] time 0.432 (0.437) data 0.000 (0.002) loss 0.9443 (1.0092) lr 1.5878e-03 eta 1:24:15
epoch [21/50] batch [200/392] time 0.439 (0.436) data 0.000 (0.002) loss 0.8232 (1.0216) lr 1.5878e-03 eta 1:24:05
epoch [21/50] batch [220/392] time 0.439 (0.436) data 0.000 (0.002) loss 1.0777 (1.0025) lr 1.5878e-03 eta 1:23:54
epoch [21/50] batch [240/392] time 0.433 (0.436) data 0.000 (0.002) loss 1.2641 (1.0255) lr 1.5878e-03 eta 1:23:42
epoch [21/50] batch [260/392] time 0.434 (0.436) data 0.000 (0.002) loss 4.2768 (1.0228) lr 1.5878e-03 eta 1:23:33
epoch [21/50] batch [280/392] time 0.443 (0.436) data 0.000 (0.001) loss 0.2533 (1.0247) lr 1.5878e-03 eta 1:23:24
epoch [21/50] batch [300/392] time 0.435 (0.436) data 0.000 (0.001) loss 2.1684 (1.0332) lr 1.5878e-03 eta 1:23:13
epoch [21/50] batch [320/392] time 0.439 (0.436) data 0.000 (0.001) loss 1.8992 (1.0396) lr 1.5878e-03 eta 1:23:05
epoch [21/50] batch [340/392] time 0.435 (0.436) data 0.000 (0.001) loss 0.7423 (1.0383) lr 1.5878e-03 eta 1:22:55
epoch [21/50] batch [360/392] time 0.428 (0.436) data 0.000 (0.001) loss 1.4132 (1.0494) lr 1.5878e-03 eta 1:22:44
epoch [21/50] batch [380/392] time 0.436 (0.435) data 0.000 (0.001) loss 0.9023 (1.0588) lr 1.5878e-03 eta 1:22:35
epoch [22/50] batch [20/392] time 0.439 (0.454) data 0.000 (0.017) loss 0.4299 (0.9855) lr 1.5358e-03 eta 1:25:47
epoch [22/50] batch [40/392] time 0.434 (0.444) data 0.000 (0.009) loss 0.0547 (0.9884) lr 1.5358e-03 eta 1:23:50
epoch [22/50] batch [60/392] time 0.438 (0.441) data 0.000 (0.006) loss 0.6210 (0.9593) lr 1.5358e-03 eta 1:23:08
epoch [22/50] batch [80/392] time 0.439 (0.440) data 0.000 (0.004) loss 1.0073 (1.0730) lr 1.5358e-03 eta 1:22:43
epoch [22/50] batch [100/392] time 0.438 (0.439) data 0.000 (0.004) loss 0.9568 (1.1034) lr 1.5358e-03 eta 1:22:21
epoch [22/50] batch [120/392] time 0.434 (0.438) data 0.000 (0.003) loss 0.7146 (1.1269) lr 1.5358e-03 eta 1:22:04
epoch [22/50] batch [140/392] time 0.434 (0.437) data 0.000 (0.003) loss 0.6744 (1.1295) lr 1.5358e-03 eta 1:21:49
epoch [22/50] batch [160/392] time 0.439 (0.437) data 0.000 (0.002) loss 0.5020 (1.1149) lr 1.5358e-03 eta 1:21:36
epoch [22/50] batch [180/392] time 0.438 (0.437) data 0.000 (0.002) loss 0.1739 (1.0888) lr 1.5358e-03 eta 1:21:23
epoch [22/50] batch [200/392] time 0.428 (0.436) data 0.000 (0.002) loss 2.4268 (1.0862) lr 1.5358e-03 eta 1:21:11
epoch [22/50] batch [220/392] time 0.439 (0.436) data 0.000 (0.002) loss 1.5801 (1.0796) lr 1.5358e-03 eta 1:21:00
epoch [22/50] batch [240/392] time 0.439 (0.436) data 0.000 (0.002) loss 0.7201 (1.0899) lr 1.5358e-03 eta 1:20:51
epoch [22/50] batch [260/392] time 0.439 (0.436) data 0.000 (0.001) loss 1.1726 (1.0890) lr 1.5358e-03 eta 1:20:42
epoch [22/50] batch [280/392] time 0.439 (0.436) data 0.000 (0.001) loss 0.3657 (1.0758) lr 1.5358e-03 eta 1:20:32
epoch [22/50] batch [300/392] time 0.429 (0.436) data 0.000 (0.001) loss 1.8144 (1.0777) lr 1.5358e-03 eta 1:20:23
epoch [22/50] batch [320/392] time 0.439 (0.436) data 0.000 (0.001) loss 0.7065 (1.0979) lr 1.5358e-03 eta 1:20:14
epoch [22/50] batch [340/392] time 0.435 (0.436) data 0.000 (0.001) loss 1.5796 (1.1129) lr 1.5358e-03 eta 1:20:04
epoch [22/50] batch [360/392] time 0.438 (0.436) data 0.000 (0.001) loss 1.0252 (1.1062) lr 1.5358e-03 eta 1:19:55
epoch [22/50] batch [380/392] time 0.429 (0.436) data 0.000 (0.001) loss 0.6498 (1.0953) lr 1.5358e-03 eta 1:19:45
epoch [23/50] batch [20/392] time 0.429 (0.452) data 0.000 (0.018) loss 0.6186 (1.1212) lr 1.4818e-03 eta 1:22:33
epoch [23/50] batch [40/392] time 0.438 (0.443) data 0.000 (0.009) loss 2.5301 (1.1184) lr 1.4818e-03 eta 1:20:44
epoch [23/50] batch [60/392] time 0.440 (0.440) data 0.000 (0.006) loss 1.9964 (1.1514) lr 1.4818e-03 eta 1:20:05
epoch [23/50] batch [80/392] time 0.423 (0.439) data 0.000 (0.005) loss 1.1887 (1.1094) lr 1.4818e-03 eta 1:19:40
epoch [23/50] batch [100/392] time 0.438 (0.438) data 0.000 (0.004) loss 1.2849 (1.1513) lr 1.4818e-03 eta 1:19:21
epoch [23/50] batch [120/392] time 0.437 (0.437) data 0.000 (0.003) loss 0.0164 (1.1374) lr 1.4818e-03 eta 1:19:01
epoch [23/50] batch [140/392] time 0.434 (0.436) data 0.000 (0.003) loss 0.5766 (1.0915) lr 1.4818e-03 eta 1:18:45
epoch [23/50] batch [160/392] time 0.426 (0.436) data 0.000 (0.002) loss 0.5525 (1.0808) lr 1.4818e-03 eta 1:18:35
epoch [23/50] batch [180/392] time 0.434 (0.435) data 0.000 (0.002) loss 0.5829 (1.0622) lr 1.4818e-03 eta 1:18:21
epoch [23/50] batch [200/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.7266 (1.0550) lr 1.4818e-03 eta 1:18:08
epoch [23/50] batch [220/392] time 0.440 (0.435) data 0.000 (0.002) loss 0.5477 (1.0455) lr 1.4818e-03 eta 1:18:00
epoch [23/50] batch [240/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.6302 (1.0709) lr 1.4818e-03 eta 1:17:51
epoch [23/50] batch [260/392] time 0.433 (0.435) data 0.000 (0.001) loss 0.8953 (1.0546) lr 1.4818e-03 eta 1:17:42
epoch [23/50] batch [280/392] time 0.434 (0.435) data 0.000 (0.001) loss 1.8507 (1.0476) lr 1.4818e-03 eta 1:17:34
epoch [23/50] batch [300/392] time 0.434 (0.435) data 0.000 (0.001) loss 0.8619 (1.0526) lr 1.4818e-03 eta 1:17:24
epoch [23/50] batch [320/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.5754 (1.0549) lr 1.4818e-03 eta 1:17:15
epoch [23/50] batch [340/392] time 0.424 (0.435) data 0.000 (0.001) loss 1.1008 (1.0616) lr 1.4818e-03 eta 1:17:05
epoch [23/50] batch [360/392] time 0.438 (0.435) data 0.000 (0.001) loss 2.6603 (1.0725) lr 1.4818e-03 eta 1:16:56
epoch [23/50] batch [380/392] time 0.437 (0.435) data 0.000 (0.001) loss 2.4491 (1.0808) lr 1.4818e-03 eta 1:16:47
epoch [24/50] batch [20/392] time 0.432 (0.452) data 0.000 (0.020) loss 0.3027 (1.1588) lr 1.4258e-03 eta 1:19:30
epoch [24/50] batch [40/392] time 0.429 (0.443) data 0.000 (0.010) loss 2.2059 (1.2751) lr 1.4258e-03 eta 1:17:50
epoch [24/50] batch [60/392] time 0.438 (0.440) data 0.000 (0.007) loss 0.6565 (1.2273) lr 1.4258e-03 eta 1:17:07
epoch [24/50] batch [80/392] time 0.440 (0.438) data 0.000 (0.005) loss 0.2714 (1.1894) lr 1.4258e-03 eta 1:16:43
epoch [24/50] batch [100/392] time 0.426 (0.437) data 0.000 (0.004) loss 0.7576 (1.1095) lr 1.4258e-03 eta 1:16:26
epoch [24/50] batch [120/392] time 0.438 (0.437) data 0.000 (0.003) loss 0.2139 (1.0760) lr 1.4258e-03 eta 1:16:12
epoch [24/50] batch [140/392] time 0.440 (0.437) data 0.000 (0.003) loss 0.6324 (1.0336) lr 1.4258e-03 eta 1:16:00
epoch [24/50] batch [160/392] time 0.433 (0.436) data 0.000 (0.003) loss 1.4963 (1.0232) lr 1.4258e-03 eta 1:15:47
epoch [24/50] batch [180/392] time 0.427 (0.436) data 0.000 (0.002) loss 0.3654 (1.0329) lr 1.4258e-03 eta 1:15:35
epoch [24/50] batch [200/392] time 0.424 (0.436) data 0.000 (0.002) loss 1.6606 (1.0327) lr 1.4258e-03 eta 1:15:24
epoch [24/50] batch [220/392] time 0.439 (0.436) data 0.000 (0.002) loss 1.1814 (1.0509) lr 1.4258e-03 eta 1:15:13
epoch [24/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.5432 (1.0363) lr 1.4258e-03 eta 1:15:03
epoch [24/50] batch [260/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.5291 (1.0335) lr 1.4258e-03 eta 1:14:53
epoch [24/50] batch [280/392] time 0.429 (0.435) data 0.000 (0.002) loss 1.0297 (1.0526) lr 1.4258e-03 eta 1:14:43
epoch [24/50] batch [300/392] time 0.440 (0.435) data 0.000 (0.001) loss 0.8277 (1.0474) lr 1.4258e-03 eta 1:14:34
epoch [24/50] batch [320/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.1935 (1.0550) lr 1.4258e-03 eta 1:14:24
epoch [24/50] batch [340/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.2117 (1.0516) lr 1.4258e-03 eta 1:14:15
epoch [24/50] batch [360/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.6765 (1.0545) lr 1.4258e-03 eta 1:14:05
epoch [24/50] batch [380/392] time 0.436 (0.435) data 0.000 (0.001) loss 0.5950 (1.0644) lr 1.4258e-03 eta 1:13:56
epoch [25/50] batch [20/392] time 0.439 (0.452) data 0.000 (0.017) loss 0.4591 (1.1580) lr 1.3681e-03 eta 1:16:40
epoch [25/50] batch [40/392] time 0.428 (0.443) data 0.000 (0.009) loss 1.0243 (1.0505) lr 1.3681e-03 eta 1:15:00
epoch [25/50] batch [60/392] time 0.439 (0.440) data 0.000 (0.006) loss 1.8091 (1.0708) lr 1.3681e-03 eta 1:14:22
epoch [25/50] batch [80/392] time 0.434 (0.439) data 0.000 (0.004) loss 0.0865 (1.0738) lr 1.3681e-03 eta 1:13:58
epoch [25/50] batch [100/392] time 0.439 (0.438) data 0.000 (0.004) loss 0.0881 (1.0487) lr 1.3681e-03 eta 1:13:40
epoch [25/50] batch [120/392] time 0.428 (0.437) data 0.000 (0.003) loss 0.5202 (1.0623) lr 1.3681e-03 eta 1:13:25
epoch [25/50] batch [140/392] time 0.427 (0.437) data 0.000 (0.003) loss 1.5974 (1.0541) lr 1.3681e-03 eta 1:13:11
epoch [25/50] batch [160/392] time 0.438 (0.437) data 0.000 (0.002) loss 0.4466 (1.0261) lr 1.3681e-03 eta 1:13:00
epoch [25/50] batch [180/392] time 0.432 (0.436) data 0.000 (0.002) loss 0.9141 (1.0115) lr 1.3681e-03 eta 1:12:47
epoch [25/50] batch [200/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.7470 (1.0302) lr 1.3681e-03 eta 1:12:35
epoch [25/50] batch [220/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.4005 (1.0339) lr 1.3681e-03 eta 1:12:24
epoch [25/50] batch [240/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.7746 (1.0263) lr 1.3681e-03 eta 1:12:13
epoch [25/50] batch [260/392] time 0.424 (0.435) data 0.000 (0.001) loss 1.9659 (1.0213) lr 1.3681e-03 eta 1:12:02
epoch [25/50] batch [280/392] time 0.435 (0.435) data 0.000 (0.001) loss 0.9354 (1.0185) lr 1.3681e-03 eta 1:11:52
epoch [25/50] batch [300/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.8711 (1.0139) lr 1.3681e-03 eta 1:11:43
epoch [25/50] batch [320/392] time 0.440 (0.435) data 0.000 (0.001) loss 0.8003 (1.0246) lr 1.3681e-03 eta 1:11:33
epoch [25/50] batch [340/392] time 0.422 (0.435) data 0.000 (0.001) loss 1.0093 (1.0226) lr 1.3681e-03 eta 1:11:22
epoch [25/50] batch [360/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.5689 (1.0230) lr 1.3681e-03 eta 1:11:13
epoch [25/50] batch [380/392] time 0.427 (0.435) data 0.000 (0.001) loss 0.5546 (1.0376) lr 1.3681e-03 eta 1:11:04
epoch [26/50] batch [20/392] time 0.434 (0.453) data 0.000 (0.018) loss 1.9036 (1.1083) lr 1.3090e-03 eta 1:13:46
epoch [26/50] batch [40/392] time 0.439 (0.444) data 0.000 (0.009) loss 1.1728 (1.0365) lr 1.3090e-03 eta 1:12:15
epoch [26/50] batch [60/392] time 0.439 (0.441) data 0.000 (0.006) loss 1.3014 (1.0554) lr 1.3090e-03 eta 1:11:32
epoch [26/50] batch [80/392] time 0.433 (0.439) data 0.000 (0.005) loss 0.1631 (1.0995) lr 1.3090e-03 eta 1:11:05
epoch [26/50] batch [100/392] time 0.437 (0.438) data 0.000 (0.004) loss 0.9614 (1.0732) lr 1.3090e-03 eta 1:10:47
epoch [26/50] batch [120/392] time 0.429 (0.437) data 0.000 (0.003) loss 1.5959 (1.0449) lr 1.3090e-03 eta 1:10:30
epoch [26/50] batch [140/392] time 0.438 (0.436) data 0.000 (0.003) loss 0.8411 (1.0383) lr 1.3090e-03 eta 1:10:15
epoch [26/50] batch [160/392] time 0.437 (0.436) data 0.000 (0.002) loss 0.9957 (1.0541) lr 1.3090e-03 eta 1:10:03
epoch [26/50] batch [180/392] time 0.433 (0.436) data 0.000 (0.002) loss 0.4317 (1.0474) lr 1.3090e-03 eta 1:09:51
epoch [26/50] batch [200/392] time 0.437 (0.436) data 0.000 (0.002) loss 0.4964 (1.0507) lr 1.3090e-03 eta 1:09:41
epoch [26/50] batch [220/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.7548 (1.0334) lr 1.3090e-03 eta 1:09:31
epoch [26/50] batch [240/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.5785 (1.0302) lr 1.3090e-03 eta 1:09:20
epoch [26/50] batch [260/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.2363 (1.0274) lr 1.3090e-03 eta 1:09:11
epoch [26/50] batch [280/392] time 0.422 (0.435) data 0.000 (0.001) loss 0.6271 (1.0410) lr 1.3090e-03 eta 1:09:01
epoch [26/50] batch [300/392] time 0.432 (0.435) data 0.000 (0.001) loss 0.2087 (1.0522) lr 1.3090e-03 eta 1:08:52
epoch [26/50] batch [320/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.5506 (1.0585) lr 1.3090e-03 eta 1:08:42
epoch [26/50] batch [340/392] time 0.428 (0.435) data 0.000 (0.001) loss 0.8482 (1.0690) lr 1.3090e-03 eta 1:08:32
epoch [26/50] batch [360/392] time 0.427 (0.435) data 0.000 (0.001) loss 0.5143 (1.0583) lr 1.3090e-03 eta 1:08:23
epoch [26/50] batch [380/392] time 0.436 (0.435) data 0.000 (0.001) loss 0.2058 (1.0521) lr 1.3090e-03 eta 1:08:14
epoch [27/50] batch [20/392] time 0.434 (0.451) data 0.000 (0.017) loss 1.9866 (1.2579) lr 1.2487e-03 eta 1:10:37
epoch [27/50] batch [40/392] time 0.434 (0.443) data 0.000 (0.009) loss 0.8799 (1.2038) lr 1.2487e-03 eta 1:09:13
epoch [27/50] batch [60/392] time 0.434 (0.440) data 0.000 (0.006) loss 1.2940 (1.0974) lr 1.2487e-03 eta 1:08:34
epoch [27/50] batch [80/392] time 0.438 (0.439) data 0.000 (0.005) loss 1.7708 (1.0889) lr 1.2487e-03 eta 1:08:14
epoch [27/50] batch [100/392] time 0.427 (0.438) data 0.000 (0.004) loss 1.0021 (1.0820) lr 1.2487e-03 eta 1:07:58
epoch [27/50] batch [120/392] time 0.434 (0.437) data 0.000 (0.003) loss 0.1149 (1.0546) lr 1.2487e-03 eta 1:07:40
epoch [27/50] batch [140/392] time 0.434 (0.437) data 0.000 (0.003) loss 3.1649 (1.0378) lr 1.2487e-03 eta 1:07:28
epoch [27/50] batch [160/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.6048 (1.0131) lr 1.2487e-03 eta 1:07:15
epoch [27/50] batch [180/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.2575 (1.0071) lr 1.2487e-03 eta 1:07:04
epoch [27/50] batch [200/392] time 0.423 (0.436) data 0.000 (0.002) loss 0.2872 (1.0050) lr 1.2487e-03 eta 1:06:53
epoch [27/50] batch [220/392] time 0.433 (0.436) data 0.000 (0.002) loss 2.0196 (1.0160) lr 1.2487e-03 eta 1:06:41
epoch [27/50] batch [240/392] time 0.427 (0.435) data 0.000 (0.002) loss 2.1774 (1.0437) lr 1.2487e-03 eta 1:06:31
epoch [27/50] batch [260/392] time 0.428 (0.435) data 0.000 (0.001) loss 0.4413 (1.0508) lr 1.2487e-03 eta 1:06:21
epoch [27/50] batch [280/392] time 0.428 (0.435) data 0.000 (0.001) loss 0.4143 (1.0436) lr 1.2487e-03 eta 1:06:12
epoch [27/50] batch [300/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.6351 (1.0297) lr 1.2487e-03 eta 1:06:02
epoch [27/50] batch [320/392] time 0.429 (0.435) data 0.000 (0.001) loss 0.5819 (1.0249) lr 1.2487e-03 eta 1:05:53
epoch [27/50] batch [340/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.2870 (1.0298) lr 1.2487e-03 eta 1:05:44
epoch [27/50] batch [360/392] time 0.439 (0.435) data 0.000 (0.001) loss 0.9608 (1.0218) lr 1.2487e-03 eta 1:05:35
epoch [27/50] batch [380/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.2808 (1.0484) lr 1.2487e-03 eta 1:05:26
epoch [28/50] batch [20/392] time 0.438 (0.452) data 0.000 (0.018) loss 1.2021 (1.1199) lr 1.1874e-03 eta 1:07:42
epoch [28/50] batch [40/392] time 0.438 (0.443) data 0.000 (0.009) loss 0.6117 (1.0946) lr 1.1874e-03 eta 1:06:20
epoch [28/50] batch [60/392] time 0.441 (0.441) data 0.000 (0.006) loss 1.1163 (1.0776) lr 1.1874e-03 eta 1:05:45
epoch [28/50] batch [80/392] time 0.439 (0.439) data 0.000 (0.005) loss 0.8493 (1.0107) lr 1.1874e-03 eta 1:05:22
epoch [28/50] batch [100/392] time 0.433 (0.438) data 0.000 (0.004) loss 1.2004 (1.0128) lr 1.1874e-03 eta 1:05:04
epoch [28/50] batch [120/392] time 0.434 (0.437) data 0.000 (0.003) loss 0.8862 (1.0005) lr 1.1874e-03 eta 1:04:51
epoch [28/50] batch [140/392] time 0.437 (0.437) data 0.000 (0.003) loss 1.4539 (0.9892) lr 1.1874e-03 eta 1:04:38
epoch [28/50] batch [160/392] time 0.428 (0.436) data 0.000 (0.002) loss 0.4258 (0.9993) lr 1.1874e-03 eta 1:04:24
epoch [28/50] batch [180/392] time 0.437 (0.436) data 0.000 (0.002) loss 1.5733 (1.0466) lr 1.1874e-03 eta 1:04:11
epoch [28/50] batch [200/392] time 0.438 (0.436) data 0.000 (0.002) loss 1.2527 (1.0283) lr 1.1874e-03 eta 1:04:01
epoch [28/50] batch [220/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.0732 (1.0306) lr 1.1874e-03 eta 1:03:49
epoch [28/50] batch [240/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.2786 (1.0540) lr 1.1874e-03 eta 1:03:40
epoch [28/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.2890 (1.0509) lr 1.1874e-03 eta 1:03:30
epoch [28/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 2.0382 (1.0591) lr 1.1874e-03 eta 1:03:20
epoch [28/50] batch [300/392] time 0.422 (0.435) data 0.000 (0.001) loss 0.9878 (1.0609) lr 1.1874e-03 eta 1:03:10
epoch [28/50] batch [320/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.6273 (1.0718) lr 1.1874e-03 eta 1:03:02
epoch [28/50] batch [340/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.8443 (1.0848) lr 1.1874e-03 eta 1:02:53
epoch [28/50] batch [360/392] time 0.422 (0.435) data 0.000 (0.001) loss 2.3528 (1.0719) lr 1.1874e-03 eta 1:02:43
epoch [28/50] batch [380/392] time 0.436 (0.435) data 0.000 (0.001) loss 1.1216 (1.0715) lr 1.1874e-03 eta 1:02:34
epoch [29/50] batch [20/392] time 0.438 (0.453) data 0.000 (0.017) loss 0.9013 (0.7900) lr 1.1253e-03 eta 1:04:55
epoch [29/50] batch [40/392] time 0.433 (0.443) data 0.000 (0.009) loss 0.3738 (1.1064) lr 1.1253e-03 eta 1:03:23
epoch [29/50] batch [60/392] time 0.423 (0.440) data 0.000 (0.006) loss 1.8884 (1.0730) lr 1.1253e-03 eta 1:02:46
epoch [29/50] batch [80/392] time 0.439 (0.438) data 0.000 (0.004) loss 1.7237 (1.0475) lr 1.1253e-03 eta 1:02:24
epoch [29/50] batch [100/392] time 0.438 (0.437) data 0.000 (0.004) loss 1.2307 (1.1242) lr 1.1253e-03 eta 1:02:07
epoch [29/50] batch [120/392] time 0.437 (0.437) data 0.000 (0.003) loss 0.7738 (1.0886) lr 1.1253e-03 eta 1:01:52
epoch [29/50] batch [140/392] time 0.440 (0.436) data 0.000 (0.003) loss 0.9587 (1.0585) lr 1.1253e-03 eta 1:01:42
epoch [29/50] batch [160/392] time 0.439 (0.436) data 0.000 (0.002) loss 1.1812 (1.0351) lr 1.1253e-03 eta 1:01:29
epoch [29/50] batch [180/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.5278 (1.0066) lr 1.1253e-03 eta 1:01:19
epoch [29/50] batch [200/392] time 0.427 (0.435) data 0.000 (0.002) loss 0.4345 (1.0036) lr 1.1253e-03 eta 1:01:07
epoch [29/50] batch [220/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.6571 (1.0238) lr 1.1253e-03 eta 1:00:58
epoch [29/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.4378 (1.0196) lr 1.1253e-03 eta 1:00:48
epoch [29/50] batch [260/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.6420 (1.0146) lr 1.1253e-03 eta 1:00:38
epoch [29/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.5876 (1.0122) lr 1.1253e-03 eta 1:00:29
epoch [29/50] batch [300/392] time 0.434 (0.435) data 0.000 (0.001) loss 0.5734 (1.0257) lr 1.1253e-03 eta 1:00:20
epoch [29/50] batch [320/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.9249 (1.0196) lr 1.1253e-03 eta 1:00:10
epoch [29/50] batch [340/392] time 0.424 (0.435) data 0.000 (0.001) loss 1.9637 (1.0291) lr 1.1253e-03 eta 1:00:01
epoch [29/50] batch [360/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.1267 (1.0211) lr 1.1253e-03 eta 0:59:51
epoch [29/50] batch [380/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.8836 (1.0267) lr 1.1253e-03 eta 0:59:42
epoch [30/50] batch [20/392] time 0.423 (0.450) data 0.000 (0.017) loss 1.8153 (1.0072) lr 1.0628e-03 eta 1:01:37
epoch [30/50] batch [40/392] time 0.426 (0.442) data 0.000 (0.009) loss 0.8561 (1.0915) lr 1.0628e-03 eta 1:00:20
epoch [30/50] batch [60/392] time 0.437 (0.439) data 0.000 (0.006) loss 0.5685 (1.0635) lr 1.0628e-03 eta 0:59:48
epoch [30/50] batch [80/392] time 0.437 (0.438) data 0.000 (0.004) loss 0.4301 (1.1071) lr 1.0628e-03 eta 0:59:28
epoch [30/50] batch [100/392] time 0.437 (0.437) data 0.000 (0.004) loss 0.6789 (1.0897) lr 1.0628e-03 eta 0:59:09
epoch [30/50] batch [120/392] time 0.428 (0.436) data 0.000 (0.003) loss 1.3413 (1.0862) lr 1.0628e-03 eta 0:58:55
epoch [30/50] batch [140/392] time 0.437 (0.435) data 0.000 (0.003) loss 0.2213 (1.0648) lr 1.0628e-03 eta 0:58:43
epoch [30/50] batch [160/392] time 0.439 (0.435) data 0.000 (0.002) loss 1.4956 (1.0726) lr 1.0628e-03 eta 0:58:33
epoch [30/50] batch [180/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.1705 (1.0848) lr 1.0628e-03 eta 0:58:22
epoch [30/50] batch [200/392] time 0.438 (0.435) data 0.000 (0.002) loss 2.2000 (1.0697) lr 1.0628e-03 eta 0:58:13
epoch [30/50] batch [220/392] time 0.435 (0.435) data 0.000 (0.002) loss 1.6083 (1.0578) lr 1.0628e-03 eta 0:58:03
epoch [30/50] batch [240/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.6013 (1.0522) lr 1.0628e-03 eta 0:57:54
epoch [30/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.6694 (1.0393) lr 1.0628e-03 eta 0:57:46
epoch [30/50] batch [280/392] time 0.429 (0.435) data 0.000 (0.001) loss 0.6338 (1.0408) lr 1.0628e-03 eta 0:57:36
epoch [30/50] batch [300/392] time 0.436 (0.435) data 0.000 (0.001) loss 1.3648 (1.0441) lr 1.0628e-03 eta 0:57:27
epoch [30/50] batch [320/392] time 0.424 (0.435) data 0.000 (0.001) loss 0.6023 (1.0497) lr 1.0628e-03 eta 0:57:18
epoch [30/50] batch [340/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.6891 (1.0530) lr 1.0628e-03 eta 0:57:10
epoch [30/50] batch [360/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.3453 (1.0525) lr 1.0628e-03 eta 0:57:01
epoch [30/50] batch [380/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.6446 (1.0566) lr 1.0628e-03 eta 0:56:52
epoch [31/50] batch [20/392] time 0.439 (0.451) data 0.000 (0.017) loss 1.4871 (1.0641) lr 1.0000e-03 eta 0:58:46
epoch [31/50] batch [40/392] time 0.438 (0.442) data 0.000 (0.008) loss 0.9965 (0.9620) lr 1.0000e-03 eta 0:57:28
epoch [31/50] batch [60/392] time 0.423 (0.439) data 0.000 (0.006) loss 0.6171 (0.9519) lr 1.0000e-03 eta 0:56:58
epoch [31/50] batch [80/392] time 0.428 (0.438) data 0.000 (0.004) loss 0.9012 (1.0267) lr 1.0000e-03 eta 0:56:38
epoch [31/50] batch [100/392] time 0.435 (0.437) data 0.000 (0.003) loss 1.5762 (1.0201) lr 1.0000e-03 eta 0:56:24
epoch [31/50] batch [120/392] time 0.438 (0.437) data 0.000 (0.003) loss 1.6329 (1.0141) lr 1.0000e-03 eta 0:56:12
epoch [31/50] batch [140/392] time 0.435 (0.436) data 0.000 (0.002) loss 1.1159 (1.0693) lr 1.0000e-03 eta 0:56:00
epoch [31/50] batch [160/392] time 0.428 (0.436) data 0.000 (0.002) loss 1.0264 (1.0804) lr 1.0000e-03 eta 0:55:49
epoch [31/50] batch [180/392] time 0.438 (0.436) data 0.000 (0.002) loss 1.9142 (1.0774) lr 1.0000e-03 eta 0:55:39
epoch [31/50] batch [200/392] time 0.434 (0.436) data 0.000 (0.002) loss 0.2207 (1.0775) lr 1.0000e-03 eta 0:55:29
epoch [31/50] batch [220/392] time 0.428 (0.436) data 0.000 (0.002) loss 3.0506 (1.0761) lr 1.0000e-03 eta 0:55:20
epoch [31/50] batch [240/392] time 0.433 (0.436) data 0.000 (0.002) loss 1.1563 (1.0709) lr 1.0000e-03 eta 0:55:11
epoch [31/50] batch [260/392] time 0.428 (0.436) data 0.000 (0.001) loss 1.9044 (1.0750) lr 1.0000e-03 eta 0:55:01
epoch [31/50] batch [280/392] time 0.428 (0.435) data 0.000 (0.001) loss 0.4258 (1.0597) lr 1.0000e-03 eta 0:54:51
epoch [31/50] batch [300/392] time 0.434 (0.435) data 0.000 (0.001) loss 0.7958 (1.0564) lr 1.0000e-03 eta 0:54:41
epoch [31/50] batch [320/392] time 0.427 (0.435) data 0.000 (0.001) loss 0.8848 (1.0546) lr 1.0000e-03 eta 0:54:32
epoch [31/50] batch [340/392] time 0.438 (0.435) data 0.000 (0.001) loss 2.0132 (1.0591) lr 1.0000e-03 eta 0:54:23
epoch [31/50] batch [360/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.6379 (1.0554) lr 1.0000e-03 eta 0:54:13
epoch [31/50] batch [380/392] time 0.433 (0.435) data 0.000 (0.001) loss 2.7098 (1.0635) lr 1.0000e-03 eta 0:54:04
epoch [32/50] batch [20/392] time 0.439 (0.452) data 0.000 (0.019) loss 2.3617 (0.8164) lr 9.3721e-04 eta 0:55:56
epoch [32/50] batch [40/392] time 0.427 (0.443) data 0.000 (0.009) loss 0.2740 (0.9512) lr 9.3721e-04 eta 0:54:38
epoch [32/50] batch [60/392] time 0.427 (0.439) data 0.000 (0.006) loss 0.2404 (0.9345) lr 9.3721e-04 eta 0:54:06
epoch [32/50] batch [80/392] time 0.424 (0.438) data 0.000 (0.005) loss 0.4891 (0.9002) lr 9.3721e-04 eta 0:53:44
epoch [32/50] batch [100/392] time 0.428 (0.437) data 0.000 (0.004) loss 1.3334 (0.9143) lr 9.3721e-04 eta 0:53:29
epoch [32/50] batch [120/392] time 0.427 (0.436) data 0.000 (0.003) loss 0.4940 (0.9148) lr 9.3721e-04 eta 0:53:16
epoch [32/50] batch [140/392] time 0.437 (0.436) data 0.000 (0.003) loss 0.7571 (0.9439) lr 9.3721e-04 eta 0:53:05
epoch [32/50] batch [160/392] time 0.427 (0.436) data 0.000 (0.002) loss 0.5322 (0.9634) lr 9.3721e-04 eta 0:52:54
epoch [32/50] batch [180/392] time 0.436 (0.435) data 0.000 (0.002) loss 1.4575 (0.9669) lr 9.3721e-04 eta 0:52:43
epoch [32/50] batch [200/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.2775 (0.9790) lr 9.3721e-04 eta 0:52:33
epoch [32/50] batch [220/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.5918 (0.9880) lr 9.3721e-04 eta 0:52:23
epoch [32/50] batch [240/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.8011 (1.0144) lr 9.3721e-04 eta 0:52:13
epoch [32/50] batch [260/392] time 0.436 (0.435) data 0.000 (0.002) loss 1.1757 (1.0152) lr 9.3721e-04 eta 0:52:03
epoch [32/50] batch [280/392] time 0.437 (0.435) data 0.000 (0.001) loss 2.4231 (1.0267) lr 9.3721e-04 eta 0:51:54
epoch [32/50] batch [300/392] time 0.436 (0.434) data 0.000 (0.001) loss 1.2817 (1.0244) lr 9.3721e-04 eta 0:51:45
epoch [32/50] batch [320/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.7148 (1.0261) lr 9.3721e-04 eta 0:51:35
epoch [32/50] batch [340/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.3840 (1.0249) lr 9.3721e-04 eta 0:51:26
epoch [32/50] batch [360/392] time 0.428 (0.434) data 0.000 (0.001) loss 0.8098 (1.0279) lr 9.3721e-04 eta 0:51:17
epoch [32/50] batch [380/392] time 0.432 (0.434) data 0.000 (0.001) loss 0.7619 (1.0272) lr 9.3721e-04 eta 0:51:08
epoch [33/50] batch [20/392] time 0.424 (0.451) data 0.000 (0.017) loss 0.3833 (1.0907) lr 8.7467e-04 eta 0:52:55
epoch [33/50] batch [40/392] time 0.424 (0.443) data 0.000 (0.009) loss 0.4794 (1.1189) lr 8.7467e-04 eta 0:51:44
epoch [33/50] batch [60/392] time 0.426 (0.440) data 0.000 (0.006) loss 1.2013 (1.0380) lr 8.7467e-04 eta 0:51:16
epoch [33/50] batch [80/392] time 0.428 (0.438) data 0.000 (0.004) loss 0.5714 (1.0290) lr 8.7467e-04 eta 0:50:57
epoch [33/50] batch [100/392] time 0.434 (0.437) data 0.000 (0.004) loss 0.6885 (1.0368) lr 8.7467e-04 eta 0:50:43
epoch [33/50] batch [120/392] time 0.427 (0.437) data 0.000 (0.003) loss 0.6091 (1.0241) lr 8.7467e-04 eta 0:50:29
epoch [33/50] batch [140/392] time 0.427 (0.436) data 0.000 (0.003) loss 1.3796 (1.0235) lr 8.7467e-04 eta 0:50:16
epoch [33/50] batch [160/392] time 0.438 (0.436) data 0.000 (0.002) loss 1.9435 (0.9976) lr 8.7467e-04 eta 0:50:05
epoch [33/50] batch [180/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.5472 (1.0052) lr 8.7467e-04 eta 0:49:53
epoch [33/50] batch [200/392] time 0.432 (0.435) data 0.000 (0.002) loss 0.8350 (1.0040) lr 8.7467e-04 eta 0:49:43
epoch [33/50] batch [220/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.8720 (1.0185) lr 8.7467e-04 eta 0:49:32
epoch [33/50] batch [240/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.7919 (1.0145) lr 8.7467e-04 eta 0:49:23
epoch [33/50] batch [260/392] time 0.431 (0.435) data 0.000 (0.001) loss 0.4195 (1.0162) lr 8.7467e-04 eta 0:49:13
epoch [33/50] batch [280/392] time 0.437 (0.435) data 0.000 (0.001) loss 1.6507 (1.0202) lr 8.7467e-04 eta 0:49:04
epoch [33/50] batch [300/392] time 0.428 (0.434) data 0.000 (0.001) loss 0.6980 (1.0268) lr 8.7467e-04 eta 0:48:55
epoch [33/50] batch [320/392] time 0.429 (0.434) data 0.000 (0.001) loss 0.3625 (1.0258) lr 8.7467e-04 eta 0:48:45
epoch [33/50] batch [340/392] time 0.439 (0.434) data 0.000 (0.001) loss 1.2490 (1.0261) lr 8.7467e-04 eta 0:48:36
epoch [33/50] batch [360/392] time 0.438 (0.434) data 0.000 (0.001) loss 0.5192 (1.0125) lr 8.7467e-04 eta 0:48:27
epoch [33/50] batch [380/392] time 0.432 (0.434) data 0.000 (0.001) loss 0.6329 (1.0206) lr 8.7467e-04 eta 0:48:18
epoch [34/50] batch [20/392] time 0.437 (0.451) data 0.000 (0.017) loss 0.3899 (1.2035) lr 8.1262e-04 eta 0:49:57
epoch [34/50] batch [40/392] time 0.423 (0.441) data 0.000 (0.008) loss 2.0902 (1.1732) lr 8.1262e-04 eta 0:48:44
epoch [34/50] batch [60/392] time 0.428 (0.438) data 0.000 (0.006) loss 0.6060 (1.1340) lr 8.1262e-04 eta 0:48:14
epoch [34/50] batch [80/392] time 0.427 (0.437) data 0.000 (0.004) loss 1.4243 (1.1425) lr 8.1262e-04 eta 0:47:58
epoch [34/50] batch [100/392] time 0.434 (0.437) data 0.000 (0.003) loss 1.4527 (1.1124) lr 8.1262e-04 eta 0:47:46
epoch [34/50] batch [120/392] time 0.438 (0.436) data 0.000 (0.003) loss 1.7650 (1.1378) lr 8.1262e-04 eta 0:47:32
epoch [34/50] batch [140/392] time 0.437 (0.436) data 0.000 (0.003) loss 0.5980 (1.0842) lr 8.1262e-04 eta 0:47:22
epoch [34/50] batch [160/392] time 0.429 (0.435) data 0.000 (0.002) loss 1.3652 (1.0758) lr 8.1262e-04 eta 0:47:11
epoch [34/50] batch [180/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.5332 (1.0765) lr 8.1262e-04 eta 0:47:01
epoch [34/50] batch [200/392] time 0.423 (0.435) data 0.000 (0.002) loss 1.3489 (1.0601) lr 8.1262e-04 eta 0:46:52
epoch [34/50] batch [220/392] time 0.425 (0.435) data 0.000 (0.002) loss 1.6577 (1.0833) lr 8.1262e-04 eta 0:46:42
epoch [34/50] batch [240/392] time 0.437 (0.435) data 0.000 (0.002) loss 1.9733 (1.0702) lr 8.1262e-04 eta 0:46:32
epoch [34/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.001) loss 2.0657 (1.0630) lr 8.1262e-04 eta 0:46:23
epoch [34/50] batch [280/392] time 0.428 (0.434) data 0.000 (0.001) loss 0.5813 (1.0575) lr 8.1262e-04 eta 0:46:13
epoch [34/50] batch [300/392] time 0.438 (0.434) data 0.000 (0.001) loss 2.2592 (1.0671) lr 8.1262e-04 eta 0:46:04
epoch [34/50] batch [320/392] time 0.434 (0.434) data 0.000 (0.001) loss 0.6867 (1.0553) lr 8.1262e-04 eta 0:45:55
epoch [34/50] batch [340/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.2650 (1.0600) lr 8.1262e-04 eta 0:45:45
epoch [34/50] batch [360/392] time 0.438 (0.434) data 0.000 (0.001) loss 0.4793 (1.0626) lr 8.1262e-04 eta 0:45:36
epoch [34/50] batch [380/392] time 0.432 (0.434) data 0.000 (0.001) loss 1.5972 (1.0591) lr 8.1262e-04 eta 0:45:27
epoch [35/50] batch [20/392] time 0.437 (0.451) data 0.000 (0.019) loss 0.4016 (1.0193) lr 7.5131e-04 eta 0:47:00
epoch [35/50] batch [40/392] time 0.434 (0.442) data 0.000 (0.009) loss 1.3231 (1.1172) lr 7.5131e-04 eta 0:45:57
epoch [35/50] batch [60/392] time 0.433 (0.440) data 0.000 (0.006) loss 0.1311 (1.1257) lr 7.5131e-04 eta 0:45:31
epoch [35/50] batch [80/392] time 0.437 (0.437) data 0.000 (0.005) loss 0.3168 (1.0893) lr 7.5131e-04 eta 0:45:08
epoch [35/50] batch [100/392] time 0.437 (0.437) data 0.000 (0.004) loss 1.0136 (1.0898) lr 7.5131e-04 eta 0:44:56
epoch [35/50] batch [120/392] time 0.432 (0.436) data 0.000 (0.003) loss 0.9760 (1.0820) lr 7.5131e-04 eta 0:44:44
epoch [35/50] batch [140/392] time 0.422 (0.436) data 0.000 (0.003) loss 1.9567 (1.0812) lr 7.5131e-04 eta 0:44:32
epoch [35/50] batch [160/392] time 0.433 (0.436) data 0.000 (0.002) loss 0.3715 (1.0986) lr 7.5131e-04 eta 0:44:22
epoch [35/50] batch [180/392] time 0.422 (0.435) data 0.000 (0.002) loss 0.4033 (1.0744) lr 7.5131e-04 eta 0:44:10
epoch [35/50] batch [200/392] time 0.433 (0.435) data 0.000 (0.002) loss 0.2968 (1.0863) lr 7.5131e-04 eta 0:44:01
epoch [35/50] batch [220/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.6739 (1.0807) lr 7.5131e-04 eta 0:43:51
epoch [35/50] batch [240/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.4129 (1.0685) lr 7.5131e-04 eta 0:43:42
epoch [35/50] batch [260/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.8261 (1.0705) lr 7.5131e-04 eta 0:43:33
epoch [35/50] batch [280/392] time 0.437 (0.435) data 0.000 (0.001) loss 1.0528 (1.0680) lr 7.5131e-04 eta 0:43:24
epoch [35/50] batch [300/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.1710 (1.0568) lr 7.5131e-04 eta 0:43:15
epoch [35/50] batch [320/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.3316 (1.0552) lr 7.5131e-04 eta 0:43:06
epoch [35/50] batch [340/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.2056 (1.0692) lr 7.5131e-04 eta 0:42:57
epoch [35/50] batch [360/392] time 0.438 (0.434) data 0.000 (0.001) loss 0.9860 (1.0660) lr 7.5131e-04 eta 0:42:48
epoch [35/50] batch [380/392] time 0.432 (0.434) data 0.000 (0.001) loss 0.4196 (1.0703) lr 7.5131e-04 eta 0:42:39
epoch [36/50] batch [20/392] time 0.438 (0.453) data 0.000 (0.019) loss 0.4132 (1.0514) lr 6.9098e-04 eta 0:44:17
epoch [36/50] batch [40/392] time 0.434 (0.443) data 0.000 (0.010) loss 0.4544 (0.9237) lr 6.9098e-04 eta 0:43:07
epoch [36/50] batch [60/392] time 0.426 (0.440) data 0.000 (0.007) loss 0.4826 (0.9245) lr 6.9098e-04 eta 0:42:38
epoch [36/50] batch [80/392] time 0.434 (0.438) data 0.000 (0.005) loss 2.3315 (0.9902) lr 6.9098e-04 eta 0:42:20
epoch [36/50] batch [100/392] time 0.438 (0.437) data 0.000 (0.004) loss 0.9930 (0.9561) lr 6.9098e-04 eta 0:42:06
epoch [36/50] batch [120/392] time 0.438 (0.437) data 0.000 (0.003) loss 2.0992 (0.9532) lr 6.9098e-04 eta 0:41:54
epoch [36/50] batch [140/392] time 0.434 (0.436) data 0.000 (0.003) loss 0.6564 (0.9428) lr 6.9098e-04 eta 0:41:41
epoch [36/50] batch [160/392] time 0.438 (0.436) data 0.000 (0.003) loss 2.0657 (0.9789) lr 6.9098e-04 eta 0:41:31
epoch [36/50] batch [180/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.2320 (0.9605) lr 6.9098e-04 eta 0:41:21
epoch [36/50] batch [200/392] time 0.434 (0.435) data 0.000 (0.002) loss 0.8589 (0.9481) lr 6.9098e-04 eta 0:41:12
epoch [36/50] batch [220/392] time 0.437 (0.435) data 0.000 (0.002) loss 2.0147 (0.9688) lr 6.9098e-04 eta 0:41:02
epoch [36/50] batch [240/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.5490 (0.9737) lr 6.9098e-04 eta 0:40:52
epoch [36/50] batch [260/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.0489 (0.9871) lr 6.9098e-04 eta 0:40:43
epoch [36/50] batch [280/392] time 0.433 (0.435) data 0.000 (0.002) loss 0.5455 (0.9729) lr 6.9098e-04 eta 0:40:34
epoch [36/50] batch [300/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.5780 (0.9849) lr 6.9098e-04 eta 0:40:24
epoch [36/50] batch [320/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.4301 (0.9892) lr 6.9098e-04 eta 0:40:15
epoch [36/50] batch [340/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.4288 (0.9872) lr 6.9098e-04 eta 0:40:06
epoch [36/50] batch [360/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.9153 (0.9897) lr 6.9098e-04 eta 0:39:57
epoch [36/50] batch [380/392] time 0.430 (0.434) data 0.000 (0.001) loss 0.9991 (0.9851) lr 6.9098e-04 eta 0:39:48
epoch [37/50] batch [20/392] time 0.438 (0.452) data 0.000 (0.018) loss 1.3510 (0.9693) lr 6.3188e-04 eta 0:41:10
epoch [37/50] batch [40/392] time 0.427 (0.442) data 0.000 (0.009) loss 0.3975 (0.8371) lr 6.3188e-04 eta 0:40:10
epoch [37/50] batch [60/392] time 0.437 (0.439) data 0.000 (0.006) loss 0.2458 (0.8569) lr 6.3188e-04 eta 0:39:42
epoch [37/50] batch [80/392] time 0.437 (0.437) data 0.000 (0.005) loss 0.8861 (0.9502) lr 6.3188e-04 eta 0:39:23
epoch [37/50] batch [100/392] time 0.432 (0.436) data 0.000 (0.004) loss 0.3983 (0.9212) lr 6.3188e-04 eta 0:39:09
epoch [37/50] batch [120/392] time 0.437 (0.435) data 0.000 (0.003) loss 1.5444 (0.9131) lr 6.3188e-04 eta 0:38:57
epoch [37/50] batch [140/392] time 0.437 (0.435) data 0.000 (0.003) loss 0.5997 (0.9602) lr 6.3188e-04 eta 0:38:47
epoch [37/50] batch [160/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.4580 (1.0072) lr 6.3188e-04 eta 0:38:37
epoch [37/50] batch [180/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.2731 (0.9955) lr 6.3188e-04 eta 0:38:27
epoch [37/50] batch [200/392] time 0.427 (0.435) data 0.000 (0.002) loss 0.6633 (0.9892) lr 6.3188e-04 eta 0:38:18
epoch [37/50] batch [220/392] time 0.436 (0.434) data 0.000 (0.002) loss 1.3410 (0.9775) lr 6.3188e-04 eta 0:38:08
epoch [37/50] batch [240/392] time 0.427 (0.434) data 0.000 (0.002) loss 1.3902 (0.9963) lr 6.3188e-04 eta 0:37:59
epoch [37/50] batch [260/392] time 0.434 (0.434) data 0.000 (0.001) loss 2.2630 (1.0076) lr 6.3188e-04 eta 0:37:50
epoch [37/50] batch [280/392] time 0.437 (0.434) data 0.000 (0.001) loss 2.2876 (1.0007) lr 6.3188e-04 eta 0:37:40
epoch [37/50] batch [300/392] time 0.422 (0.434) data 0.000 (0.001) loss 1.0915 (0.9825) lr 6.3188e-04 eta 0:37:31
epoch [37/50] batch [320/392] time 0.427 (0.434) data 0.000 (0.001) loss 0.4452 (0.9801) lr 6.3188e-04 eta 0:37:22
epoch [37/50] batch [340/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.2430 (0.9822) lr 6.3188e-04 eta 0:37:12
epoch [37/50] batch [360/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.7739 (0.9913) lr 6.3188e-04 eta 0:37:03
epoch [37/50] batch [380/392] time 0.435 (0.434) data 0.000 (0.001) loss 3.1309 (0.9987) lr 6.3188e-04 eta 0:36:54
epoch [38/50] batch [20/392] time 0.423 (0.449) data 0.000 (0.018) loss 2.0645 (0.9773) lr 5.7422e-04 eta 0:37:57
epoch [38/50] batch [40/392] time 0.427 (0.441) data 0.000 (0.009) loss 0.9532 (1.0784) lr 5.7422e-04 eta 0:37:08
epoch [38/50] batch [60/392] time 0.422 (0.437) data 0.000 (0.006) loss 0.6843 (1.0714) lr 5.7422e-04 eta 0:36:43
epoch [38/50] batch [80/392] time 0.438 (0.437) data 0.000 (0.005) loss 0.2444 (1.0490) lr 5.7422e-04 eta 0:36:29
epoch [38/50] batch [100/392] time 0.433 (0.436) data 0.000 (0.004) loss 0.7268 (1.0178) lr 5.7422e-04 eta 0:36:17
epoch [38/50] batch [120/392] time 0.428 (0.435) data 0.000 (0.003) loss 0.8612 (1.0073) lr 5.7422e-04 eta 0:36:05
epoch [38/50] batch [140/392] time 0.438 (0.435) data 0.000 (0.003) loss 0.3743 (0.9943) lr 5.7422e-04 eta 0:35:55
epoch [38/50] batch [160/392] time 0.433 (0.435) data 0.000 (0.002) loss 0.0963 (0.9953) lr 5.7422e-04 eta 0:35:45
epoch [38/50] batch [180/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.8192 (0.9919) lr 5.7422e-04 eta 0:35:36
epoch [38/50] batch [200/392] time 0.437 (0.434) data 0.000 (0.002) loss 1.8357 (0.9952) lr 5.7422e-04 eta 0:35:26
epoch [38/50] batch [220/392] time 0.427 (0.434) data 0.000 (0.002) loss 1.2616 (0.9886) lr 5.7422e-04 eta 0:35:17
epoch [38/50] batch [240/392] time 0.437 (0.434) data 0.000 (0.002) loss 1.2792 (0.9676) lr 5.7422e-04 eta 0:35:08
epoch [38/50] batch [260/392] time 0.432 (0.434) data 0.000 (0.001) loss 0.2528 (0.9687) lr 5.7422e-04 eta 0:34:59
epoch [38/50] batch [280/392] time 0.422 (0.434) data 0.000 (0.001) loss 1.7872 (0.9731) lr 5.7422e-04 eta 0:34:50
epoch [38/50] batch [300/392] time 0.433 (0.434) data 0.000 (0.001) loss 0.6294 (0.9679) lr 5.7422e-04 eta 0:34:41
epoch [38/50] batch [320/392] time 0.437 (0.434) data 0.000 (0.001) loss 3.0921 (0.9653) lr 5.7422e-04 eta 0:34:31
epoch [38/50] batch [340/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.1618 (0.9746) lr 5.7422e-04 eta 0:34:22
epoch [38/50] batch [360/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.2955 (0.9801) lr 5.7422e-04 eta 0:34:13
epoch [38/50] batch [380/392] time 0.431 (0.434) data 0.000 (0.001) loss 1.5711 (0.9921) lr 5.7422e-04 eta 0:34:05
epoch [39/50] batch [20/392] time 0.423 (0.454) data 0.000 (0.019) loss 0.3567 (0.7359) lr 5.1825e-04 eta 0:35:24
epoch [39/50] batch [40/392] time 0.428 (0.443) data 0.000 (0.010) loss 0.8661 (0.8687) lr 5.1825e-04 eta 0:34:25
epoch [39/50] batch [60/392] time 0.439 (0.440) data 0.000 (0.006) loss 1.6024 (0.9380) lr 5.1825e-04 eta 0:34:01
epoch [39/50] batch [80/392] time 0.438 (0.438) data 0.000 (0.005) loss 0.8327 (0.9044) lr 5.1825e-04 eta 0:33:44
epoch [39/50] batch [100/392] time 0.437 (0.437) data 0.000 (0.004) loss 1.4846 (0.9541) lr 5.1825e-04 eta 0:33:33
epoch [39/50] batch [120/392] time 0.437 (0.436) data 0.000 (0.003) loss 0.9926 (0.9582) lr 5.1825e-04 eta 0:33:20
epoch [39/50] batch [140/392] time 0.438 (0.436) data 0.000 (0.003) loss 2.0174 (0.9332) lr 5.1825e-04 eta 0:33:09
epoch [39/50] batch [160/392] time 0.428 (0.436) data 0.000 (0.003) loss 1.2966 (0.9675) lr 5.1825e-04 eta 0:32:59
epoch [39/50] batch [180/392] time 0.438 (0.436) data 0.000 (0.002) loss 1.6753 (1.0166) lr 5.1825e-04 eta 0:32:50
epoch [39/50] batch [200/392] time 0.427 (0.435) data 0.000 (0.002) loss 0.7215 (1.0356) lr 5.1825e-04 eta 0:32:40
epoch [39/50] batch [220/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.4433 (1.0516) lr 5.1825e-04 eta 0:32:30
epoch [39/50] batch [240/392] time 0.433 (0.435) data 0.000 (0.002) loss 0.8800 (1.0585) lr 5.1825e-04 eta 0:32:21
epoch [39/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.002) loss 2.1250 (1.0684) lr 5.1825e-04 eta 0:32:12
epoch [39/50] batch [280/392] time 0.433 (0.435) data 0.000 (0.002) loss 1.3511 (1.0711) lr 5.1825e-04 eta 0:32:02
epoch [39/50] batch [300/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.2774 (1.0957) lr 5.1825e-04 eta 0:31:53
epoch [39/50] batch [320/392] time 0.439 (0.435) data 0.000 (0.001) loss 2.7191 (1.1070) lr 5.1825e-04 eta 0:31:44
epoch [39/50] batch [340/392] time 0.429 (0.434) data 0.000 (0.001) loss 1.0299 (1.1002) lr 5.1825e-04 eta 0:31:36
epoch [39/50] batch [360/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.0268 (1.0864) lr 5.1825e-04 eta 0:31:27
epoch [39/50] batch [380/392] time 0.436 (0.434) data 0.000 (0.001) loss 0.9807 (1.0864) lr 5.1825e-04 eta 0:31:18
epoch [40/50] batch [20/392] time 0.437 (0.451) data 0.000 (0.017) loss 0.4068 (0.9941) lr 4.6417e-04 eta 0:32:16
epoch [40/50] batch [40/392] time 0.427 (0.442) data 0.000 (0.009) loss 0.9252 (1.1245) lr 4.6417e-04 eta 0:31:29
epoch [40/50] batch [60/392] time 0.428 (0.439) data 0.000 (0.006) loss 0.9363 (1.0864) lr 4.6417e-04 eta 0:31:07
epoch [40/50] batch [80/392] time 0.438 (0.438) data 0.000 (0.004) loss 1.2177 (1.0738) lr 4.6417e-04 eta 0:30:53
epoch [40/50] batch [100/392] time 0.435 (0.437) data 0.000 (0.004) loss 0.3204 (1.0681) lr 4.6417e-04 eta 0:30:40
epoch [40/50] batch [120/392] time 0.437 (0.436) data 0.000 (0.003) loss 0.1985 (1.0274) lr 4.6417e-04 eta 0:30:29
epoch [40/50] batch [140/392] time 0.438 (0.436) data 0.000 (0.003) loss 0.2855 (1.0201) lr 4.6417e-04 eta 0:30:19
epoch [40/50] batch [160/392] time 0.428 (0.436) data 0.000 (0.002) loss 2.3145 (1.0209) lr 4.6417e-04 eta 0:30:09
epoch [40/50] batch [180/392] time 0.435 (0.436) data 0.000 (0.002) loss 0.1676 (0.9945) lr 4.6417e-04 eta 0:29:59
epoch [40/50] batch [200/392] time 0.433 (0.435) data 0.000 (0.002) loss 0.5057 (0.9802) lr 4.6417e-04 eta 0:29:50
epoch [40/50] batch [220/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.3740 (0.9927) lr 4.6417e-04 eta 0:29:40
epoch [40/50] batch [240/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.9761 (0.9791) lr 4.6417e-04 eta 0:29:31
epoch [40/50] batch [260/392] time 0.432 (0.435) data 0.000 (0.001) loss 2.0961 (0.9964) lr 4.6417e-04 eta 0:29:21
epoch [40/50] batch [280/392] time 0.427 (0.435) data 0.000 (0.001) loss 1.9319 (1.0045) lr 4.6417e-04 eta 0:29:12
epoch [40/50] batch [300/392] time 0.427 (0.435) data 0.000 (0.001) loss 0.8751 (1.0012) lr 4.6417e-04 eta 0:29:03
epoch [40/50] batch [320/392] time 0.437 (0.435) data 0.000 (0.001) loss 0.8819 (1.0172) lr 4.6417e-04 eta 0:28:54
epoch [40/50] batch [340/392] time 0.433 (0.435) data 0.000 (0.001) loss 2.1975 (1.0210) lr 4.6417e-04 eta 0:28:46
epoch [40/50] batch [360/392] time 0.429 (0.435) data 0.000 (0.001) loss 1.0376 (1.0316) lr 4.6417e-04 eta 0:28:37
epoch [40/50] batch [380/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.5632 (1.0390) lr 4.6417e-04 eta 0:28:28
epoch [41/50] batch [20/392] time 0.438 (0.453) data 0.000 (0.018) loss 0.2932 (0.8755) lr 4.1221e-04 eta 0:29:26
epoch [41/50] batch [40/392] time 0.429 (0.444) data 0.000 (0.009) loss 1.4274 (1.0132) lr 4.1221e-04 eta 0:28:41
epoch [41/50] batch [60/392] time 0.429 (0.441) data 0.000 (0.006) loss 0.4798 (0.9660) lr 4.1221e-04 eta 0:28:22
epoch [41/50] batch [80/392] time 0.439 (0.439) data 0.000 (0.005) loss 2.0771 (0.9593) lr 4.1221e-04 eta 0:28:07
epoch [41/50] batch [100/392] time 0.435 (0.438) data 0.000 (0.004) loss 0.3567 (0.9563) lr 4.1221e-04 eta 0:27:55
epoch [41/50] batch [120/392] time 0.435 (0.438) data 0.000 (0.003) loss 1.7435 (0.9686) lr 4.1221e-04 eta 0:27:43
epoch [41/50] batch [140/392] time 0.434 (0.437) data 0.000 (0.003) loss 0.2592 (0.9770) lr 4.1221e-04 eta 0:27:33
epoch [41/50] batch [160/392] time 0.434 (0.437) data 0.000 (0.002) loss 2.5254 (1.0029) lr 4.1221e-04 eta 0:27:23
epoch [41/50] batch [180/392] time 0.438 (0.437) data 0.000 (0.002) loss 0.8638 (0.9922) lr 4.1221e-04 eta 0:27:13
epoch [41/50] batch [200/392] time 0.437 (0.436) data 0.000 (0.002) loss 1.6824 (0.9983) lr 4.1221e-04 eta 0:27:03
epoch [41/50] batch [220/392] time 0.428 (0.436) data 0.000 (0.002) loss 0.5569 (0.9976) lr 4.1221e-04 eta 0:26:53
epoch [41/50] batch [240/392] time 0.439 (0.436) data 0.000 (0.002) loss 1.4583 (0.9852) lr 4.1221e-04 eta 0:26:44
epoch [41/50] batch [260/392] time 0.439 (0.436) data 0.001 (0.002) loss 0.2909 (0.9870) lr 4.1221e-04 eta 0:26:35
epoch [41/50] batch [280/392] time 0.438 (0.436) data 0.000 (0.001) loss 1.6479 (0.9853) lr 4.1221e-04 eta 0:26:26
epoch [41/50] batch [300/392] time 0.439 (0.436) data 0.000 (0.001) loss 1.0215 (0.9969) lr 4.1221e-04 eta 0:26:17
epoch [41/50] batch [320/392] time 0.438 (0.436) data 0.000 (0.001) loss 0.4348 (0.9894) lr 4.1221e-04 eta 0:26:08
epoch [41/50] batch [340/392] time 0.428 (0.436) data 0.000 (0.001) loss 2.0229 (0.9924) lr 4.1221e-04 eta 0:25:59
epoch [41/50] batch [360/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.1341 (1.0016) lr 4.1221e-04 eta 0:25:49
epoch [41/50] batch [380/392] time 0.436 (0.435) data 0.000 (0.001) loss 0.4088 (1.0129) lr 4.1221e-04 eta 0:25:40
epoch [42/50] batch [20/392] time 0.426 (0.450) data 0.000 (0.018) loss 0.0773 (1.3098) lr 3.6258e-04 eta 0:26:18
epoch [42/50] batch [40/392] time 0.436 (0.442) data 0.000 (0.009) loss 1.2091 (1.2026) lr 3.6258e-04 eta 0:25:40
epoch [42/50] batch [60/392] time 0.432 (0.439) data 0.000 (0.006) loss 1.2153 (1.1863) lr 3.6258e-04 eta 0:25:22
epoch [42/50] batch [80/392] time 0.437 (0.437) data 0.000 (0.005) loss 1.4476 (1.1609) lr 3.6258e-04 eta 0:25:07
epoch [42/50] batch [100/392] time 0.437 (0.436) data 0.000 (0.004) loss 0.1308 (1.1032) lr 3.6258e-04 eta 0:24:56
epoch [42/50] batch [120/392] time 0.428 (0.436) data 0.000 (0.003) loss 0.5436 (1.1162) lr 3.6258e-04 eta 0:24:45
epoch [42/50] batch [140/392] time 0.426 (0.435) data 0.000 (0.003) loss 0.8704 (1.1208) lr 3.6258e-04 eta 0:24:35
epoch [42/50] batch [160/392] time 0.436 (0.435) data 0.000 (0.002) loss 2.0106 (1.1012) lr 3.6258e-04 eta 0:24:24
epoch [42/50] batch [180/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.2849 (1.0774) lr 3.6258e-04 eta 0:24:14
epoch [42/50] batch [200/392] time 0.427 (0.434) data 0.000 (0.002) loss 0.7294 (1.0675) lr 3.6258e-04 eta 0:24:05
epoch [42/50] batch [220/392] time 0.437 (0.434) data 0.000 (0.002) loss 1.3530 (1.0557) lr 3.6258e-04 eta 0:23:55
epoch [42/50] batch [240/392] time 0.437 (0.434) data 0.000 (0.002) loss 0.8757 (1.0455) lr 3.6258e-04 eta 0:23:46
epoch [42/50] batch [260/392] time 0.434 (0.434) data 0.000 (0.002) loss 1.4409 (1.0344) lr 3.6258e-04 eta 0:23:38
epoch [42/50] batch [280/392] time 0.432 (0.434) data 0.000 (0.001) loss 1.1021 (1.0303) lr 3.6258e-04 eta 0:23:28
epoch [42/50] batch [300/392] time 0.435 (0.434) data 0.000 (0.001) loss 0.7026 (1.0233) lr 3.6258e-04 eta 0:23:20
epoch [42/50] batch [320/392] time 0.429 (0.434) data 0.000 (0.001) loss 2.2342 (1.0329) lr 3.6258e-04 eta 0:23:11
epoch [42/50] batch [340/392] time 0.429 (0.434) data 0.000 (0.001) loss 1.3461 (1.0347) lr 3.6258e-04 eta 0:23:02
epoch [42/50] batch [360/392] time 0.432 (0.434) data 0.000 (0.001) loss 1.4610 (1.0399) lr 3.6258e-04 eta 0:22:54
epoch [42/50] batch [380/392] time 0.437 (0.434) data 0.000 (0.001) loss 0.3207 (1.0298) lr 3.6258e-04 eta 0:22:45
epoch [43/50] batch [20/392] time 0.432 (0.450) data 0.000 (0.017) loss 0.9613 (0.6964) lr 3.1545e-04 eta 0:23:23
epoch [43/50] batch [40/392] time 0.428 (0.441) data 0.000 (0.009) loss 1.4929 (0.8198) lr 3.1545e-04 eta 0:22:46
epoch [43/50] batch [60/392] time 0.438 (0.439) data 0.000 (0.006) loss 1.5467 (0.8558) lr 3.1545e-04 eta 0:22:30
epoch [43/50] batch [80/392] time 0.436 (0.437) data 0.000 (0.004) loss 0.3359 (0.8669) lr 3.1545e-04 eta 0:22:16
epoch [43/50] batch [100/392] time 0.437 (0.437) data 0.000 (0.004) loss 0.3661 (0.8979) lr 3.1545e-04 eta 0:22:06
epoch [43/50] batch [120/392] time 0.433 (0.436) data 0.000 (0.003) loss 0.6558 (0.9142) lr 3.1545e-04 eta 0:21:55
epoch [43/50] batch [140/392] time 0.429 (0.436) data 0.000 (0.003) loss 0.2731 (0.9274) lr 3.1545e-04 eta 0:21:46
epoch [43/50] batch [160/392] time 0.435 (0.436) data 0.000 (0.002) loss 0.9785 (0.9394) lr 3.1545e-04 eta 0:21:36
epoch [43/50] batch [180/392] time 0.439 (0.436) data 0.000 (0.002) loss 1.7831 (0.9478) lr 3.1545e-04 eta 0:21:27
epoch [43/50] batch [200/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.5120 (0.9463) lr 3.1545e-04 eta 0:21:18
epoch [43/50] batch [220/392] time 0.439 (0.435) data 0.000 (0.002) loss 1.0380 (0.9283) lr 3.1545e-04 eta 0:21:09
epoch [43/50] batch [240/392] time 0.429 (0.435) data 0.000 (0.002) loss 0.7225 (0.9438) lr 3.1545e-04 eta 0:21:01
epoch [43/50] batch [260/392] time 0.434 (0.435) data 0.000 (0.001) loss 2.1450 (0.9606) lr 3.1545e-04 eta 0:20:52
epoch [43/50] batch [280/392] time 0.440 (0.435) data 0.000 (0.001) loss 0.3826 (0.9423) lr 3.1545e-04 eta 0:20:43
epoch [43/50] batch [300/392] time 0.439 (0.435) data 0.000 (0.001) loss 0.4994 (0.9560) lr 3.1545e-04 eta 0:20:34
epoch [43/50] batch [320/392] time 0.429 (0.435) data 0.000 (0.001) loss 2.0341 (0.9630) lr 3.1545e-04 eta 0:20:25
epoch [43/50] batch [340/392] time 0.433 (0.435) data 0.000 (0.001) loss 0.3377 (0.9652) lr 3.1545e-04 eta 0:20:16
epoch [43/50] batch [360/392] time 0.428 (0.435) data 0.000 (0.001) loss 0.2843 (0.9619) lr 3.1545e-04 eta 0:20:07
epoch [43/50] batch [380/392] time 0.436 (0.435) data 0.000 (0.001) loss 0.5334 (0.9563) lr 3.1545e-04 eta 0:19:58
epoch [44/50] batch [20/392] time 0.433 (0.453) data 0.000 (0.019) loss 0.6312 (1.2446) lr 2.7103e-04 eta 0:20:34
epoch [44/50] batch [40/392] time 0.438 (0.443) data 0.000 (0.009) loss 0.9595 (1.1268) lr 2.7103e-04 eta 0:19:58
epoch [44/50] batch [60/392] time 0.437 (0.440) data 0.000 (0.006) loss 0.3299 (1.0677) lr 2.7103e-04 eta 0:19:40
epoch [44/50] batch [80/392] time 0.425 (0.438) data 0.000 (0.005) loss 0.6057 (1.0606) lr 2.7103e-04 eta 0:19:26
epoch [44/50] batch [100/392] time 0.437 (0.437) data 0.000 (0.004) loss 2.1246 (1.0375) lr 2.7103e-04 eta 0:19:15
epoch [44/50] batch [120/392] time 0.436 (0.436) data 0.000 (0.003) loss 1.0755 (1.0235) lr 2.7103e-04 eta 0:19:04
epoch [44/50] batch [140/392] time 0.438 (0.436) data 0.000 (0.003) loss 1.0203 (1.0138) lr 2.7103e-04 eta 0:18:54
epoch [44/50] batch [160/392] time 0.439 (0.436) data 0.000 (0.002) loss 0.5349 (1.0387) lr 2.7103e-04 eta 0:18:45
epoch [44/50] batch [180/392] time 0.438 (0.436) data 0.000 (0.002) loss 1.8759 (1.0846) lr 2.7103e-04 eta 0:18:36
epoch [44/50] batch [200/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.4651 (1.0772) lr 2.7103e-04 eta 0:18:27
epoch [44/50] batch [220/392] time 0.433 (0.435) data 0.000 (0.002) loss 1.8592 (1.0731) lr 2.7103e-04 eta 0:18:18
epoch [44/50] batch [240/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.3660 (1.0740) lr 2.7103e-04 eta 0:18:09
epoch [44/50] batch [260/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.0551 (1.0510) lr 2.7103e-04 eta 0:18:01
epoch [44/50] batch [280/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.8361 (1.0384) lr 2.7103e-04 eta 0:17:52
epoch [44/50] batch [300/392] time 0.427 (0.435) data 0.000 (0.001) loss 1.2019 (1.0238) lr 2.7103e-04 eta 0:17:43
epoch [44/50] batch [320/392] time 0.439 (0.435) data 0.000 (0.001) loss 0.9934 (1.0244) lr 2.7103e-04 eta 0:17:34
epoch [44/50] batch [340/392] time 0.439 (0.435) data 0.000 (0.001) loss 1.1322 (1.0337) lr 2.7103e-04 eta 0:17:25
epoch [44/50] batch [360/392] time 0.439 (0.435) data 0.000 (0.001) loss 0.1538 (1.0235) lr 2.7103e-04 eta 0:17:16
epoch [44/50] batch [380/392] time 0.432 (0.435) data 0.000 (0.001) loss 1.5790 (1.0271) lr 2.7103e-04 eta 0:17:07
epoch [45/50] batch [20/392] time 0.433 (0.452) data 0.000 (0.017) loss 1.4269 (0.9192) lr 2.2949e-04 eta 0:17:32
epoch [45/50] batch [40/392] time 0.437 (0.442) data 0.000 (0.009) loss 0.8378 (0.8802) lr 2.2949e-04 eta 0:17:03
epoch [45/50] batch [60/392] time 0.433 (0.439) data 0.000 (0.006) loss 0.9402 (0.8747) lr 2.2949e-04 eta 0:16:46
epoch [45/50] batch [80/392] time 0.428 (0.437) data 0.000 (0.004) loss 0.9982 (0.8536) lr 2.2949e-04 eta 0:16:33
epoch [45/50] batch [100/392] time 0.437 (0.437) data 0.000 (0.004) loss 1.3361 (0.8576) lr 2.2949e-04 eta 0:16:23
epoch [45/50] batch [120/392] time 0.434 (0.436) data 0.000 (0.003) loss 0.2167 (0.8886) lr 2.2949e-04 eta 0:16:13
epoch [45/50] batch [140/392] time 0.433 (0.435) data 0.000 (0.003) loss 2.0944 (0.8983) lr 2.2949e-04 eta 0:16:03
epoch [45/50] batch [160/392] time 0.436 (0.435) data 0.000 (0.002) loss 0.7872 (0.9176) lr 2.2949e-04 eta 0:15:54
epoch [45/50] batch [180/392] time 0.427 (0.435) data 0.000 (0.002) loss 0.3084 (0.9267) lr 2.2949e-04 eta 0:15:44
epoch [45/50] batch [200/392] time 0.433 (0.435) data 0.000 (0.002) loss 0.3310 (0.9601) lr 2.2949e-04 eta 0:15:35
epoch [45/50] batch [220/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.1343 (0.9538) lr 2.2949e-04 eta 0:15:26
epoch [45/50] batch [240/392] time 0.427 (0.435) data 0.000 (0.002) loss 0.6013 (0.9570) lr 2.2949e-04 eta 0:15:17
epoch [45/50] batch [260/392] time 0.438 (0.434) data 0.000 (0.001) loss 0.8422 (0.9783) lr 2.2949e-04 eta 0:15:08
epoch [45/50] batch [280/392] time 0.434 (0.434) data 0.000 (0.001) loss 0.3604 (0.9880) lr 2.2949e-04 eta 0:15:00
epoch [45/50] batch [300/392] time 0.433 (0.434) data 0.000 (0.001) loss 0.8870 (1.0088) lr 2.2949e-04 eta 0:14:51
epoch [45/50] batch [320/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.0980 (1.0070) lr 2.2949e-04 eta 0:14:42
epoch [45/50] batch [340/392] time 0.434 (0.434) data 0.000 (0.001) loss 0.5928 (1.0047) lr 2.2949e-04 eta 0:14:34
epoch [45/50] batch [360/392] time 0.434 (0.434) data 0.000 (0.001) loss 0.7939 (1.0145) lr 2.2949e-04 eta 0:14:25
epoch [45/50] batch [380/392] time 0.431 (0.434) data 0.000 (0.001) loss 0.6143 (1.0316) lr 2.2949e-04 eta 0:14:16
epoch [46/50] batch [20/392] time 0.437 (0.452) data 0.000 (0.018) loss 1.1157 (1.2045) lr 1.9098e-04 eta 0:14:36
epoch [46/50] batch [40/392] time 0.437 (0.444) data 0.000 (0.009) loss 1.4545 (1.2011) lr 1.9098e-04 eta 0:14:11
epoch [46/50] batch [60/392] time 0.438 (0.440) data 0.000 (0.006) loss 0.4625 (1.1376) lr 1.9098e-04 eta 0:13:55
epoch [46/50] batch [80/392] time 0.427 (0.438) data 0.000 (0.005) loss 0.9318 (1.0882) lr 1.9098e-04 eta 0:13:43
epoch [46/50] batch [100/392] time 0.438 (0.437) data 0.000 (0.004) loss 0.8347 (1.0907) lr 1.9098e-04 eta 0:13:33
epoch [46/50] batch [120/392] time 0.439 (0.437) data 0.000 (0.003) loss 0.5721 (1.0644) lr 1.9098e-04 eta 0:13:23
epoch [46/50] batch [140/392] time 0.438 (0.436) data 0.000 (0.003) loss 0.8027 (1.0494) lr 1.9098e-04 eta 0:13:14
epoch [46/50] batch [160/392] time 0.438 (0.436) data 0.000 (0.002) loss 0.9926 (1.0578) lr 1.9098e-04 eta 0:13:04
epoch [46/50] batch [180/392] time 0.431 (0.436) data 0.003 (0.002) loss 0.8404 (1.0362) lr 1.9098e-04 eta 0:12:55
epoch [46/50] batch [200/392] time 0.438 (0.435) data 0.000 (0.002) loss 1.3824 (1.0177) lr 1.9098e-04 eta 0:12:46
epoch [46/50] batch [220/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.1056 (1.0210) lr 1.9098e-04 eta 0:12:37
epoch [46/50] batch [240/392] time 0.428 (0.435) data 0.000 (0.002) loss 0.6480 (1.0228) lr 1.9098e-04 eta 0:12:28
epoch [46/50] batch [260/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.5884 (1.0428) lr 1.9098e-04 eta 0:12:19
epoch [46/50] batch [280/392] time 0.432 (0.435) data 0.000 (0.001) loss 0.6354 (1.0399) lr 1.9098e-04 eta 0:12:10
epoch [46/50] batch [300/392] time 0.437 (0.435) data 0.000 (0.001) loss 1.2015 (1.0393) lr 1.9098e-04 eta 0:12:01
epoch [46/50] batch [320/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.9740 (1.0447) lr 1.9098e-04 eta 0:11:52
epoch [46/50] batch [340/392] time 0.435 (0.434) data 0.000 (0.001) loss 0.7790 (1.0392) lr 1.9098e-04 eta 0:11:43
epoch [46/50] batch [360/392] time 0.429 (0.434) data 0.000 (0.001) loss 0.8260 (1.0515) lr 1.9098e-04 eta 0:11:35
epoch [46/50] batch [380/392] time 0.427 (0.434) data 0.000 (0.001) loss 1.8947 (1.0443) lr 1.9098e-04 eta 0:11:26
epoch [47/50] batch [20/392] time 0.428 (0.452) data 0.000 (0.017) loss 0.7915 (0.8391) lr 1.5567e-04 eta 0:11:39
epoch [47/50] batch [40/392] time 0.437 (0.443) data 0.000 (0.009) loss 0.1306 (0.8365) lr 1.5567e-04 eta 0:11:16
epoch [47/50] batch [60/392] time 0.438 (0.440) data 0.000 (0.006) loss 0.3352 (0.8764) lr 1.5567e-04 eta 0:11:03
epoch [47/50] batch [80/392] time 0.428 (0.438) data 0.000 (0.004) loss 0.2948 (0.9190) lr 1.5567e-04 eta 0:10:51
epoch [47/50] batch [100/392] time 0.438 (0.437) data 0.000 (0.004) loss 2.0927 (0.9550) lr 1.5567e-04 eta 0:10:41
epoch [47/50] batch [120/392] time 0.437 (0.437) data 0.000 (0.003) loss 0.5710 (0.9913) lr 1.5567e-04 eta 0:10:32
epoch [47/50] batch [140/392] time 0.438 (0.436) data 0.000 (0.003) loss 1.6951 (1.0003) lr 1.5567e-04 eta 0:10:22
epoch [47/50] batch [160/392] time 0.437 (0.436) data 0.000 (0.002) loss 2.8644 (1.0277) lr 1.5567e-04 eta 0:10:13
epoch [47/50] batch [180/392] time 0.427 (0.435) data 0.000 (0.002) loss 1.0733 (1.0182) lr 1.5567e-04 eta 0:10:04
epoch [47/50] batch [200/392] time 0.428 (0.435) data 0.000 (0.002) loss 1.0737 (1.0275) lr 1.5567e-04 eta 0:09:55
epoch [47/50] batch [220/392] time 0.438 (0.435) data 0.000 (0.002) loss 0.8459 (1.0362) lr 1.5567e-04 eta 0:09:46
epoch [47/50] batch [240/392] time 0.437 (0.435) data 0.000 (0.002) loss 0.7125 (1.0312) lr 1.5567e-04 eta 0:09:37
epoch [47/50] batch [260/392] time 0.432 (0.435) data 0.000 (0.001) loss 2.2366 (1.0515) lr 1.5567e-04 eta 0:09:28
epoch [47/50] batch [280/392] time 0.439 (0.435) data 0.000 (0.001) loss 0.8094 (1.0495) lr 1.5567e-04 eta 0:09:19
epoch [47/50] batch [300/392] time 0.437 (0.435) data 0.000 (0.001) loss 2.1594 (1.0628) lr 1.5567e-04 eta 0:09:11
epoch [47/50] batch [320/392] time 0.432 (0.434) data 0.000 (0.001) loss 0.1595 (1.0529) lr 1.5567e-04 eta 0:09:02
epoch [47/50] batch [340/392] time 0.436 (0.434) data 0.000 (0.001) loss 1.1674 (1.0310) lr 1.5567e-04 eta 0:08:53
epoch [47/50] batch [360/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.5568 (1.0256) lr 1.5567e-04 eta 0:08:44
epoch [47/50] batch [380/392] time 0.427 (0.434) data 0.000 (0.001) loss 1.4504 (1.0275) lr 1.5567e-04 eta 0:08:36
epoch [48/50] batch [20/392] time 0.437 (0.452) data 0.000 (0.017) loss 0.8079 (0.9211) lr 1.2369e-04 eta 0:08:42
epoch [48/50] batch [40/392] time 0.437 (0.442) data 0.000 (0.009) loss 0.3632 (0.7941) lr 1.2369e-04 eta 0:08:22
epoch [48/50] batch [60/392] time 0.438 (0.439) data 0.000 (0.006) loss 0.9687 (0.8612) lr 1.2369e-04 eta 0:08:10
epoch [48/50] batch [80/392] time 0.427 (0.438) data 0.000 (0.004) loss 0.4986 (0.8549) lr 1.2369e-04 eta 0:07:59
epoch [48/50] batch [100/392] time 0.437 (0.437) data 0.000 (0.004) loss 1.2712 (0.8707) lr 1.2369e-04 eta 0:07:50
epoch [48/50] batch [120/392] time 0.433 (0.436) data 0.000 (0.003) loss 1.8922 (0.9189) lr 1.2369e-04 eta 0:07:40
epoch [48/50] batch [140/392] time 0.438 (0.436) data 0.000 (0.003) loss 0.1826 (0.9460) lr 1.2369e-04 eta 0:07:31
epoch [48/50] batch [160/392] time 0.437 (0.436) data 0.000 (0.002) loss 0.6498 (0.9910) lr 1.2369e-04 eta 0:07:22
epoch [48/50] batch [180/392] time 0.427 (0.435) data 0.000 (0.002) loss 0.6302 (0.9595) lr 1.2369e-04 eta 0:07:13
epoch [48/50] batch [200/392] time 0.437 (0.435) data 0.000 (0.002) loss 2.0595 (0.9764) lr 1.2369e-04 eta 0:07:04
epoch [48/50] batch [220/392] time 0.436 (0.435) data 0.000 (0.002) loss 1.4250 (1.0046) lr 1.2369e-04 eta 0:06:55
epoch [48/50] batch [240/392] time 0.432 (0.435) data 0.000 (0.002) loss 1.4855 (0.9890) lr 1.2369e-04 eta 0:06:47
epoch [48/50] batch [260/392] time 0.427 (0.435) data 0.000 (0.001) loss 1.3568 (0.9796) lr 1.2369e-04 eta 0:06:38
epoch [48/50] batch [280/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.0664 (0.9727) lr 1.2369e-04 eta 0:06:29
epoch [48/50] batch [300/392] time 0.437 (0.434) data 0.000 (0.001) loss 1.0662 (0.9723) lr 1.2369e-04 eta 0:06:20
epoch [48/50] batch [320/392] time 0.436 (0.434) data 0.000 (0.001) loss 0.7786 (0.9704) lr 1.2369e-04 eta 0:06:11
epoch [48/50] batch [340/392] time 0.438 (0.434) data 0.000 (0.001) loss 1.0508 (0.9882) lr 1.2369e-04 eta 0:06:03
epoch [48/50] batch [360/392] time 0.426 (0.434) data 0.000 (0.001) loss 0.8310 (0.9861) lr 1.2369e-04 eta 0:05:54
epoch [48/50] batch [380/392] time 0.420 (0.434) data 0.000 (0.001) loss 1.4421 (0.9896) lr 1.2369e-04 eta 0:05:45
epoch [49/50] batch [20/392] time 0.422 (0.450) data 0.000 (0.017) loss 1.5749 (1.3022) lr 9.5173e-05 eta 0:05:43
epoch [49/50] batch [40/392] time 0.436 (0.442) data 0.000 (0.009) loss 3.1862 (1.2464) lr 9.5173e-05 eta 0:05:28
epoch [49/50] batch [60/392] time 0.432 (0.439) data 0.000 (0.006) loss 0.4101 (1.1516) lr 9.5173e-05 eta 0:05:17
epoch [49/50] batch [80/392] time 0.423 (0.438) data 0.000 (0.004) loss 0.4692 (1.1565) lr 9.5173e-05 eta 0:05:08
epoch [49/50] batch [100/392] time 0.437 (0.437) data 0.000 (0.003) loss 0.5984 (1.1186) lr 9.5173e-05 eta 0:04:58
epoch [49/50] batch [120/392] time 0.433 (0.436) data 0.000 (0.003) loss 0.8760 (1.1384) lr 9.5173e-05 eta 0:04:49
epoch [49/50] batch [140/392] time 0.432 (0.436) data 0.000 (0.003) loss 1.2840 (1.1367) lr 9.5173e-05 eta 0:04:40
epoch [49/50] batch [160/392] time 0.428 (0.436) data 0.000 (0.002) loss 1.5602 (1.0938) lr 9.5173e-05 eta 0:04:31
epoch [49/50] batch [180/392] time 0.433 (0.436) data 0.000 (0.002) loss 0.5884 (1.1071) lr 9.5173e-05 eta 0:04:23
epoch [49/50] batch [200/392] time 0.439 (0.436) data 0.000 (0.002) loss 1.6392 (1.1165) lr 9.5173e-05 eta 0:04:14
epoch [49/50] batch [220/392] time 0.434 (0.436) data 0.000 (0.002) loss 1.2014 (1.0916) lr 9.5173e-05 eta 0:04:05
epoch [49/50] batch [240/392] time 0.440 (0.436) data 0.000 (0.002) loss 2.1355 (1.0855) lr 9.5173e-05 eta 0:03:56
epoch [49/50] batch [260/392] time 0.433 (0.435) data 0.000 (0.001) loss 1.4550 (1.0862) lr 9.5173e-05 eta 0:03:48
epoch [49/50] batch [280/392] time 0.424 (0.435) data 0.000 (0.001) loss 2.1420 (1.1114) lr 9.5173e-05 eta 0:03:39
epoch [49/50] batch [300/392] time 0.439 (0.435) data 0.000 (0.001) loss 0.8477 (1.1134) lr 9.5173e-05 eta 0:03:30
epoch [49/50] batch [320/392] time 0.434 (0.435) data 0.000 (0.001) loss 1.2584 (1.1036) lr 9.5173e-05 eta 0:03:21
epoch [49/50] batch [340/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.3396 (1.1047) lr 9.5173e-05 eta 0:03:13
epoch [49/50] batch [360/392] time 0.434 (0.435) data 0.000 (0.001) loss 1.1140 (1.0909) lr 9.5173e-05 eta 0:03:04
epoch [49/50] batch [380/392] time 0.437 (0.435) data 0.000 (0.001) loss 1.9929 (1.0856) lr 9.5173e-05 eta 0:02:55
epoch [50/50] batch [20/392] time 0.434 (0.453) data 0.000 (0.019) loss 0.9396 (0.9660) lr 7.0224e-05 eta 0:02:48
epoch [50/50] batch [40/392] time 0.426 (0.444) data 0.000 (0.009) loss 0.5909 (0.9238) lr 7.0224e-05 eta 0:02:36
epoch [50/50] batch [60/392] time 0.438 (0.441) data 0.000 (0.006) loss 1.9332 (0.9628) lr 7.0224e-05 eta 0:02:26
epoch [50/50] batch [80/392] time 0.429 (0.440) data 0.000 (0.005) loss 1.5259 (0.9948) lr 7.0224e-05 eta 0:02:17
epoch [50/50] batch [100/392] time 0.438 (0.439) data 0.000 (0.004) loss 0.5774 (0.9688) lr 7.0224e-05 eta 0:02:08
epoch [50/50] batch [120/392] time 0.438 (0.438) data 0.000 (0.003) loss 0.2127 (0.9704) lr 7.0224e-05 eta 0:01:59
epoch [50/50] batch [140/392] time 0.428 (0.437) data 0.000 (0.003) loss 0.9775 (1.0231) lr 7.0224e-05 eta 0:01:50
epoch [50/50] batch [160/392] time 0.437 (0.437) data 0.000 (0.003) loss 0.3653 (1.0179) lr 7.0224e-05 eta 0:01:41
epoch [50/50] batch [180/392] time 0.434 (0.436) data 0.000 (0.002) loss 0.3139 (1.0248) lr 7.0224e-05 eta 0:01:32
epoch [50/50] batch [200/392] time 0.437 (0.436) data 0.000 (0.002) loss 2.0475 (1.0381) lr 7.0224e-05 eta 0:01:23
epoch [50/50] batch [220/392] time 0.437 (0.436) data 0.000 (0.002) loss 0.4204 (1.0324) lr 7.0224e-05 eta 0:01:15
epoch [50/50] batch [240/392] time 0.433 (0.436) data 0.000 (0.002) loss 1.7323 (1.0290) lr 7.0224e-05 eta 0:01:06
epoch [50/50] batch [260/392] time 0.428 (0.436) data 0.000 (0.002) loss 0.9279 (1.0211) lr 7.0224e-05 eta 0:00:57
epoch [50/50] batch [280/392] time 0.437 (0.436) data 0.000 (0.002) loss 1.8064 (1.0295) lr 7.0224e-05 eta 0:00:48
epoch [50/50] batch [300/392] time 0.438 (0.435) data 0.000 (0.001) loss 1.5161 (1.0106) lr 7.0224e-05 eta 0:00:40
epoch [50/50] batch [320/392] time 0.433 (0.435) data 0.000 (0.001) loss 0.5157 (0.9979) lr 7.0224e-05 eta 0:00:31
epoch [50/50] batch [340/392] time 0.438 (0.435) data 0.000 (0.001) loss 0.5920 (0.9935) lr 7.0224e-05 eta 0:00:22
epoch [50/50] batch [360/392] time 0.438 (0.435) data 0.000 (0.001) loss 3.1442 (1.0035) lr 7.0224e-05 eta 0:00:13
epoch [50/50] batch [380/392] time 0.426 (0.435) data 0.000 (0.001) loss 0.2549 (0.9996) lr 7.0224e-05 eta 0:00:05
Checkpoint saved to output/base2new/train_base/stanford_cars/vit_b16_ep50_c4_BZ4_ProDA/seed1/prompt_learner/model.pth.tar-50
Finish training
Deploy the last-epoch model
Evaluate on the *test* set
=> result
* total: 3,991
* correct: 3,344
* accuracy: 83.79%
* error: 16.21%
* macro_f1: 83.56%
Elapsed: 2:23:26
