***************
** Arguments **
***************
backbone: 
config_file: configs/trainers/ProDA/vit_b16_ep50_c4_BZ4_ProDA.yaml
dataset_config_file: configs/datasets/caltech101.yaml
eval_only: False
head: 
load_epoch: None
model_dir: 
no_train: False
opts: ['DATASET.NUM_SHOTS', '16', 'DATASET.SUBSAMPLE_CLASSES', 'base']
output_dir: output/base2new/train_base/caltech101/vit_b16_ep50_c4_BZ4_ProDA/seed1
resume: 
root: /mnt/hdd/DATA
seed: 1
source_domains: None
target_domains: None
trainer: ProDA
transforms: None
************
** Config **
************
DATALOADER:
  K_TRANSFORMS: 1
  NUM_WORKERS: 8
  RETURN_IMG0: False
  TEST:
    BATCH_SIZE: 100
    SAMPLER: SequentialSampler
  TRAIN_U:
    BATCH_SIZE: 32
    N_DOMAIN: 0
    N_INS: 16
    SAME_AS_X: True
    SAMPLER: RandomSampler
  TRAIN_X:
    BATCH_SIZE: 4
    N_DOMAIN: 0
    N_INS: 16
    SAMPLER: RandomSampler
DATASET:
  ALL_AS_UNLABELED: False
  CIFAR_C_LEVEL: 1
  CIFAR_C_TYPE: 
  NAME: Caltech101
  NUM_LABELED: -1
  NUM_SHOTS: 16
  ROOT: /mnt/hdd/DATA
  SOURCE_DOMAINS: ()
  STL10_FOLD: -1
  SUBSAMPLE_CLASSES: base
  TARGET_DOMAINS: ()
  VAL_PERCENT: 0.1
INPUT:
  COLORJITTER_B: 0.4
  COLORJITTER_C: 0.4
  COLORJITTER_H: 0.1
  COLORJITTER_S: 0.4
  CROP_PADDING: 4
  CUTOUT_LEN: 16
  CUTOUT_N: 1
  GB_K: 21
  GB_P: 0.5
  GN_MEAN: 0.0
  GN_STD: 0.15
  INTERPOLATION: bicubic
  NO_TRANSFORM: False
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  RANDAUGMENT_M: 10
  RANDAUGMENT_N: 2
  RGS_P: 0.2
  RRCROP_SCALE: (0.08, 1.0)
  SIZE: (224, 224)
  TRANSFORMS: ('random_resized_crop', 'random_flip', 'normalize')
MODEL:
  BACKBONE:
    NAME: ViT-B/16
    PRETRAINED: True
  HEAD:
    ACTIVATION: relu
    BN: True
    DROPOUT: 0.0
    HIDDEN_LAYERS: ()
    NAME: 
  INIT_WEIGHTS: 
OPTIM:
  ADAM_BETA1: 0.9
  ADAM_BETA2: 0.999
  BASE_LR_MULT: 0.1
  GAMMA: 0.1
  LR: 0.002
  LR_SCHEDULER: cosine
  MAX_EPOCH: 50
  MOMENTUM: 0.9
  NAME: sgd
  NEW_LAYERS: ()
  RMSPROP_ALPHA: 0.99
  SGD_DAMPNING: 0
  SGD_NESTEROV: False
  STAGED_LR: False
  STEPSIZE: (-1,)
  WARMUP_CONS_LR: 1e-05
  WARMUP_EPOCH: 5
  WARMUP_MIN_LR: 1e-05
  WARMUP_RECOUNT: True
  WARMUP_TYPE: constant
  WEIGHT_DECAY: 0.0005
OUTPUT_DIR: output/base2new/train_base/caltech101/vit_b16_ep50_c4_BZ4_ProDA/seed1
RESUME: 
SEED: 1
TEST:
  COMPUTE_CMAT: False
  EVALUATOR: Classification
  FINAL_MODEL: last_step
  NO_TEST: False
  PER_CLASS_RESULT: False
  SPLIT: test
TRAIN:
  CHECKPOINT_FREQ: 0
  COUNT_ITER: train_x
  PRINT_FREQ: 20
TRAINER:
  CDAC:
    CLASS_LR_MULTI: 10
    P_THRESH: 0.95
    RAMPUP_COEF: 30
    RAMPUP_ITRS: 1000
    STRONG_TRANSFORMS: ()
    TOPK_MATCH: 5
  COCOOP:
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  COOP:
    CLASS_TOKEN_POSITION: end
    CSC: False
    CTX_INIT: 
    N_CTX: 16
    PREC: fp16
  CROSSGRAD:
    ALPHA_D: 0.5
    ALPHA_F: 0.5
    EPS_D: 1.0
    EPS_F: 1.0
  DAEL:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DAELDG:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 0.5
  DDAIG:
    ALPHA: 0.5
    CLAMP: False
    CLAMP_MAX: 1.0
    CLAMP_MIN: -1.0
    G_ARCH: 
    LMDA: 0.3
    WARMUP: 0
  DOMAINMIX:
    ALPHA: 1.0
    BETA: 1.0
    TYPE: crossdomain
  ENTMIN:
    LMDA: 0.001
  FIXMATCH:
    CONF_THRE: 0.95
    STRONG_TRANSFORMS: ()
    WEIGHT_U: 1.0
  IVLP:
    CTX_INIT: a photo of a
    N_CTX_TEXT: 2
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_TEXT: 9
    PROMPT_DEPTH_VISION: 9
  M3SDA:
    LMDA: 0.5
    N_STEP_F: 4
  MAPLE:
    CTX_INIT: a photo of a
    N_CTX: 4
    PREC: fp16
    PROMPT_DEPTH: 9
  MCD:
    N_STEP_F: 4
  MEANTEACHER:
    EMA_ALPHA: 0.999
    RAMPUP: 5
    WEIGHT_U: 1.0
  MIXMATCH:
    MIXUP_BETA: 0.75
    RAMPUP: 20000
    TEMP: 2.0
    WEIGHT_U: 100.0
  MME:
    LMDA: 0.1
  NAME: ProDA
  ProDA:
    N_CTX: 4
    N_PROMPT: 32
    PREC: fp16
  SE:
    CONF_THRE: 0.95
    EMA_ALPHA: 0.999
    RAMPUP: 300
  VPT:
    CTX_INIT: a photo of a
    N_CTX_VISION: 2
    PREC: fp16
    PROMPT_DEPTH_VISION: 1
USE_CUDA: True
VERBOSE: True
VERSION: 1
Collecting env info ...
** System info **
PyTorch version: 2.2.1+cu121
Is debug build: False
CUDA used to build PyTorch: 12.1
ROCM used to build PyTorch: N/A

OS: Debian GNU/Linux 12 (bookworm) (x86_64)
GCC version: (Debian 12.2.0-14) 12.2.0
Clang version: Could not collect
CMake version: Could not collect
Libc version: glibc-2.36

Python version: 3.11.2 (main, Mar 13 2023, 12:18:29) [GCC 12.2.0] (64-bit runtime)
Python platform: Linux-6.5.13-3-pve-x86_64-with-glibc2.36
Is CUDA available: True
CUDA runtime version: 11.8.89
CUDA_MODULE_LOADING set to: LAZY
GPU models and configuration: 
GPU 0: NVIDIA A800 80GB PCIe
GPU 1: NVIDIA A800 80GB PCIe

Nvidia driver version: 525.147.05
cuDNN version: Could not collect
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True

CPU:
Architecture:                       x86_64
CPU op-mode(s):                     32-bit, 64-bit
Address sizes:                      46 bits physical, 57 bits virtual
Byte Order:                         Little Endian
CPU(s):                             64
On-line CPU(s) list:                18,20,22,23,25-27,29,31,32,34,37,46-49
Off-line CPU(s) list:               0-17,19,21,24,28,30,33,35,36,38-45,50-63
Vendor ID:                          GenuineIntel
Model name:                         Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz
CPU family:                         6
Model:                              106
Thread(s) per core:                 2
Core(s) per socket:                 16
Socket(s):                          2
Stepping:                           6
CPU(s) scaling MHz:                 96%
CPU max MHz:                        3500.0000
CPU min MHz:                        800.0000
BogoMIPS:                           5800.00
Flags:                              fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 invpcid_single intel_ppin ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect wbnoinvd dtherm ida arat pln pts vnmi avx512vbmi umip pku ospke avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid fsrm md_clear pconfig flush_l1d arch_capabilities
Virtualization:                     VT-x
L1d cache:                          1.5 MiB (32 instances)
L1i cache:                          1 MiB (32 instances)
L2 cache:                           40 MiB (32 instances)
L3 cache:                           48 MiB (2 instances)
NUMA node(s):                       2
NUMA node0 CPU(s):                  0-15,32-47
NUMA node1 CPU(s):                  16-31,48-63
Vulnerability Gather data sampling: Vulnerable: No microcode
Vulnerability Itlb multihit:        Not affected
Vulnerability L1tf:                 Not affected
Vulnerability Mds:                  Not affected
Vulnerability Meltdown:             Not affected
Vulnerability Mmio stale data:      Mitigation; Clear CPU buffers; SMT vulnerable
Vulnerability Retbleed:             Not affected
Vulnerability Spec rstack overflow: Not affected
Vulnerability Spec store bypass:    Mitigation; Speculative Store Bypass disabled via prctl
Vulnerability Spectre v1:           Mitigation; usercopy/swapgs barriers and __user pointer sanitization
Vulnerability Spectre v2:           Mitigation; Enhanced / Automatic IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
Vulnerability Srbds:                Not affected
Vulnerability Tsx async abort:      Not affected

Versions of relevant libraries:
[pip3] flake8==3.7.9
[pip3] flake8==3.7.9
[pip3] numpy==1.26.4
[pip3] torch==2.2.1
[pip3] torchaudio==2.2.1
[pip3] torchvision==0.17.1
[pip3] triton==2.2.0
[conda] Could not collect
        Pillow (10.2.0)

Loading trainer: ProDA
Loading dataset: Caltech101
Reading split from /mnt/hdd/DATA/caltech-101/split_zhou_Caltech101.json
Loading preprocessed few-shot data from /mnt/hdd/DATA/caltech-101/split_fewshot/shot_16_shuffled-seed_1.pkl
SUBSAMPLE BASE CLASSES!
Building transform_train
+ random resized crop (size=(224, 224), scale=(0.08, 1.0))
+ random flip
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
Building transform_test
+ resize the smaller edge to 224
+ 224x224 center crop
+ to torch tensor of range [0, 1]
+ normalization (mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
---------  ----------
Dataset    Caltech101
# classes  50
# train_x  800
# val      200
# test     1,287
---------  ----------
Loading CLIP (backbone: ViT-B/16)
Building custom CLIP
Turning off gradients in both the image and the text encoder
Parameters to be updated: {'prompt_learner.ctx'}
Loading evaluator: Classification
No checkpoint found, train from scratch
Initialize tensorboard (log_dir=output/base2new/train_base/caltech101/vit_b16_ep50_c4_BZ4_ProDA/seed1/tensorboard)
epoch [1/50] batch [20/200] time 0.547 (1.025) data 0.005 (0.029) loss 2.3778 (0.9311) lr 1.0000e-05 eta 2:50:31
epoch [1/50] batch [40/200] time 0.727 (0.814) data 0.000 (0.015) loss 4.1211 (1.0163) lr 1.0000e-05 eta 2:15:03
epoch [1/50] batch [60/200] time 0.717 (0.782) data 0.001 (0.010) loss 1.4297 (1.0771) lr 1.0000e-05 eta 2:09:31
epoch [1/50] batch [80/200] time 0.719 (0.765) data 0.000 (0.007) loss 0.8290 (1.0228) lr 1.0000e-05 eta 2:06:28
epoch [1/50] batch [100/200] time 0.534 (0.736) data 0.000 (0.006) loss 0.4048 (0.9568) lr 1.0000e-05 eta 2:01:28
epoch [1/50] batch [120/200] time 0.730 (0.726) data 0.000 (0.005) loss 0.2685 (0.9488) lr 1.0000e-05 eta 1:59:32
epoch [1/50] batch [140/200] time 0.710 (0.725) data 0.000 (0.004) loss 0.2191 (0.9585) lr 1.0000e-05 eta 1:59:04
epoch [1/50] batch [160/200] time 0.708 (0.723) data 0.000 (0.004) loss 1.1593 (0.9887) lr 1.0000e-05 eta 1:58:37
epoch [1/50] batch [180/200] time 0.708 (0.706) data 0.000 (0.004) loss 1.0127 (0.9672) lr 1.0000e-05 eta 1:55:31
epoch [1/50] batch [200/200] time 0.694 (0.706) data 0.000 (0.003) loss 0.2881 (0.9558) lr 1.0000e-05 eta 1:55:22
epoch [2/50] batch [20/200] time 0.715 (0.741) data 0.000 (0.026) loss 1.5835 (0.8564) lr 1.0000e-05 eta 2:00:43
epoch [2/50] batch [40/200] time 0.246 (0.673) data 0.000 (0.013) loss 1.3084 (0.7910) lr 1.0000e-05 eta 1:49:23
epoch [2/50] batch [60/200] time 0.704 (0.636) data 0.000 (0.009) loss 0.3603 (0.7792) lr 1.0000e-05 eta 1:43:10
epoch [2/50] batch [80/200] time 0.702 (0.655) data 0.000 (0.007) loss 0.8023 (0.7327) lr 1.0000e-05 eta 1:46:02
epoch [2/50] batch [100/200] time 0.728 (0.667) data 0.000 (0.005) loss 0.3982 (0.7684) lr 1.0000e-05 eta 1:47:45
epoch [2/50] batch [120/200] time 0.246 (0.652) data 0.000 (0.005) loss 0.0791 (0.8174) lr 1.0000e-05 eta 1:45:07
epoch [2/50] batch [140/200] time 0.483 (0.633) data 0.000 (0.004) loss 0.4808 (0.8231) lr 1.0000e-05 eta 1:41:54
epoch [2/50] batch [160/200] time 0.450 (0.613) data 0.000 (0.004) loss 1.1735 (0.8060) lr 1.0000e-05 eta 1:38:29
epoch [2/50] batch [180/200] time 0.461 (0.598) data 0.000 (0.003) loss 1.2034 (0.8280) lr 1.0000e-05 eta 1:35:55
epoch [2/50] batch [200/200] time 0.550 (0.595) data 0.000 (0.003) loss 0.3227 (0.8233) lr 1.0000e-05 eta 1:35:11
epoch [3/50] batch [20/200] time 0.471 (0.470) data 0.000 (0.031) loss 0.5368 (0.9818) lr 1.0000e-05 eta 1:15:01
epoch [3/50] batch [40/200] time 0.512 (0.470) data 0.000 (0.016) loss 0.0469 (0.8856) lr 1.0000e-05 eta 1:14:54
epoch [3/50] batch [60/200] time 0.686 (0.503) data 0.000 (0.011) loss 0.3999 (0.8252) lr 1.0000e-05 eta 1:19:58
epoch [3/50] batch [80/200] time 0.242 (0.525) data 0.000 (0.008) loss 0.0158 (0.8092) lr 1.0000e-05 eta 1:23:13
epoch [3/50] batch [100/200] time 0.675 (0.520) data 0.000 (0.006) loss 0.3682 (0.8190) lr 1.0000e-05 eta 1:22:19
epoch [3/50] batch [120/200] time 0.681 (0.543) data 0.000 (0.005) loss 1.2822 (0.8321) lr 1.0000e-05 eta 1:25:48
epoch [3/50] batch [140/200] time 0.700 (0.564) data 0.000 (0.005) loss 0.1600 (0.8194) lr 1.0000e-05 eta 1:28:55
epoch [3/50] batch [160/200] time 0.418 (0.571) data 0.000 (0.004) loss 0.8161 (0.8130) lr 1.0000e-05 eta 1:29:52
epoch [3/50] batch [180/200] time 0.726 (0.560) data 0.000 (0.004) loss 0.7546 (0.7914) lr 1.0000e-05 eta 1:27:50
epoch [3/50] batch [200/200] time 0.463 (0.552) data 0.000 (0.003) loss 1.3927 (0.7997) lr 1.0000e-05 eta 1:26:32
epoch [4/50] batch [20/200] time 0.467 (0.440) data 0.000 (0.028) loss 0.9366 (1.1702) lr 1.0000e-05 eta 1:08:47
epoch [4/50] batch [40/200] time 0.278 (0.407) data 0.000 (0.014) loss 2.3646 (0.9187) lr 1.0000e-05 eta 1:03:26
epoch [4/50] batch [60/200] time 0.450 (0.459) data 0.000 (0.010) loss 0.4546 (0.8607) lr 1.0000e-05 eta 1:11:27
epoch [4/50] batch [80/200] time 0.438 (0.429) data 0.000 (0.007) loss 0.3180 (0.8403) lr 1.0000e-05 eta 1:06:42
epoch [4/50] batch [100/200] time 0.465 (0.437) data 0.000 (0.006) loss 0.5987 (0.7549) lr 1.0000e-05 eta 1:07:40
epoch [4/50] batch [120/200] time 0.670 (0.449) data 0.000 (0.005) loss 1.0741 (0.7392) lr 1.0000e-05 eta 1:09:25
epoch [4/50] batch [140/200] time 0.560 (0.465) data 0.000 (0.004) loss 0.6724 (0.7223) lr 1.0000e-05 eta 1:11:47
epoch [4/50] batch [160/200] time 0.667 (0.488) data 0.000 (0.004) loss 0.0223 (0.7001) lr 1.0000e-05 eta 1:15:06
epoch [4/50] batch [180/200] time 0.675 (0.492) data 0.000 (0.003) loss 0.2157 (0.6929) lr 1.0000e-05 eta 1:15:33
epoch [4/50] batch [200/200] time 0.288 (0.504) data 0.000 (0.003) loss 0.2321 (0.7101) lr 1.0000e-05 eta 1:17:16
epoch [5/50] batch [20/200] time 0.443 (0.684) data 0.000 (0.028) loss 0.3780 (0.8106) lr 1.0000e-05 eta 1:44:40
epoch [5/50] batch [40/200] time 0.672 (0.641) data 0.000 (0.014) loss 1.0651 (0.7837) lr 1.0000e-05 eta 1:37:50
epoch [5/50] batch [60/200] time 0.239 (0.607) data 0.000 (0.010) loss 0.2367 (0.8351) lr 1.0000e-05 eta 1:32:23
epoch [5/50] batch [80/200] time 0.456 (0.557) data 0.000 (0.007) loss 1.3580 (0.8087) lr 1.0000e-05 eta 1:24:37
epoch [5/50] batch [100/200] time 0.477 (0.525) data 0.000 (0.006) loss 0.0780 (0.7593) lr 1.0000e-05 eta 1:19:33
epoch [5/50] batch [120/200] time 0.475 (0.514) data 0.000 (0.005) loss 0.3825 (0.7733) lr 1.0000e-05 eta 1:17:49
epoch [5/50] batch [140/200] time 0.462 (0.498) data 0.000 (0.004) loss 0.6488 (0.7525) lr 1.0000e-05 eta 1:15:09
epoch [5/50] batch [160/200] time 0.465 (0.492) data 0.000 (0.004) loss 1.8435 (0.7557) lr 1.0000e-05 eta 1:14:08
epoch [5/50] batch [180/200] time 0.245 (0.475) data 0.000 (0.003) loss 1.3535 (0.7319) lr 1.0000e-05 eta 1:11:22
epoch [5/50] batch [200/200] time 0.244 (0.455) data 0.000 (0.003) loss 0.8011 (0.7338) lr 2.0000e-03 eta 1:08:17
epoch [6/50] batch [20/200] time 0.340 (0.309) data 0.000 (0.027) loss 0.0132 (0.3752) lr 2.0000e-03 eta 0:46:14
epoch [6/50] batch [40/200] time 0.359 (0.304) data 0.000 (0.014) loss 1.2649 (0.5998) lr 2.0000e-03 eta 0:45:20
epoch [6/50] batch [60/200] time 0.245 (0.310) data 0.000 (0.009) loss 0.3247 (0.7037) lr 2.0000e-03 eta 0:46:15
epoch [6/50] batch [80/200] time 0.239 (0.321) data 0.000 (0.007) loss 1.4507 (0.7260) lr 2.0000e-03 eta 0:47:44
epoch [6/50] batch [100/200] time 0.266 (0.307) data 0.000 (0.006) loss 0.8406 (0.7339) lr 2.0000e-03 eta 0:45:33
epoch [6/50] batch [120/200] time 0.255 (0.300) data 0.000 (0.005) loss 1.1164 (0.7149) lr 2.0000e-03 eta 0:44:24
epoch [6/50] batch [140/200] time 0.260 (0.301) data 0.000 (0.004) loss 0.1296 (0.7141) lr 2.0000e-03 eta 0:44:24
epoch [6/50] batch [160/200] time 0.355 (0.301) data 0.000 (0.004) loss 0.0735 (0.7341) lr 2.0000e-03 eta 0:44:24
epoch [6/50] batch [180/200] time 0.274 (0.309) data 0.000 (0.003) loss 0.4039 (0.7367) lr 2.0000e-03 eta 0:45:28
epoch [6/50] batch [200/200] time 0.401 (0.308) data 0.000 (0.003) loss 0.0076 (0.7238) lr 1.9980e-03 eta 0:45:09
epoch [7/50] batch [20/200] time 0.272 (0.324) data 0.000 (0.038) loss 0.5710 (0.3922) lr 1.9980e-03 eta 0:47:29
epoch [7/50] batch [40/200] time 0.455 (0.318) data 0.000 (0.019) loss 1.5638 (0.4767) lr 1.9980e-03 eta 0:46:21
epoch [7/50] batch [60/200] time 0.464 (0.363) data 0.000 (0.013) loss 0.6208 (0.5465) lr 1.9980e-03 eta 0:52:50
epoch [7/50] batch [80/200] time 0.725 (0.372) data 0.000 (0.010) loss 1.7281 (0.5538) lr 1.9980e-03 eta 0:54:03
epoch [7/50] batch [100/200] time 0.244 (0.394) data 0.000 (0.008) loss 1.1961 (0.5756) lr 1.9980e-03 eta 0:57:06
epoch [7/50] batch [120/200] time 0.427 (0.376) data 0.000 (0.007) loss 0.6058 (0.5823) lr 1.9980e-03 eta 0:54:26
epoch [7/50] batch [140/200] time 0.464 (0.389) data 0.000 (0.006) loss 0.6620 (0.5725) lr 1.9980e-03 eta 0:56:10
epoch [7/50] batch [160/200] time 0.685 (0.405) data 0.000 (0.005) loss 0.0613 (0.5687) lr 1.9980e-03 eta 0:58:22
epoch [7/50] batch [180/200] time 0.324 (0.420) data 0.000 (0.004) loss 0.9414 (0.5607) lr 1.9980e-03 eta 1:00:18
epoch [7/50] batch [200/200] time 0.669 (0.437) data 0.000 (0.004) loss 0.6649 (0.5649) lr 1.9921e-03 eta 1:02:37
epoch [8/50] batch [20/200] time 0.652 (0.516) data 0.000 (0.028) loss 0.4241 (0.5908) lr 1.9921e-03 eta 1:13:44
epoch [8/50] batch [40/200] time 0.452 (0.565) data 0.000 (0.014) loss 1.1697 (0.6124) lr 1.9921e-03 eta 1:20:39
epoch [8/50] batch [60/200] time 0.653 (0.531) data 0.000 (0.010) loss 1.5425 (0.6670) lr 1.9921e-03 eta 1:15:33
epoch [8/50] batch [80/200] time 0.242 (0.541) data 0.000 (0.007) loss 0.0316 (0.5861) lr 1.9921e-03 eta 1:16:53
epoch [8/50] batch [100/200] time 0.672 (0.532) data 0.000 (0.006) loss 0.3428 (0.5712) lr 1.9921e-03 eta 1:15:24
epoch [8/50] batch [120/200] time 0.247 (0.531) data 0.000 (0.005) loss 0.3013 (0.5602) lr 1.9921e-03 eta 1:15:03
epoch [8/50] batch [140/200] time 0.434 (0.520) data 0.000 (0.004) loss 0.3821 (0.5555) lr 1.9921e-03 eta 1:13:21
epoch [8/50] batch [160/200] time 0.239 (0.502) data 0.000 (0.004) loss 1.5850 (0.5981) lr 1.9921e-03 eta 1:10:35
epoch [8/50] batch [180/200] time 0.455 (0.491) data 0.000 (0.003) loss 0.0885 (0.5883) lr 1.9921e-03 eta 1:08:51
epoch [8/50] batch [200/200] time 0.267 (0.481) data 0.000 (0.003) loss 0.0097 (0.5707) lr 1.9823e-03 eta 1:07:24
epoch [9/50] batch [20/200] time 0.246 (0.348) data 0.000 (0.030) loss 0.0398 (0.3971) lr 1.9823e-03 eta 0:48:39
epoch [9/50] batch [40/200] time 0.246 (0.295) data 0.000 (0.015) loss 0.4426 (0.4586) lr 1.9823e-03 eta 0:41:06
epoch [9/50] batch [60/200] time 0.244 (0.278) data 0.000 (0.010) loss 0.2360 (0.4921) lr 1.9823e-03 eta 0:38:38
epoch [9/50] batch [80/200] time 0.300 (0.278) data 0.000 (0.008) loss 0.0756 (0.4609) lr 1.9823e-03 eta 0:38:30
epoch [9/50] batch [100/200] time 0.556 (0.300) data 0.000 (0.006) loss 0.1167 (0.5087) lr 1.9823e-03 eta 0:41:28
epoch [9/50] batch [120/200] time 0.526 (0.348) data 0.000 (0.006) loss 0.3691 (0.4852) lr 1.9823e-03 eta 0:48:02
epoch [9/50] batch [140/200] time 0.557 (0.376) data 0.000 (0.005) loss 0.0162 (0.4536) lr 1.9823e-03 eta 0:51:43
epoch [9/50] batch [160/200] time 0.557 (0.398) data 0.000 (0.004) loss 0.0240 (0.4824) lr 1.9823e-03 eta 0:54:43
epoch [9/50] batch [180/200] time 0.819 (0.443) data 0.000 (0.004) loss 0.1371 (0.4885) lr 1.9823e-03 eta 1:00:43
epoch [9/50] batch [200/200] time 0.813 (0.480) data 0.000 (0.003) loss 0.2717 (0.5130) lr 1.9686e-03 eta 1:05:38
epoch [10/50] batch [20/200] time 0.818 (0.847) data 0.000 (0.028) loss 0.1735 (0.3846) lr 1.9686e-03 eta 1:55:27
epoch [10/50] batch [40/200] time 0.815 (0.832) data 0.000 (0.014) loss 0.0414 (0.4638) lr 1.9686e-03 eta 1:53:10
epoch [10/50] batch [60/200] time 0.822 (0.828) data 0.004 (0.010) loss 0.1297 (0.4696) lr 1.9686e-03 eta 1:52:16
epoch [10/50] batch [80/200] time 0.818 (0.825) data 0.000 (0.007) loss 1.1531 (0.4768) lr 1.9686e-03 eta 1:51:37
epoch [10/50] batch [100/200] time 0.825 (0.825) data 0.000 (0.006) loss 0.0204 (0.5025) lr 1.9686e-03 eta 1:51:24
epoch [10/50] batch [120/200] time 0.807 (0.824) data 0.000 (0.005) loss 0.5278 (0.4877) lr 1.9686e-03 eta 1:50:57
epoch [10/50] batch [140/200] time 0.826 (0.823) data 0.000 (0.004) loss 0.0527 (0.5101) lr 1.9686e-03 eta 1:50:34
epoch [10/50] batch [160/200] time 0.833 (0.823) data 0.000 (0.004) loss 1.9352 (0.4950) lr 1.9686e-03 eta 1:50:17
epoch [10/50] batch [180/200] time 0.807 (0.823) data 0.000 (0.004) loss 0.3899 (0.4965) lr 1.9686e-03 eta 1:49:56
epoch [10/50] batch [200/200] time 0.814 (0.822) data 0.000 (0.003) loss 0.2661 (0.4955) lr 1.9511e-03 eta 1:49:36
epoch [11/50] batch [20/200] time 0.819 (0.849) data 0.000 (0.028) loss 0.0110 (0.3920) lr 1.9511e-03 eta 1:52:55
epoch [11/50] batch [40/200] time 0.799 (0.833) data 0.000 (0.014) loss 0.2281 (0.5051) lr 1.9511e-03 eta 1:50:32
epoch [11/50] batch [60/200] time 0.827 (0.828) data 0.000 (0.010) loss 1.0347 (0.4944) lr 1.9511e-03 eta 1:49:31
epoch [11/50] batch [80/200] time 0.817 (0.826) data 0.000 (0.007) loss 0.6134 (0.5326) lr 1.9511e-03 eta 1:49:04
epoch [11/50] batch [100/200] time 0.820 (0.825) data 0.000 (0.006) loss 0.0103 (0.4975) lr 1.9511e-03 eta 1:48:35
epoch [11/50] batch [120/200] time 0.811 (0.824) data 0.000 (0.005) loss 0.0255 (0.4924) lr 1.9511e-03 eta 1:48:10
epoch [11/50] batch [140/200] time 0.821 (0.823) data 0.000 (0.004) loss 1.3540 (0.5239) lr 1.9511e-03 eta 1:47:48
epoch [11/50] batch [160/200] time 0.816 (0.822) data 0.000 (0.004) loss 1.4231 (0.5268) lr 1.9511e-03 eta 1:47:24
epoch [11/50] batch [180/200] time 0.812 (0.821) data 0.000 (0.003) loss 0.0035 (0.5274) lr 1.9511e-03 eta 1:47:02
epoch [11/50] batch [200/200] time 0.824 (0.821) data 0.000 (0.003) loss 0.5283 (0.5436) lr 1.9298e-03 eta 1:46:44
epoch [12/50] batch [20/200] time 0.813 (0.845) data 0.000 (0.030) loss 0.9533 (0.4616) lr 1.9298e-03 eta 1:49:36
epoch [12/50] batch [40/200] time 0.822 (0.832) data 0.002 (0.015) loss 0.0686 (0.4724) lr 1.9298e-03 eta 1:47:38
epoch [12/50] batch [60/200] time 0.818 (0.828) data 0.000 (0.010) loss 0.3246 (0.4988) lr 1.9298e-03 eta 1:46:45
epoch [12/50] batch [80/200] time 0.827 (0.826) data 0.000 (0.008) loss 0.1114 (0.4803) lr 1.9298e-03 eta 1:46:18
epoch [12/50] batch [100/200] time 0.815 (0.825) data 0.000 (0.006) loss 1.3453 (0.5210) lr 1.9298e-03 eta 1:45:51
epoch [12/50] batch [120/200] time 0.818 (0.823) data 0.000 (0.005) loss 0.0444 (0.5411) lr 1.9298e-03 eta 1:45:23
epoch [12/50] batch [140/200] time 0.830 (0.823) data 0.000 (0.005) loss 0.5049 (0.5243) lr 1.9298e-03 eta 1:45:06
epoch [12/50] batch [160/200] time 0.819 (0.823) data 0.000 (0.004) loss 0.3551 (0.5117) lr 1.9298e-03 eta 1:44:44
epoch [12/50] batch [180/200] time 0.818 (0.823) data 0.000 (0.004) loss 0.0721 (0.5321) lr 1.9298e-03 eta 1:44:30
epoch [12/50] batch [200/200] time 0.825 (0.822) data 0.000 (0.003) loss 0.2132 (0.5310) lr 1.9048e-03 eta 1:44:10
epoch [13/50] batch [20/200] time 0.826 (0.853) data 0.000 (0.030) loss 0.0123 (0.3995) lr 1.9048e-03 eta 1:47:46
epoch [13/50] batch [40/200] time 0.822 (0.837) data 0.001 (0.015) loss 1.1881 (0.3936) lr 1.9048e-03 eta 1:45:31
epoch [13/50] batch [60/200] time 0.827 (0.831) data 0.001 (0.010) loss 0.0028 (0.3901) lr 1.9048e-03 eta 1:44:27
epoch [13/50] batch [80/200] time 0.811 (0.827) data 0.000 (0.008) loss 0.0272 (0.4011) lr 1.9048e-03 eta 1:43:40
epoch [13/50] batch [100/200] time 0.825 (0.826) data 0.000 (0.006) loss 0.0952 (0.4028) lr 1.9048e-03 eta 1:43:16
epoch [13/50] batch [120/200] time 0.804 (0.825) data 0.000 (0.005) loss 0.2663 (0.4142) lr 1.9048e-03 eta 1:42:50
epoch [13/50] batch [140/200] time 0.821 (0.825) data 0.000 (0.005) loss 0.0871 (0.4286) lr 1.9048e-03 eta 1:42:32
epoch [13/50] batch [160/200] time 0.819 (0.824) data 0.000 (0.004) loss 0.5401 (0.4348) lr 1.9048e-03 eta 1:42:12
epoch [13/50] batch [180/200] time 0.822 (0.824) data 0.004 (0.004) loss 0.7542 (0.4343) lr 1.9048e-03 eta 1:41:51
epoch [13/50] batch [200/200] time 0.830 (0.823) data 0.000 (0.003) loss 0.7110 (0.4449) lr 1.8763e-03 eta 1:41:33
epoch [14/50] batch [20/200] time 0.809 (0.844) data 0.000 (0.028) loss 1.0834 (0.4850) lr 1.8763e-03 eta 1:43:51
epoch [14/50] batch [40/200] time 0.817 (0.832) data 0.000 (0.014) loss 0.1983 (0.3467) lr 1.8763e-03 eta 1:42:05
epoch [14/50] batch [60/200] time 0.801 (0.826) data 0.000 (0.010) loss 0.3152 (0.4564) lr 1.8763e-03 eta 1:41:02
epoch [14/50] batch [80/200] time 0.828 (0.825) data 0.000 (0.007) loss 0.4075 (0.4692) lr 1.8763e-03 eta 1:40:36
epoch [14/50] batch [100/200] time 0.817 (0.824) data 0.000 (0.006) loss 0.2014 (0.4581) lr 1.8763e-03 eta 1:40:13
epoch [14/50] batch [120/200] time 0.852 (0.818) data 0.000 (0.005) loss 0.0212 (0.5040) lr 1.8763e-03 eta 1:39:12
epoch [14/50] batch [140/200] time 0.831 (0.821) data 0.000 (0.004) loss 0.7163 (0.5269) lr 1.8763e-03 eta 1:39:21
epoch [14/50] batch [160/200] time 0.836 (0.823) data 0.000 (0.004) loss 0.8245 (0.5003) lr 1.8763e-03 eta 1:39:19
epoch [14/50] batch [180/200] time 0.856 (0.825) data 0.000 (0.004) loss 1.0912 (0.4954) lr 1.8763e-03 eta 1:39:16
epoch [14/50] batch [200/200] time 0.839 (0.819) data 0.000 (0.003) loss 0.8314 (0.4968) lr 1.8443e-03 eta 1:38:17
epoch [15/50] batch [20/200] time 0.852 (0.898) data 0.000 (0.057) loss 0.0021 (0.3357) lr 1.8443e-03 eta 1:47:29
epoch [15/50] batch [40/200] time 0.842 (0.869) data 0.000 (0.029) loss 0.1848 (0.5130) lr 1.8443e-03 eta 1:43:39
epoch [15/50] batch [60/200] time 0.842 (0.860) data 0.001 (0.020) loss 1.3944 (0.5651) lr 1.8443e-03 eta 1:42:19
epoch [15/50] batch [80/200] time 0.819 (0.843) data 0.000 (0.015) loss 0.6575 (0.6187) lr 1.8443e-03 eta 1:40:01
epoch [15/50] batch [100/200] time 0.804 (0.838) data 0.000 (0.012) loss 0.3619 (0.5548) lr 1.8443e-03 eta 1:39:12
epoch [15/50] batch [120/200] time 0.833 (0.835) data 0.000 (0.010) loss 0.0415 (0.5175) lr 1.8443e-03 eta 1:38:30
epoch [15/50] batch [140/200] time 0.812 (0.832) data 0.000 (0.009) loss 3.4955 (0.5128) lr 1.8443e-03 eta 1:37:56
epoch [15/50] batch [160/200] time 0.807 (0.830) data 0.000 (0.008) loss 0.0614 (0.5114) lr 1.8443e-03 eta 1:37:25
epoch [15/50] batch [180/200] time 0.812 (0.829) data 0.003 (0.007) loss 2.3041 (0.5174) lr 1.8443e-03 eta 1:37:00
epoch [15/50] batch [200/200] time 0.815 (0.828) data 0.000 (0.006) loss 0.0212 (0.5314) lr 1.8090e-03 eta 1:36:36
epoch [16/50] batch [20/200] time 0.813 (0.843) data 0.000 (0.027) loss 0.7793 (0.5761) lr 1.8090e-03 eta 1:38:05
epoch [16/50] batch [40/200] time 0.816 (0.830) data 0.000 (0.014) loss 0.0021 (0.4530) lr 1.8090e-03 eta 1:36:19
epoch [16/50] batch [60/200] time 0.825 (0.826) data 0.000 (0.009) loss 0.0072 (0.5197) lr 1.8090e-03 eta 1:35:30
epoch [16/50] batch [80/200] time 0.820 (0.826) data 0.000 (0.007) loss 0.1323 (0.4742) lr 1.8090e-03 eta 1:35:16
epoch [16/50] batch [100/200] time 0.810 (0.824) data 0.000 (0.006) loss 0.2286 (0.4517) lr 1.8090e-03 eta 1:34:48
epoch [16/50] batch [120/200] time 0.814 (0.824) data 0.000 (0.005) loss 0.0140 (0.4346) lr 1.8090e-03 eta 1:34:28
epoch [16/50] batch [140/200] time 0.822 (0.823) data 0.000 (0.004) loss 1.6203 (0.4465) lr 1.8090e-03 eta 1:34:05
epoch [16/50] batch [160/200] time 0.812 (0.822) data 0.000 (0.004) loss 0.0364 (0.4389) lr 1.8090e-03 eta 1:33:44
epoch [16/50] batch [180/200] time 0.812 (0.822) data 0.000 (0.003) loss 1.0323 (0.4423) lr 1.8090e-03 eta 1:33:24
epoch [16/50] batch [200/200] time 0.819 (0.822) data 0.000 (0.003) loss 1.5589 (0.4598) lr 1.7705e-03 eta 1:33:07
epoch [17/50] batch [20/200] time 0.811 (0.848) data 0.000 (0.029) loss 0.0221 (0.4837) lr 1.7705e-03 eta 1:35:52
epoch [17/50] batch [40/200] time 0.807 (0.835) data 0.000 (0.015) loss 1.1747 (0.4776) lr 1.7705e-03 eta 1:34:05
epoch [17/50] batch [60/200] time 0.837 (0.829) data 0.001 (0.010) loss 0.2582 (0.4452) lr 1.7705e-03 eta 1:33:05
epoch [17/50] batch [80/200] time 0.808 (0.827) data 0.000 (0.007) loss 1.6156 (0.4475) lr 1.7705e-03 eta 1:32:36
epoch [17/50] batch [100/200] time 0.817 (0.825) data 0.000 (0.006) loss 1.5301 (0.4759) lr 1.7705e-03 eta 1:32:06
epoch [17/50] batch [120/200] time 0.815 (0.823) data 0.000 (0.005) loss 1.6569 (0.4688) lr 1.7705e-03 eta 1:31:39
epoch [17/50] batch [140/200] time 0.819 (0.823) data 0.000 (0.004) loss 1.1834 (0.4997) lr 1.7705e-03 eta 1:31:19
epoch [17/50] batch [160/200] time 0.815 (0.822) data 0.000 (0.004) loss 0.2226 (0.5162) lr 1.7705e-03 eta 1:30:59
epoch [17/50] batch [180/200] time 0.829 (0.822) data 0.000 (0.003) loss 0.5464 (0.5004) lr 1.7705e-03 eta 1:30:39
epoch [17/50] batch [200/200] time 0.827 (0.821) data 0.000 (0.003) loss 0.0372 (0.4933) lr 1.7290e-03 eta 1:30:19
epoch [18/50] batch [20/200] time 0.821 (0.843) data 0.000 (0.028) loss 0.2230 (0.5808) lr 1.7290e-03 eta 1:32:27
epoch [18/50] batch [40/200] time 0.823 (0.834) data 0.000 (0.014) loss 0.0010 (0.5440) lr 1.7290e-03 eta 1:31:12
epoch [18/50] batch [60/200] time 0.818 (0.828) data 0.001 (0.010) loss 0.0167 (0.4902) lr 1.7290e-03 eta 1:30:17
epoch [18/50] batch [80/200] time 0.826 (0.826) data 0.000 (0.007) loss 0.0512 (0.4689) lr 1.7290e-03 eta 1:29:44
epoch [18/50] batch [100/200] time 0.813 (0.823) data 0.000 (0.006) loss 0.0666 (0.5117) lr 1.7290e-03 eta 1:29:11
epoch [18/50] batch [120/200] time 0.804 (0.823) data 0.000 (0.005) loss 0.0904 (0.5271) lr 1.7290e-03 eta 1:28:50
epoch [18/50] batch [140/200] time 0.818 (0.822) data 0.000 (0.004) loss 0.6180 (0.5195) lr 1.7290e-03 eta 1:28:29
epoch [18/50] batch [160/200] time 0.819 (0.821) data 0.000 (0.004) loss 0.6767 (0.5138) lr 1.7290e-03 eta 1:28:05
epoch [18/50] batch [180/200] time 0.818 (0.820) data 0.000 (0.003) loss 0.7658 (0.5039) lr 1.7290e-03 eta 1:27:45
epoch [18/50] batch [200/200] time 0.831 (0.820) data 0.000 (0.003) loss 0.0051 (0.5126) lr 1.6845e-03 eta 1:27:28
epoch [19/50] batch [20/200] time 0.809 (0.843) data 0.000 (0.028) loss 1.1072 (0.5498) lr 1.6845e-03 eta 1:29:39
epoch [19/50] batch [40/200] time 0.817 (0.829) data 0.000 (0.014) loss 0.7290 (0.4751) lr 1.6845e-03 eta 1:27:52
epoch [19/50] batch [60/200] time 0.824 (0.825) data 0.000 (0.010) loss 0.0156 (0.4183) lr 1.6845e-03 eta 1:27:09
epoch [19/50] batch [80/200] time 0.799 (0.823) data 0.000 (0.007) loss 0.9106 (0.4394) lr 1.6845e-03 eta 1:26:39
epoch [19/50] batch [100/200] time 0.822 (0.821) data 0.000 (0.006) loss 0.8431 (0.4294) lr 1.6845e-03 eta 1:26:14
epoch [19/50] batch [120/200] time 0.808 (0.820) data 0.000 (0.005) loss 0.0188 (0.3975) lr 1.6845e-03 eta 1:25:49
epoch [19/50] batch [140/200] time 0.825 (0.820) data 0.000 (0.004) loss 0.7505 (0.4448) lr 1.6845e-03 eta 1:25:31
epoch [19/50] batch [160/200] time 0.825 (0.820) data 0.000 (0.004) loss 1.6862 (0.4513) lr 1.6845e-03 eta 1:25:19
epoch [19/50] batch [180/200] time 0.823 (0.820) data 0.000 (0.004) loss 0.2407 (0.4685) lr 1.6845e-03 eta 1:25:02
epoch [19/50] batch [200/200] time 0.813 (0.820) data 0.000 (0.003) loss 0.5267 (0.4590) lr 1.6374e-03 eta 1:24:43
epoch [20/50] batch [20/200] time 0.839 (0.813) data 0.000 (0.029) loss 1.2501 (0.4556) lr 1.6374e-03 eta 1:23:47
epoch [20/50] batch [40/200] time 0.841 (0.825) data 0.000 (0.015) loss 0.3734 (0.4607) lr 1.6374e-03 eta 1:24:44
epoch [20/50] batch [60/200] time 0.839 (0.830) data 0.001 (0.010) loss 0.4018 (0.4253) lr 1.6374e-03 eta 1:24:53
epoch [20/50] batch [80/200] time 0.846 (0.832) data 0.000 (0.008) loss 0.4265 (0.4270) lr 1.6374e-03 eta 1:24:53
epoch [20/50] batch [100/200] time 0.836 (0.812) data 0.000 (0.006) loss 0.1865 (0.4373) lr 1.6374e-03 eta 1:22:31
epoch [20/50] batch [120/200] time 0.859 (0.817) data 0.000 (0.005) loss 0.0212 (0.4233) lr 1.6374e-03 eta 1:22:45
epoch [20/50] batch [140/200] time 0.839 (0.820) data 0.000 (0.005) loss 0.0509 (0.4337) lr 1.6374e-03 eta 1:22:47
epoch [20/50] batch [160/200] time 0.847 (0.822) data 0.000 (0.004) loss 0.4515 (0.4198) lr 1.6374e-03 eta 1:22:46
epoch [20/50] batch [180/200] time 0.818 (0.824) data 0.000 (0.004) loss 0.8403 (0.4246) lr 1.6374e-03 eta 1:22:41
epoch [20/50] batch [200/200] time 0.827 (0.818) data 0.000 (0.003) loss 0.5327 (0.4102) lr 1.5878e-03 eta 1:21:46
epoch [21/50] batch [20/200] time 0.811 (0.843) data 0.000 (0.029) loss 0.6214 (0.5068) lr 1.5878e-03 eta 1:24:00
epoch [21/50] batch [40/200] time 0.808 (0.829) data 0.000 (0.014) loss 0.0097 (0.4775) lr 1.5878e-03 eta 1:22:21
epoch [21/50] batch [60/200] time 0.822 (0.824) data 0.000 (0.010) loss 0.0039 (0.4433) lr 1.5878e-03 eta 1:21:35
epoch [21/50] batch [80/200] time 0.822 (0.822) data 0.000 (0.007) loss 0.9502 (0.4291) lr 1.5878e-03 eta 1:21:08
epoch [21/50] batch [100/200] time 0.828 (0.820) data 0.000 (0.006) loss 0.3981 (0.4817) lr 1.5878e-03 eta 1:20:39
epoch [21/50] batch [120/200] time 0.817 (0.820) data 0.000 (0.005) loss 0.7869 (0.5016) lr 1.5878e-03 eta 1:20:20
epoch [21/50] batch [140/200] time 0.805 (0.819) data 0.000 (0.004) loss 0.0183 (0.4756) lr 1.5878e-03 eta 1:19:59
epoch [21/50] batch [160/200] time 0.822 (0.819) data 0.000 (0.004) loss 0.0141 (0.4823) lr 1.5878e-03 eta 1:19:44
epoch [21/50] batch [180/200] time 0.816 (0.819) data 0.000 (0.003) loss 1.2519 (0.4835) lr 1.5878e-03 eta 1:19:26
epoch [21/50] batch [200/200] time 0.822 (0.819) data 0.000 (0.003) loss 0.8900 (0.4623) lr 1.5358e-03 eta 1:19:09
epoch [22/50] batch [20/200] time 0.817 (0.844) data 0.000 (0.029) loss 0.1799 (0.4455) lr 1.5358e-03 eta 1:21:15
epoch [22/50] batch [40/200] time 0.805 (0.828) data 0.000 (0.015) loss 0.0771 (0.4400) lr 1.5358e-03 eta 1:19:30
epoch [22/50] batch [60/200] time 0.818 (0.823) data 0.000 (0.010) loss 0.2634 (0.4622) lr 1.5358e-03 eta 1:18:43
epoch [22/50] batch [80/200] time 0.808 (0.822) data 0.000 (0.007) loss 0.3828 (0.4712) lr 1.5358e-03 eta 1:18:23
epoch [22/50] batch [100/200] time 0.793 (0.820) data 0.000 (0.006) loss 1.1477 (0.4698) lr 1.5358e-03 eta 1:17:53
epoch [22/50] batch [120/200] time 0.822 (0.820) data 0.000 (0.005) loss 1.2437 (0.4579) lr 1.5358e-03 eta 1:17:36
epoch [22/50] batch [140/200] time 0.807 (0.820) data 0.000 (0.004) loss 1.0077 (0.4815) lr 1.5358e-03 eta 1:17:21
epoch [22/50] batch [160/200] time 0.815 (0.819) data 0.000 (0.004) loss 0.4909 (0.4962) lr 1.5358e-03 eta 1:17:00
epoch [22/50] batch [180/200] time 0.825 (0.819) data 0.000 (0.003) loss 1.4135 (0.4822) lr 1.5358e-03 eta 1:16:41
epoch [22/50] batch [200/200] time 0.808 (0.819) data 0.000 (0.003) loss 0.0783 (0.4841) lr 1.4818e-03 eta 1:16:26
epoch [23/50] batch [20/200] time 0.811 (0.841) data 0.000 (0.028) loss 0.0861 (0.4383) lr 1.4818e-03 eta 1:18:13
epoch [23/50] batch [40/200] time 0.812 (0.829) data 0.000 (0.014) loss 0.2634 (0.4003) lr 1.4818e-03 eta 1:16:48
epoch [23/50] batch [60/200] time 0.802 (0.823) data 0.001 (0.010) loss 0.2878 (0.3789) lr 1.4818e-03 eta 1:16:01
epoch [23/50] batch [80/200] time 0.821 (0.822) data 0.000 (0.007) loss 0.0840 (0.3908) lr 1.4818e-03 eta 1:15:39
epoch [23/50] batch [100/200] time 0.821 (0.821) data 0.000 (0.006) loss 0.5938 (0.3928) lr 1.4818e-03 eta 1:15:16
epoch [23/50] batch [120/200] time 0.819 (0.820) data 0.000 (0.005) loss 1.1888 (0.4037) lr 1.4818e-03 eta 1:14:55
epoch [23/50] batch [140/200] time 0.823 (0.820) data 0.000 (0.004) loss 1.2144 (0.4329) lr 1.4818e-03 eta 1:14:39
epoch [23/50] batch [160/200] time 0.821 (0.820) data 0.000 (0.004) loss 0.0262 (0.4228) lr 1.4818e-03 eta 1:14:18
epoch [23/50] batch [180/200] time 0.818 (0.819) data 0.000 (0.003) loss 0.1280 (0.4348) lr 1.4818e-03 eta 1:14:01
epoch [23/50] batch [200/200] time 0.817 (0.820) data 0.000 (0.003) loss 0.0554 (0.4192) lr 1.4258e-03 eta 1:13:45
epoch [24/50] batch [20/200] time 0.813 (0.846) data 0.000 (0.031) loss 0.8944 (0.4307) lr 1.4258e-03 eta 1:15:48
epoch [24/50] batch [40/200] time 0.820 (0.832) data 0.000 (0.016) loss 0.0235 (0.4737) lr 1.4258e-03 eta 1:14:18
epoch [24/50] batch [60/200] time 0.820 (0.827) data 0.002 (0.011) loss 0.3158 (0.4127) lr 1.4258e-03 eta 1:13:38
epoch [24/50] batch [80/200] time 0.821 (0.824) data 0.000 (0.008) loss 0.3424 (0.4077) lr 1.4258e-03 eta 1:13:02
epoch [24/50] batch [100/200] time 0.803 (0.824) data 0.000 (0.007) loss 0.2026 (0.4286) lr 1.4258e-03 eta 1:12:46
epoch [24/50] batch [120/200] time 0.820 (0.823) data 0.000 (0.006) loss 0.9638 (0.3973) lr 1.4258e-03 eta 1:12:26
epoch [24/50] batch [140/200] time 0.820 (0.822) data 0.008 (0.005) loss 0.0069 (0.4143) lr 1.4258e-03 eta 1:12:05
epoch [24/50] batch [160/200] time 0.815 (0.822) data 0.000 (0.004) loss 0.6952 (0.4125) lr 1.4258e-03 eta 1:11:46
epoch [24/50] batch [180/200] time 0.813 (0.822) data 0.000 (0.004) loss 1.5308 (0.4312) lr 1.4258e-03 eta 1:11:29
epoch [24/50] batch [200/200] time 0.819 (0.822) data 0.000 (0.004) loss 0.0038 (0.4046) lr 1.3681e-03 eta 1:11:13
epoch [25/50] batch [20/200] time 0.815 (0.848) data 0.000 (0.031) loss 0.7295 (0.4426) lr 1.3681e-03 eta 1:13:11
epoch [25/50] batch [40/200] time 0.816 (0.832) data 0.000 (0.015) loss 0.9379 (0.5163) lr 1.3681e-03 eta 1:11:33
epoch [25/50] batch [60/200] time 0.816 (0.827) data 0.005 (0.011) loss 0.0066 (0.4858) lr 1.3681e-03 eta 1:10:51
epoch [25/50] batch [80/200] time 0.669 (0.824) data 0.000 (0.008) loss 0.5756 (0.4835) lr 1.3681e-03 eta 1:10:16
epoch [25/50] batch [100/200] time 0.812 (0.823) data 0.003 (0.007) loss 0.6359 (0.4888) lr 1.3681e-03 eta 1:09:56
epoch [25/50] batch [120/200] time 0.855 (0.815) data 0.000 (0.006) loss 0.0074 (0.4786) lr 1.3681e-03 eta 1:09:00
epoch [25/50] batch [140/200] time 0.852 (0.819) data 0.000 (0.005) loss 1.4232 (0.5053) lr 1.3681e-03 eta 1:09:02
epoch [25/50] batch [160/200] time 0.836 (0.821) data 0.000 (0.004) loss 0.7958 (0.4811) lr 1.3681e-03 eta 1:08:59
epoch [25/50] batch [180/200] time 0.839 (0.823) data 0.000 (0.004) loss 0.7435 (0.4659) lr 1.3681e-03 eta 1:08:52
epoch [25/50] batch [200/200] time 0.732 (0.824) data 0.000 (0.004) loss 0.0661 (0.4457) lr 1.3090e-03 eta 1:08:41
epoch [26/50] batch [20/200] time 0.769 (0.810) data 0.000 (0.030) loss 0.6146 (0.2999) lr 1.3090e-03 eta 1:07:12
epoch [26/50] batch [40/200] time 0.846 (0.826) data 0.009 (0.016) loss 0.2205 (0.3478) lr 1.3090e-03 eta 1:08:15
epoch [26/50] batch [60/200] time 0.842 (0.831) data 0.000 (0.011) loss 0.0028 (0.4277) lr 1.3090e-03 eta 1:08:22
epoch [26/50] batch [80/200] time 0.853 (0.833) data 0.001 (0.008) loss 0.0214 (0.4245) lr 1.3090e-03 eta 1:08:19
epoch [26/50] batch [100/200] time 0.809 (0.823) data 0.000 (0.007) loss 0.1779 (0.4403) lr 1.3090e-03 eta 1:07:11
epoch [26/50] batch [120/200] time 0.810 (0.821) data 0.000 (0.006) loss 0.0054 (0.4344) lr 1.3090e-03 eta 1:06:48
epoch [26/50] batch [140/200] time 0.810 (0.821) data 0.000 (0.005) loss 0.0088 (0.4407) lr 1.3090e-03 eta 1:06:28
epoch [26/50] batch [160/200] time 0.815 (0.820) data 0.000 (0.004) loss 1.2852 (0.4641) lr 1.3090e-03 eta 1:06:07
epoch [26/50] batch [180/200] time 0.819 (0.820) data 0.000 (0.004) loss 0.1069 (0.4531) lr 1.3090e-03 eta 1:05:50
epoch [26/50] batch [200/200] time 0.823 (0.820) data 0.000 (0.004) loss 0.0312 (0.4545) lr 1.2487e-03 eta 1:05:35
epoch [27/50] batch [20/200] time 0.814 (0.845) data 0.000 (0.032) loss 0.2647 (0.1957) lr 1.2487e-03 eta 1:07:16
epoch [27/50] batch [40/200] time 0.828 (0.830) data 0.000 (0.017) loss 0.5020 (0.3503) lr 1.2487e-03 eta 1:05:51
epoch [27/50] batch [60/200] time 0.863 (0.828) data 0.001 (0.011) loss 0.2264 (0.3272) lr 1.2487e-03 eta 1:05:26
epoch [27/50] batch [80/200] time 0.823 (0.826) data 0.000 (0.009) loss 0.4610 (0.3766) lr 1.2487e-03 eta 1:04:58
epoch [27/50] batch [100/200] time 0.813 (0.825) data 0.000 (0.007) loss 0.3356 (0.3709) lr 1.2487e-03 eta 1:04:39
epoch [27/50] batch [120/200] time 0.824 (0.825) data 0.000 (0.006) loss 1.0036 (0.3891) lr 1.2487e-03 eta 1:04:22
epoch [27/50] batch [140/200] time 0.822 (0.825) data 0.003 (0.005) loss 0.2053 (0.4322) lr 1.2487e-03 eta 1:04:02
epoch [27/50] batch [160/200] time 0.814 (0.824) data 0.000 (0.005) loss 1.8213 (0.4323) lr 1.2487e-03 eta 1:03:43
epoch [27/50] batch [180/200] time 0.832 (0.823) data 0.000 (0.004) loss 0.3697 (0.4292) lr 1.2487e-03 eta 1:03:24
epoch [27/50] batch [200/200] time 0.812 (0.823) data 0.000 (0.004) loss 0.7098 (0.4228) lr 1.1874e-03 eta 1:03:04
epoch [28/50] batch [20/200] time 0.820 (0.853) data 0.000 (0.027) loss 1.6665 (0.4810) lr 1.1874e-03 eta 1:05:05
epoch [28/50] batch [40/200] time 0.818 (0.837) data 0.000 (0.014) loss 0.5849 (0.4300) lr 1.1874e-03 eta 1:03:36
epoch [28/50] batch [60/200] time 0.798 (0.830) data 0.001 (0.009) loss 0.1781 (0.5150) lr 1.1874e-03 eta 1:02:48
epoch [28/50] batch [80/200] time 0.822 (0.828) data 0.000 (0.007) loss 0.0531 (0.4747) lr 1.1874e-03 eta 1:02:21
epoch [28/50] batch [100/200] time 0.841 (0.826) data 0.000 (0.006) loss 0.5492 (0.4745) lr 1.1874e-03 eta 1:01:58
epoch [28/50] batch [120/200] time 0.820 (0.825) data 0.000 (0.005) loss 0.2657 (0.4469) lr 1.1874e-03 eta 1:01:34
epoch [28/50] batch [140/200] time 0.838 (0.825) data 0.000 (0.004) loss 0.0807 (0.4395) lr 1.1874e-03 eta 1:01:18
epoch [28/50] batch [160/200] time 0.818 (0.824) data 0.000 (0.004) loss 0.7718 (0.4469) lr 1.1874e-03 eta 1:00:57
epoch [28/50] batch [180/200] time 0.807 (0.823) data 0.000 (0.003) loss 0.0169 (0.4364) lr 1.1874e-03 eta 1:00:38
epoch [28/50] batch [200/200] time 0.827 (0.823) data 0.000 (0.003) loss 0.0031 (0.4446) lr 1.1253e-03 eta 1:00:20
epoch [29/50] batch [20/200] time 0.817 (0.843) data 0.000 (0.028) loss 0.2813 (0.5026) lr 1.1253e-03 eta 1:01:32
epoch [29/50] batch [40/200] time 0.824 (0.829) data 0.000 (0.014) loss 1.4611 (0.4554) lr 1.1253e-03 eta 1:00:16
epoch [29/50] batch [60/200] time 0.819 (0.828) data 0.000 (0.010) loss 0.0199 (0.4145) lr 1.1253e-03 eta 0:59:54
epoch [29/50] batch [80/200] time 0.822 (0.826) data 0.000 (0.007) loss 0.2121 (0.4343) lr 1.1253e-03 eta 0:59:29
epoch [29/50] batch [100/200] time 0.815 (0.824) data 0.000 (0.006) loss 0.3335 (0.4299) lr 1.1253e-03 eta 0:59:04
epoch [29/50] batch [120/200] time 0.835 (0.823) data 0.000 (0.005) loss 0.0301 (0.4337) lr 1.1253e-03 eta 0:58:44
epoch [29/50] batch [140/200] time 0.824 (0.823) data 0.000 (0.004) loss 0.0186 (0.4420) lr 1.1253e-03 eta 0:58:24
epoch [29/50] batch [160/200] time 0.820 (0.823) data 0.000 (0.004) loss 0.0018 (0.4532) lr 1.1253e-03 eta 0:58:11
epoch [29/50] batch [180/200] time 0.820 (0.823) data 0.000 (0.004) loss 0.9137 (0.4557) lr 1.1253e-03 eta 0:57:52
epoch [29/50] batch [200/200] time 0.819 (0.822) data 0.000 (0.003) loss 0.0094 (0.4307) lr 1.0628e-03 eta 0:57:32
epoch [30/50] batch [20/200] time 0.814 (0.854) data 0.000 (0.028) loss 0.9750 (0.4275) lr 1.0628e-03 eta 0:59:28
epoch [30/50] batch [40/200] time 0.830 (0.838) data 0.000 (0.015) loss 0.0341 (0.3423) lr 1.0628e-03 eta 0:58:04
epoch [30/50] batch [60/200] time 0.826 (0.835) data 0.001 (0.010) loss 0.0268 (0.3376) lr 1.0628e-03 eta 0:57:34
epoch [30/50] batch [80/200] time 0.820 (0.832) data 0.000 (0.008) loss 0.2700 (0.3881) lr 1.0628e-03 eta 0:57:06
epoch [30/50] batch [100/200] time 0.818 (0.830) data 0.000 (0.006) loss 0.0007 (0.3799) lr 1.0628e-03 eta 0:56:43
epoch [30/50] batch [120/200] time 0.829 (0.828) data 0.000 (0.005) loss 0.3446 (0.3715) lr 1.0628e-03 eta 0:56:16
epoch [30/50] batch [140/200] time 0.822 (0.827) data 0.000 (0.005) loss 0.0268 (0.3713) lr 1.0628e-03 eta 0:55:56
epoch [30/50] batch [160/200] time 0.798 (0.826) data 0.000 (0.004) loss 0.7442 (0.4046) lr 1.0628e-03 eta 0:55:35
epoch [30/50] batch [180/200] time 0.824 (0.824) data 0.000 (0.004) loss 1.6450 (0.4059) lr 1.0628e-03 eta 0:55:13
epoch [30/50] batch [200/200] time 0.820 (0.824) data 0.000 (0.003) loss 0.9841 (0.3974) lr 1.0000e-03 eta 0:54:57
epoch [31/50] batch [20/200] time 0.823 (0.803) data 0.000 (0.029) loss 0.1033 (0.4917) lr 1.0000e-03 eta 0:53:16
epoch [31/50] batch [40/200] time 0.848 (0.820) data 0.000 (0.015) loss 0.3966 (0.4193) lr 1.0000e-03 eta 0:54:06
epoch [31/50] batch [60/200] time 0.818 (0.826) data 0.001 (0.010) loss 0.0003 (0.3675) lr 1.0000e-03 eta 0:54:13
epoch [31/50] batch [80/200] time 0.841 (0.829) data 0.000 (0.008) loss 0.0500 (0.3679) lr 1.0000e-03 eta 0:54:09
epoch [31/50] batch [100/200] time 0.691 (0.829) data 0.000 (0.007) loss 0.4297 (0.3634) lr 1.0000e-03 eta 0:53:52
epoch [31/50] batch [120/200] time 0.558 (0.817) data 0.000 (0.006) loss 1.4326 (0.3942) lr 1.0000e-03 eta 0:52:50
epoch [31/50] batch [140/200] time 0.824 (0.816) data 0.000 (0.005) loss 0.6717 (0.4296) lr 1.0000e-03 eta 0:52:30
epoch [31/50] batch [160/200] time 0.820 (0.819) data 0.000 (0.004) loss 0.1739 (0.4355) lr 1.0000e-03 eta 0:52:24
epoch [31/50] batch [180/200] time 0.841 (0.821) data 0.000 (0.004) loss 0.0111 (0.4494) lr 1.0000e-03 eta 0:52:16
epoch [31/50] batch [200/200] time 0.850 (0.823) data 0.014 (0.004) loss 1.1346 (0.4464) lr 9.3721e-04 eta 0:52:08
epoch [32/50] batch [20/200] time 0.814 (0.845) data 0.000 (0.036) loss 0.0187 (0.3325) lr 9.3721e-04 eta 0:53:14
epoch [32/50] batch [40/200] time 0.798 (0.831) data 0.000 (0.018) loss 0.0537 (0.4204) lr 9.3721e-04 eta 0:52:04
epoch [32/50] batch [60/200] time 0.814 (0.827) data 0.001 (0.012) loss 0.0198 (0.4082) lr 9.3721e-04 eta 0:51:32
epoch [32/50] batch [80/200] time 0.840 (0.827) data 0.008 (0.009) loss 0.1763 (0.4543) lr 9.3721e-04 eta 0:51:18
epoch [32/50] batch [100/200] time 0.828 (0.828) data 0.000 (0.008) loss 0.0408 (0.4747) lr 9.3721e-04 eta 0:51:03
epoch [32/50] batch [120/200] time 0.812 (0.827) data 0.001 (0.007) loss 0.0070 (0.4431) lr 9.3721e-04 eta 0:50:42
epoch [32/50] batch [140/200] time 0.811 (0.827) data 0.000 (0.006) loss 0.1874 (0.4542) lr 9.3721e-04 eta 0:50:25
epoch [32/50] batch [160/200] time 0.816 (0.826) data 0.000 (0.005) loss 0.0490 (0.4377) lr 9.3721e-04 eta 0:50:08
epoch [32/50] batch [180/200] time 0.827 (0.826) data 0.000 (0.005) loss 0.3250 (0.4431) lr 9.3721e-04 eta 0:49:49
epoch [32/50] batch [200/200] time 0.812 (0.825) data 0.000 (0.004) loss 0.1301 (0.4534) lr 8.7467e-04 eta 0:49:30
epoch [33/50] batch [20/200] time 0.814 (0.844) data 0.000 (0.028) loss 0.0120 (0.3789) lr 8.7467e-04 eta 0:50:21
epoch [33/50] batch [40/200] time 0.821 (0.830) data 0.000 (0.015) loss 0.2342 (0.4290) lr 8.7467e-04 eta 0:49:13
epoch [33/50] batch [60/200] time 0.809 (0.827) data 0.001 (0.010) loss 0.2528 (0.4562) lr 8.7467e-04 eta 0:48:47
epoch [33/50] batch [80/200] time 0.795 (0.826) data 0.000 (0.008) loss 0.9098 (0.4443) lr 8.7467e-04 eta 0:48:27
epoch [33/50] batch [100/200] time 0.812 (0.824) data 0.000 (0.006) loss 0.3201 (0.4717) lr 8.7467e-04 eta 0:48:02
epoch [33/50] batch [120/200] time 0.819 (0.823) data 0.000 (0.005) loss 1.2327 (0.4978) lr 8.7467e-04 eta 0:47:44
epoch [33/50] batch [140/200] time 0.815 (0.823) data 0.000 (0.005) loss 0.0073 (0.4781) lr 8.7467e-04 eta 0:47:27
epoch [33/50] batch [160/200] time 0.814 (0.822) data 0.000 (0.004) loss 0.0678 (0.4561) lr 8.7467e-04 eta 0:47:06
epoch [33/50] batch [180/200] time 0.836 (0.822) data 0.000 (0.004) loss 0.1597 (0.4688) lr 8.7467e-04 eta 0:46:51
epoch [33/50] batch [200/200] time 0.811 (0.822) data 0.000 (0.003) loss 0.0083 (0.4787) lr 8.1262e-04 eta 0:46:33
epoch [34/50] batch [20/200] time 0.818 (0.849) data 0.000 (0.029) loss 0.0004 (0.5992) lr 8.1262e-04 eta 0:47:51
epoch [34/50] batch [40/200] time 0.815 (0.832) data 0.001 (0.015) loss 0.1160 (0.4299) lr 8.1262e-04 eta 0:46:34
epoch [34/50] batch [60/200] time 0.817 (0.828) data 0.001 (0.010) loss 0.1548 (0.4725) lr 8.1262e-04 eta 0:46:06
epoch [34/50] batch [80/200] time 0.813 (0.826) data 0.000 (0.008) loss 0.2175 (0.4107) lr 8.1262e-04 eta 0:45:43
epoch [34/50] batch [100/200] time 0.807 (0.825) data 0.000 (0.006) loss 0.8847 (0.3875) lr 8.1262e-04 eta 0:45:21
epoch [34/50] batch [120/200] time 0.829 (0.825) data 0.001 (0.005) loss 0.1212 (0.3797) lr 8.1262e-04 eta 0:45:05
epoch [34/50] batch [140/200] time 0.823 (0.824) data 0.000 (0.005) loss 0.2738 (0.3766) lr 8.1262e-04 eta 0:44:47
epoch [34/50] batch [160/200] time 0.822 (0.824) data 0.000 (0.004) loss 0.3488 (0.3718) lr 8.1262e-04 eta 0:44:28
epoch [34/50] batch [180/200] time 0.798 (0.823) data 0.000 (0.004) loss 0.2325 (0.3733) lr 8.1262e-04 eta 0:44:09
epoch [34/50] batch [200/200] time 0.815 (0.823) data 0.000 (0.003) loss 0.0956 (0.3764) lr 7.5131e-04 eta 0:43:53
epoch [35/50] batch [20/200] time 0.836 (0.847) data 0.000 (0.029) loss 0.0445 (0.2821) lr 7.5131e-04 eta 0:44:52
epoch [35/50] batch [40/200] time 0.821 (0.832) data 0.000 (0.015) loss 1.1177 (0.3531) lr 7.5131e-04 eta 0:43:47
epoch [35/50] batch [60/200] time 0.811 (0.826) data 0.001 (0.010) loss 0.7424 (0.3898) lr 7.5131e-04 eta 0:43:14
epoch [35/50] batch [80/200] time 0.813 (0.825) data 0.000 (0.008) loss 1.3547 (0.4266) lr 7.5131e-04 eta 0:42:53
epoch [35/50] batch [100/200] time 0.826 (0.824) data 0.000 (0.006) loss 0.0178 (0.3992) lr 7.5131e-04 eta 0:42:33
epoch [35/50] batch [120/200] time 0.813 (0.822) data 0.000 (0.005) loss 0.0455 (0.3796) lr 7.5131e-04 eta 0:42:13
epoch [35/50] batch [140/200] time 0.818 (0.822) data 0.008 (0.005) loss 0.0154 (0.4000) lr 7.5131e-04 eta 0:41:55
epoch [35/50] batch [160/200] time 0.808 (0.821) data 0.000 (0.004) loss 0.0020 (0.4091) lr 7.5131e-04 eta 0:41:37
epoch [35/50] batch [180/200] time 0.825 (0.821) data 0.000 (0.004) loss 0.0472 (0.3975) lr 7.5131e-04 eta 0:41:20
epoch [35/50] batch [200/200] time 0.818 (0.822) data 0.000 (0.004) loss 0.2148 (0.4184) lr 6.9098e-04 eta 0:41:07
epoch [36/50] batch [20/200] time 0.827 (0.857) data 0.000 (0.037) loss 1.0987 (0.4845) lr 6.9098e-04 eta 0:42:34
epoch [36/50] batch [40/200] time 0.809 (0.834) data 0.000 (0.019) loss 1.3108 (0.4627) lr 6.9098e-04 eta 0:41:08
epoch [36/50] batch [60/200] time 0.819 (0.829) data 0.000 (0.013) loss 0.3966 (0.4024) lr 6.9098e-04 eta 0:40:38
epoch [36/50] batch [80/200] time 0.812 (0.826) data 0.000 (0.010) loss 1.5630 (0.4214) lr 6.9098e-04 eta 0:40:12
epoch [36/50] batch [100/200] time 0.826 (0.826) data 0.000 (0.008) loss 0.0021 (0.4259) lr 6.9098e-04 eta 0:39:54
epoch [36/50] batch [120/200] time 0.847 (0.819) data 0.000 (0.007) loss 0.0639 (0.4260) lr 6.9098e-04 eta 0:39:19
epoch [36/50] batch [140/200] time 0.842 (0.822) data 0.000 (0.006) loss 0.0062 (0.4025) lr 6.9098e-04 eta 0:39:12
epoch [36/50] batch [160/200] time 0.849 (0.824) data 0.000 (0.005) loss 0.0931 (0.3996) lr 6.9098e-04 eta 0:39:01
epoch [36/50] batch [180/200] time 0.835 (0.826) data 0.000 (0.005) loss 0.2388 (0.4097) lr 6.9098e-04 eta 0:38:49
epoch [36/50] batch [200/200] time 0.632 (0.826) data 0.000 (0.004) loss 1.5645 (0.4200) lr 6.3188e-04 eta 0:38:33
epoch [37/50] batch [20/200] time 0.822 (0.850) data 0.000 (0.030) loss 0.1319 (0.3580) lr 6.3188e-04 eta 0:39:23
epoch [37/50] batch [40/200] time 0.851 (0.821) data 0.005 (0.015) loss 1.8782 (0.4610) lr 6.3188e-04 eta 0:37:47
epoch [37/50] batch [60/200] time 0.849 (0.826) data 0.001 (0.011) loss 0.2243 (0.3847) lr 6.3188e-04 eta 0:37:44
epoch [37/50] batch [80/200] time 0.839 (0.830) data 0.000 (0.008) loss 0.0082 (0.4040) lr 6.3188e-04 eta 0:37:37
epoch [37/50] batch [100/200] time 0.828 (0.831) data 0.000 (0.007) loss 0.0866 (0.3783) lr 6.3188e-04 eta 0:37:23
epoch [37/50] batch [120/200] time 0.786 (0.823) data 0.000 (0.006) loss 0.2188 (0.3936) lr 6.3188e-04 eta 0:36:45
epoch [37/50] batch [140/200] time 0.815 (0.822) data 0.000 (0.005) loss 0.5329 (0.4555) lr 6.3188e-04 eta 0:36:26
epoch [37/50] batch [160/200] time 0.863 (0.821) data 0.000 (0.005) loss 0.1633 (0.4335) lr 6.3188e-04 eta 0:36:07
epoch [37/50] batch [180/200] time 0.812 (0.820) data 0.000 (0.004) loss 0.3874 (0.4364) lr 6.3188e-04 eta 0:35:48
epoch [37/50] batch [200/200] time 0.810 (0.820) data 0.000 (0.004) loss 0.0004 (0.4209) lr 5.7422e-04 eta 0:35:31
epoch [38/50] batch [20/200] time 0.826 (0.850) data 0.000 (0.028) loss 1.0481 (0.4102) lr 5.7422e-04 eta 0:36:33
epoch [38/50] batch [40/200] time 0.807 (0.835) data 0.000 (0.014) loss 1.0582 (0.4958) lr 5.7422e-04 eta 0:35:36
epoch [38/50] batch [60/200] time 0.823 (0.828) data 0.000 (0.010) loss 0.0523 (0.4402) lr 5.7422e-04 eta 0:35:03
epoch [38/50] batch [80/200] time 0.821 (0.827) data 0.000 (0.008) loss 0.0044 (0.4387) lr 5.7422e-04 eta 0:34:45
epoch [38/50] batch [100/200] time 0.812 (0.826) data 0.000 (0.006) loss 0.0003 (0.4999) lr 5.7422e-04 eta 0:34:24
epoch [38/50] batch [120/200] time 0.810 (0.824) data 0.000 (0.005) loss 0.0706 (0.4617) lr 5.7422e-04 eta 0:34:04
epoch [38/50] batch [140/200] time 0.802 (0.823) data 0.000 (0.005) loss 0.0346 (0.4644) lr 5.7422e-04 eta 0:33:43
epoch [38/50] batch [160/200] time 0.803 (0.820) data 0.000 (0.004) loss 1.1759 (0.4521) lr 5.7422e-04 eta 0:33:21
epoch [38/50] batch [180/200] time 0.818 (0.820) data 0.000 (0.004) loss 1.3267 (0.4291) lr 5.7422e-04 eta 0:33:04
epoch [38/50] batch [200/200] time 0.805 (0.819) data 0.000 (0.003) loss 0.0057 (0.4203) lr 5.1825e-04 eta 0:32:46
epoch [39/50] batch [20/200] time 0.814 (0.842) data 0.000 (0.029) loss 0.1547 (0.2612) lr 5.1825e-04 eta 0:33:23
epoch [39/50] batch [40/200] time 0.820 (0.828) data 0.000 (0.014) loss 0.4221 (0.2819) lr 5.1825e-04 eta 0:32:34
epoch [39/50] batch [60/200] time 0.820 (0.823) data 0.001 (0.010) loss 0.1665 (0.3424) lr 5.1825e-04 eta 0:32:06
epoch [39/50] batch [80/200] time 0.825 (0.821) data 0.000 (0.007) loss 0.0065 (0.3555) lr 5.1825e-04 eta 0:31:45
epoch [39/50] batch [100/200] time 0.809 (0.819) data 0.000 (0.006) loss 0.2109 (0.3773) lr 5.1825e-04 eta 0:31:24
epoch [39/50] batch [120/200] time 0.806 (0.818) data 0.000 (0.005) loss 0.6909 (0.3588) lr 5.1825e-04 eta 0:31:04
epoch [39/50] batch [140/200] time 0.830 (0.818) data 0.000 (0.004) loss 0.0053 (0.3565) lr 5.1825e-04 eta 0:30:49
epoch [39/50] batch [160/200] time 0.822 (0.819) data 0.000 (0.004) loss 0.0581 (0.3479) lr 5.1825e-04 eta 0:30:33
epoch [39/50] batch [180/200] time 0.814 (0.818) data 0.000 (0.004) loss 0.0004 (0.3528) lr 5.1825e-04 eta 0:30:16
epoch [39/50] batch [200/200] time 0.819 (0.818) data 0.000 (0.003) loss 0.0076 (0.3664) lr 4.6417e-04 eta 0:30:00
epoch [40/50] batch [20/200] time 0.808 (0.844) data 0.000 (0.030) loss 0.0383 (0.4808) lr 4.6417e-04 eta 0:30:40
epoch [40/50] batch [40/200] time 0.810 (0.830) data 0.000 (0.015) loss 0.0976 (0.4533) lr 4.6417e-04 eta 0:29:53
epoch [40/50] batch [60/200] time 0.828 (0.826) data 0.001 (0.010) loss 0.0081 (0.4274) lr 4.6417e-04 eta 0:29:26
epoch [40/50] batch [80/200] time 0.806 (0.822) data 0.000 (0.008) loss 0.0154 (0.4152) lr 4.6417e-04 eta 0:29:03
epoch [40/50] batch [100/200] time 0.811 (0.821) data 0.000 (0.006) loss 0.0029 (0.3938) lr 4.6417e-04 eta 0:28:44
epoch [40/50] batch [120/200] time 0.821 (0.820) data 0.000 (0.005) loss 0.1655 (0.4289) lr 4.6417e-04 eta 0:28:25
epoch [40/50] batch [140/200] time 0.823 (0.820) data 0.000 (0.005) loss 1.5835 (0.4211) lr 4.6417e-04 eta 0:28:08
epoch [40/50] batch [160/200] time 0.829 (0.820) data 0.000 (0.004) loss 0.6907 (0.4290) lr 4.6417e-04 eta 0:27:52
epoch [40/50] batch [180/200] time 0.815 (0.819) data 0.000 (0.004) loss 0.0436 (0.4113) lr 4.6417e-04 eta 0:27:34
epoch [40/50] batch [200/200] time 0.818 (0.818) data 0.000 (0.003) loss 0.0446 (0.4056) lr 4.1221e-04 eta 0:27:16
epoch [41/50] batch [20/200] time 0.809 (0.849) data 0.000 (0.027) loss 0.0828 (0.4270) lr 4.1221e-04 eta 0:28:01
epoch [41/50] batch [40/200] time 0.830 (0.832) data 0.006 (0.014) loss 0.6744 (0.4004) lr 4.1221e-04 eta 0:27:10
epoch [41/50] batch [60/200] time 0.818 (0.827) data 0.000 (0.009) loss 0.4536 (0.4171) lr 4.1221e-04 eta 0:26:43
epoch [41/50] batch [80/200] time 0.814 (0.825) data 0.000 (0.007) loss 0.5762 (0.3897) lr 4.1221e-04 eta 0:26:24
epoch [41/50] batch [100/200] time 0.823 (0.824) data 0.000 (0.006) loss 0.2828 (0.4213) lr 4.1221e-04 eta 0:26:04
epoch [41/50] batch [120/200] time 0.820 (0.822) data 0.000 (0.005) loss 0.5252 (0.4189) lr 4.1221e-04 eta 0:25:45
epoch [41/50] batch [140/200] time 0.815 (0.822) data 0.000 (0.004) loss 0.3590 (0.4144) lr 4.1221e-04 eta 0:25:28
epoch [41/50] batch [160/200] time 0.813 (0.821) data 0.000 (0.004) loss 0.3841 (0.4146) lr 4.1221e-04 eta 0:25:10
epoch [41/50] batch [180/200] time 0.825 (0.820) data 0.000 (0.003) loss 0.0807 (0.4060) lr 4.1221e-04 eta 0:24:53
epoch [41/50] batch [200/200] time 0.818 (0.821) data 0.000 (0.003) loss 0.0015 (0.4068) lr 3.6258e-04 eta 0:24:37
epoch [42/50] batch [20/200] time 0.562 (0.834) data 0.000 (0.028) loss 0.2778 (0.4185) lr 3.6258e-04 eta 0:24:44
epoch [42/50] batch [40/200] time 0.845 (0.816) data 0.000 (0.014) loss 0.1822 (0.3814) lr 3.6258e-04 eta 0:23:55
epoch [42/50] batch [60/200] time 0.842 (0.822) data 0.001 (0.010) loss 0.2428 (0.4445) lr 3.6258e-04 eta 0:23:51
epoch [42/50] batch [80/200] time 0.836 (0.826) data 0.000 (0.007) loss 0.5195 (0.4490) lr 3.6258e-04 eta 0:23:40
epoch [42/50] batch [100/200] time 0.845 (0.828) data 0.000 (0.006) loss 0.1899 (0.4800) lr 3.6258e-04 eta 0:23:27
epoch [42/50] batch [120/200] time 0.821 (0.823) data 0.000 (0.005) loss 0.4968 (0.4804) lr 3.6258e-04 eta 0:23:02
epoch [42/50] batch [140/200] time 0.541 (0.818) data 0.000 (0.004) loss 0.0601 (0.4852) lr 3.6258e-04 eta 0:22:38
epoch [42/50] batch [160/200] time 0.853 (0.819) data 0.004 (0.004) loss 0.9029 (0.4885) lr 3.6258e-04 eta 0:22:22
epoch [42/50] batch [180/200] time 0.841 (0.821) data 0.000 (0.004) loss 0.0054 (0.4941) lr 3.6258e-04 eta 0:22:10
epoch [42/50] batch [200/200] time 0.838 (0.823) data 0.000 (0.003) loss 0.1192 (0.4862) lr 3.1545e-04 eta 0:21:56
epoch [43/50] batch [20/200] time 0.710 (0.866) data 0.000 (0.032) loss 0.2803 (0.4997) lr 3.1545e-04 eta 0:22:48
epoch [43/50] batch [40/200] time 0.838 (0.813) data 0.000 (0.016) loss 0.0052 (0.4947) lr 3.1545e-04 eta 0:21:07
epoch [43/50] batch [60/200] time 0.840 (0.822) data 0.008 (0.011) loss 0.3450 (0.4855) lr 3.1545e-04 eta 0:21:05
epoch [43/50] batch [80/200] time 0.834 (0.826) data 0.000 (0.008) loss 1.1775 (0.4975) lr 3.1545e-04 eta 0:20:55
epoch [43/50] batch [100/200] time 0.857 (0.829) data 0.000 (0.007) loss 0.5201 (0.4519) lr 3.1545e-04 eta 0:20:42
epoch [43/50] batch [120/200] time 0.527 (0.796) data 0.000 (0.006) loss 0.0055 (0.4491) lr 3.1545e-04 eta 0:19:37
epoch [43/50] batch [140/200] time 0.561 (0.758) data 0.000 (0.005) loss 0.0234 (0.4249) lr 3.1545e-04 eta 0:18:26
epoch [43/50] batch [160/200] time 0.604 (0.732) data 0.000 (0.005) loss 0.0056 (0.4202) lr 3.1545e-04 eta 0:17:33
epoch [43/50] batch [180/200] time 0.844 (0.733) data 0.000 (0.004) loss 0.5308 (0.4017) lr 3.1545e-04 eta 0:17:20
epoch [43/50] batch [200/200] time 0.829 (0.743) data 0.000 (0.004) loss 0.4015 (0.4175) lr 2.7103e-04 eta 0:17:20
epoch [44/50] batch [20/200] time 0.838 (0.865) data 0.000 (0.031) loss 0.0067 (0.3973) lr 2.7103e-04 eta 0:19:53
epoch [44/50] batch [40/200] time 0.841 (0.852) data 0.000 (0.016) loss 0.1946 (0.2970) lr 2.7103e-04 eta 0:19:19
epoch [44/50] batch [60/200] time 0.529 (0.767) data 0.000 (0.011) loss 0.0086 (0.3074) lr 2.7103e-04 eta 0:17:07
epoch [44/50] batch [80/200] time 0.551 (0.710) data 0.000 (0.008) loss 0.3218 (0.3959) lr 2.7103e-04 eta 0:15:37
epoch [44/50] batch [100/200] time 0.725 (0.693) data 0.000 (0.007) loss 0.1750 (0.3877) lr 2.7103e-04 eta 0:15:01
epoch [44/50] batch [120/200] time 0.571 (0.685) data 0.000 (0.006) loss 0.3132 (0.3740) lr 2.7103e-04 eta 0:14:36
epoch [44/50] batch [140/200] time 0.670 (0.678) data 0.000 (0.005) loss 0.1418 (0.3585) lr 2.7103e-04 eta 0:14:14
epoch [44/50] batch [160/200] time 0.723 (0.677) data 0.000 (0.004) loss 0.6887 (0.3839) lr 2.7103e-04 eta 0:13:59
epoch [44/50] batch [180/200] time 0.730 (0.676) data 0.000 (0.004) loss 0.7458 (0.4085) lr 2.7103e-04 eta 0:13:45
epoch [44/50] batch [200/200] time 0.724 (0.675) data 0.000 (0.003) loss 0.2484 (0.4155) lr 2.2949e-04 eta 0:13:29
epoch [45/50] batch [20/200] time 0.539 (0.620) data 0.000 (0.028) loss 0.0014 (0.2387) lr 2.2949e-04 eta 0:12:11
epoch [45/50] batch [40/200] time 0.538 (0.573) data 0.000 (0.014) loss 0.8476 (0.2804) lr 2.2949e-04 eta 0:11:04
epoch [45/50] batch [60/200] time 0.550 (0.563) data 0.001 (0.010) loss 1.0645 (0.3880) lr 2.2949e-04 eta 0:10:42
epoch [45/50] batch [80/200] time 0.547 (0.562) data 0.000 (0.008) loss 0.1244 (0.3607) lr 2.2949e-04 eta 0:10:29
epoch [45/50] batch [100/200] time 0.551 (0.556) data 0.001 (0.006) loss 1.6644 (0.3799) lr 2.2949e-04 eta 0:10:11
epoch [45/50] batch [120/200] time 0.538 (0.553) data 0.000 (0.005) loss 0.3948 (0.3940) lr 2.2949e-04 eta 0:09:57
epoch [45/50] batch [140/200] time 0.751 (0.578) data 0.000 (0.005) loss 0.3084 (0.3939) lr 2.2949e-04 eta 0:10:12
epoch [45/50] batch [160/200] time 0.737 (0.594) data 0.000 (0.004) loss 0.0563 (0.4213) lr 2.2949e-04 eta 0:10:17
epoch [45/50] batch [180/200] time 0.543 (0.609) data 0.000 (0.004) loss 0.1154 (0.4275) lr 2.2949e-04 eta 0:10:20
epoch [45/50] batch [200/200] time 0.742 (0.619) data 0.000 (0.003) loss 0.0137 (0.4259) lr 1.9098e-04 eta 0:10:19
epoch [46/50] batch [20/200] time 0.744 (0.725) data 0.000 (0.028) loss 0.0014 (0.4686) lr 1.9098e-04 eta 0:11:50
epoch [46/50] batch [40/200] time 0.604 (0.722) data 0.000 (0.014) loss 0.1350 (0.4187) lr 1.9098e-04 eta 0:11:33
epoch [46/50] batch [60/200] time 0.748 (0.720) data 0.000 (0.010) loss 0.4197 (0.4425) lr 1.9098e-04 eta 0:11:16
epoch [46/50] batch [80/200] time 0.731 (0.714) data 0.000 (0.007) loss 0.1060 (0.4325) lr 1.9098e-04 eta 0:10:56
epoch [46/50] batch [100/200] time 0.752 (0.720) data 0.001 (0.006) loss 0.0033 (0.4334) lr 1.9098e-04 eta 0:10:47
epoch [46/50] batch [120/200] time 0.746 (0.715) data 0.000 (0.005) loss 0.0134 (0.4212) lr 1.9098e-04 eta 0:10:29
epoch [46/50] batch [140/200] time 0.760 (0.712) data 0.014 (0.004) loss 1.1638 (0.3922) lr 1.9098e-04 eta 0:10:12
epoch [46/50] batch [160/200] time 0.746 (0.716) data 0.000 (0.004) loss 0.0378 (0.3900) lr 1.9098e-04 eta 0:10:01
epoch [46/50] batch [180/200] time 0.742 (0.716) data 0.000 (0.003) loss 0.2179 (0.4180) lr 1.9098e-04 eta 0:09:46
epoch [46/50] batch [200/200] time 0.748 (0.716) data 0.000 (0.003) loss 0.8985 (0.4173) lr 1.5567e-04 eta 0:09:32
epoch [47/50] batch [20/200] time 0.530 (0.763) data 0.000 (0.028) loss 0.0235 (0.2994) lr 1.5567e-04 eta 0:09:54
epoch [47/50] batch [40/200] time 0.748 (0.736) data 0.000 (0.014) loss 0.2480 (0.2512) lr 1.5567e-04 eta 0:09:19
epoch [47/50] batch [60/200] time 0.742 (0.730) data 0.000 (0.010) loss 0.4562 (0.3252) lr 1.5567e-04 eta 0:09:00
epoch [47/50] batch [80/200] time 0.643 (0.733) data 0.000 (0.007) loss 1.2496 (0.4346) lr 1.5567e-04 eta 0:08:47
epoch [47/50] batch [100/200] time 0.744 (0.729) data 0.000 (0.006) loss 0.2558 (0.4220) lr 1.5567e-04 eta 0:08:30
epoch [47/50] batch [120/200] time 0.746 (0.723) data 0.000 (0.005) loss 0.7544 (0.4218) lr 1.5567e-04 eta 0:08:11
epoch [47/50] batch [140/200] time 0.753 (0.717) data 0.000 (0.004) loss 0.0691 (0.4085) lr 1.5567e-04 eta 0:07:53
epoch [47/50] batch [160/200] time 0.757 (0.717) data 0.000 (0.004) loss 0.2952 (0.4039) lr 1.5567e-04 eta 0:07:39
epoch [47/50] batch [180/200] time 0.757 (0.717) data 0.000 (0.004) loss 0.6311 (0.3940) lr 1.5567e-04 eta 0:07:24
epoch [47/50] batch [200/200] time 0.755 (0.721) data 0.000 (0.003) loss 0.3888 (0.4100) lr 1.2369e-04 eta 0:07:12
epoch [48/50] batch [20/200] time 0.529 (0.551) data 0.000 (0.029) loss 1.8592 (0.4732) lr 1.2369e-04 eta 0:05:19
epoch [48/50] batch [40/200] time 0.566 (0.543) data 0.000 (0.015) loss 0.2052 (0.4402) lr 1.2369e-04 eta 0:05:04
epoch [48/50] batch [60/200] time 0.555 (0.544) data 0.000 (0.010) loss 0.4460 (0.3964) lr 1.2369e-04 eta 0:04:53
epoch [48/50] batch [80/200] time 0.538 (0.555) data 0.000 (0.007) loss 0.3893 (0.3770) lr 1.2369e-04 eta 0:04:48
epoch [48/50] batch [100/200] time 0.554 (0.552) data 0.000 (0.006) loss 0.7178 (0.4106) lr 1.2369e-04 eta 0:04:35
epoch [48/50] batch [120/200] time 0.711 (0.558) data 0.000 (0.005) loss 0.1222 (0.4136) lr 1.2369e-04 eta 0:04:27
epoch [48/50] batch [140/200] time 0.520 (0.570) data 0.000 (0.004) loss 0.0283 (0.4178) lr 1.2369e-04 eta 0:04:22
epoch [48/50] batch [160/200] time 0.731 (0.580) data 0.000 (0.004) loss 0.0042 (0.4037) lr 1.2369e-04 eta 0:04:15
epoch [48/50] batch [180/200] time 0.727 (0.589) data 0.000 (0.004) loss 1.2540 (0.4112) lr 1.2369e-04 eta 0:04:07
epoch [48/50] batch [200/200] time 0.526 (0.593) data 0.000 (0.003) loss 0.2899 (0.4282) lr 9.5173e-05 eta 0:03:57
epoch [49/50] batch [20/200] time 0.543 (0.570) data 0.000 (0.039) loss 0.1899 (0.3366) lr 9.5173e-05 eta 0:03:36
epoch [49/50] batch [40/200] time 0.553 (0.562) data 0.000 (0.020) loss 0.4931 (0.3530) lr 9.5173e-05 eta 0:03:22
epoch [49/50] batch [60/200] time 0.536 (0.569) data 0.003 (0.013) loss 0.3442 (0.3707) lr 9.5173e-05 eta 0:03:13
epoch [49/50] batch [80/200] time 0.557 (0.559) data 0.000 (0.010) loss 0.1535 (0.4287) lr 9.5173e-05 eta 0:02:58
epoch [49/50] batch [100/200] time 0.609 (0.557) data 0.000 (0.008) loss 0.0325 (0.4218) lr 9.5173e-05 eta 0:02:47
epoch [49/50] batch [120/200] time 0.751 (0.589) data 0.000 (0.007) loss 0.3960 (0.4215) lr 9.5173e-05 eta 0:02:44
epoch [49/50] batch [140/200] time 0.738 (0.603) data 0.000 (0.006) loss 0.0102 (0.4260) lr 9.5173e-05 eta 0:02:36
epoch [49/50] batch [160/200] time 0.550 (0.618) data 0.000 (0.005) loss 0.4682 (0.4270) lr 9.5173e-05 eta 0:02:28
epoch [49/50] batch [180/200] time 0.740 (0.629) data 0.000 (0.005) loss 1.4162 (0.4219) lr 9.5173e-05 eta 0:02:18
epoch [49/50] batch [200/200] time 0.735 (0.637) data 0.000 (0.004) loss 0.1404 (0.4344) lr 7.0224e-05 eta 0:02:07
epoch [50/50] batch [20/200] time 0.634 (0.736) data 0.000 (0.028) loss 1.9760 (0.5033) lr 7.0224e-05 eta 0:02:12
epoch [50/50] batch [40/200] time 0.759 (0.740) data 0.000 (0.014) loss 0.1996 (0.4015) lr 7.0224e-05 eta 0:01:58
epoch [50/50] batch [60/200] time 0.751 (0.729) data 0.001 (0.010) loss 0.2736 (0.3532) lr 7.0224e-05 eta 0:01:42
epoch [50/50] batch [80/200] time 0.620 (0.725) data 0.000 (0.007) loss 0.0003 (0.3998) lr 7.0224e-05 eta 0:01:27
epoch [50/50] batch [100/200] time 0.807 (0.729) data 0.000 (0.006) loss 0.8774 (0.4095) lr 7.0224e-05 eta 0:01:12
epoch [50/50] batch [120/200] time 0.531 (0.730) data 0.000 (0.005) loss 0.2553 (0.4074) lr 7.0224e-05 eta 0:00:58
epoch [50/50] batch [140/200] time 0.487 (0.701) data 0.000 (0.005) loss 0.3724 (0.4322) lr 7.0224e-05 eta 0:00:42
epoch [50/50] batch [160/200] time 0.557 (0.681) data 0.000 (0.004) loss 1.0203 (0.4212) lr 7.0224e-05 eta 0:00:27
epoch [50/50] batch [180/200] time 0.785 (0.675) data 0.000 (0.004) loss 0.4559 (0.4226) lr 7.0224e-05 eta 0:00:13
epoch [50/50] batch [200/200] time 0.820 (0.673) data 0.000 (0.003) loss 0.0809 (0.4306) lr 4.8943e-05 eta 0:00:00
Checkpoint saved to output/base2new/train_base/caltech101/vit_b16_ep50_c4_BZ4_ProDA/seed1/prompt_learner/model.pth.tar-50
Finish training
Deploy the last-epoch model
Evaluate on the *test* set
=> result
* total: 1,287
* correct: 1,264
* accuracy: 98.21%
* error: 1.79%
* macro_f1: 96.90%
Elapsed: 2:05:25
