# Training length and network shape.
# NOTE(review): the meaning of every key is defined by the script that loads
# this file, which is not visible here — confirm against its argument parser.
epochs: 400

# NOTE(review): presumably the number of simulation steps per sample — confirm.
time_step: 4

layer: 4
dim: 384
num_heads: 8
mlp_ratio: 4
patch_size: 4
# Dataset selection and shape.
# Alternative data root, kept for reference:
# data_dir: ./torch/cifar10/
# Relative path; what it is resolved against depends on the loading script.
data_dir: '../../../data/'
dataset: 'torch/cifar10'
num_classes: 10
img_size: 32
# Input normalisation: three values each — presumably one per colour channel;
# confirm against the loader.
mean: [0.4914, 0.4822, 0.4465]
std: [0.2470, 0.2435, 0.2616]

# Crop / resize geometry. Both `scale` and `ratio` are the pair [1.0, 1.0].
# NOTE(review): if these are (min, max) ranges, the pair is degenerate and no
# random scale/aspect variation is applied — confirm.
crop_pct: 1.0
scale: [1.0, 1.0]
ratio: [1.0, 1.0]

# Colour jitter strength is zero.
color_jitter: 0.0

# The same interpolation name is used for evaluation and training.
interpolation: bicubic
train_interpolation: bicubic

# NOTE(review): looks like a timm auto-augment policy string — confirm the
# consuming script parses this format.
aa: rand-m9-n1-mstd0.4-inc1
# Sample-mixing settings.
# NOTE(review): key names match timm-style mixup/cutmix options; confirm the
# loading script interprets them that way.
mixup: 0.5
cutmix: 0.0
mixup_prob: 1.0
mixup_switch_prob: 0.5
mixup_mode: batch
# 200 is half of the `epochs` value (400) set in this file.
mixup_off_epoch: 200

# Erasing settings (`re*` keys).
# NOTE(review): presumably random-erasing probability and fill mode — confirm.
reprob: 0.25
remode: const
# Optimiser, learning-rate schedule, and loader settings.
amp: true
batch_size: 64
val_batch_size: 64

# Exponent-form floats are written with a decimal point in the mantissa.
# PyYAML's YAML 1.1 resolver only recognises a float when the mantissa contains
# a `.`; a dot-less form such as `1e-3` is loaded as a string instead.
lr: 1.0e-3
min_lr: 1.0e-5
sched: cosine
weight_decay: 6.0e-2
cooldown_epochs: 10
warmup_epochs: 20
# Same value as `min_lr`, written in the same notation.
warmup_lr: 1.0e-5
opt: adamw
smoothing: 0.1
workers: 8
# NOTE(review): same value as `layer` earlier in this file; possibly a second
# name for the network depth — confirm which key the loader actually reads.
depths: 4
