# dataset and run setup
# Absolute, machine-specific path; must be changed when running on another host.
data_dir: '/home/shensicheng/dataset/ILSVRC2012/'
dataset: imagenet
num_classes: 1000
# NOTE(review): presumably the square input resolution in pixels — confirm.
img_size: 224
# Model identifier string; how it is resolved is not visible from this file.
model: 'TIMv2_bl_qk_img'

# NOTE(review): confirm whether batch sizes are per device or global.
batch_size: 48
val_batch_size: 48
epochs: 300

# model structure
# NOTE(review): meaning of `step` is not visible from this file
# (presumably a number of time steps) — confirm against the model code.
step: 4
# With img_size 224 (set earlier in this file), 224 / 16 = 14 patches per side,
# i.e. 14 x 14 = 196 patches, assuming non-overlapping square patches.
patch_size: 16
in_channels: 3
embed_dim: 512
# 512 / 8 = 64 channels per head for the values given here.
num_heads: 8
# NOTE(review): presumably the MLP hidden width as a multiple of embed_dim — confirm.
mlp_ratio: 4
# 0.125 == 64 ** -0.5, i.e. (embed_dim / num_heads) ** -0.5 for the values here.
# NOTE(review): if that relationship is intended, update this value whenever
# embed_dim or num_heads change — confirm how the model consumes it.
attn_scale: 0.125
mlp_drop: 0.0
attn_drop: 0.0
depths: 8

# data augmentation
# Per-channel normalization statistics (three entries each).
mean:
  - 0.485
  - 0.456
  - 0.406
std:
  - 0.229
  - 0.224
  - 0.225
crop_pct: 0.95
# Both bounds are 1.0. NOTE(review): if this is the random-crop area range,
# identical bounds leave no scale jitter — confirm this is intended.
scale:
  - 1.0
  - 1.0
# The two bounds are 3/4 and 4/3.
ratio:
  - 0.75
  - 1.3333333333333333
color_jitter: 0.4
interpolation: bicubic
train_interpolation: bicubic
aa: rand-m5-mstd0.5-inc1
mixup: 0.2
mixup_off_epoch: 0
mixup_prob: 0.6
mixup_mode: batch
mixup_switch_prob: 0.5
cutmix: 1.0
# NOTE(review): 0.0 presumably disables random erasing, leaving `remode` unused — confirm.
reprob: 0.0
remode: const
# NOTE(review): these two look like LR-schedule settings rather than augmentation;
# confirm whether they are used with the `sched` value set later in this file.
decay_epochs: 30
decay_rate: 0.1
# NOTE(review): the original author marked this value as uncertain ("???") — confirm.
drop_path: 0.2
train_split: train
val_split: validation


# train hyperparameters
# Booleans use canonical lowercase `true`/`false` so that every YAML loader
# (1.1 and 1.2, including strict schemas) resolves them as booleans.
# NOTE(review): presumably selects native mixed precision over Apex — confirm.
amp: true
apex_amp: false
lr: 0.0015
lr_cycle_limit: 1
lr_cycle_mul: 1.0
# NOTE(review): with lr_noise set to null, lr_noise_pct and lr_noise_std
# presumably have no effect — confirm against the scheduler code.
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
# 1.5e-05 is lr * 0.01 for the lr given above. The exponent values in this
# section keep a decimal point and a signed exponent so YAML 1.1 loaders
# such as PyYAML resolve them as floats rather than strings.
min_lr: 1.5e-05
sched: cosine
cooldown_epochs: 10
weight_decay: 0.01
warmup_epochs: 20
warmup_lr: 1.0e-05
opt: lamb
# null here; NOTE(review): presumably falls back to the optimizer's own defaults — confirm.
opt_betas: null
opt_eps: null
smoothing: 0.1
workers: 16
seed: 42
log_interval: 200

# log / output directory
# Absolute, machine-specific path; must be changed when running on another host.
output: '/home/shensicheng/log/TIMv2/baseline/imgnet/'

# device (currently disabled; NOTE(review): presumably a list of GPU indices — confirm)
# device: 0,1,2,3,4,5,6,7


