# dataset
# Local root directory holding the SVHN data.
data_dir: "/mnt/data/datasets/SVHN"
# NOTE(review): the `torch/` prefix presumably selects a torchvision-backed
# loader in the training script — confirm against the dataset factory.
dataset: torch/svhn
num_classes: 10
img_size: 32

# data augmentation
# (SVHN is a comparatively clean dataset, so weaker augmentation is usually
# more stable)
# Normalization statistics: three values each, matching in_channels: 3.
mean: [0.4377, 0.4438, 0.4728]
std: [0.1980, 0.2010, 0.1970]
crop_pct: 1.0
# Both ranges are degenerate: lower and upper bound are the same value (1.0).
scale: [1.0, 1.0]
ratio: [1.0, 1.0]
color_jitter: 0.0
interpolation: bicubic
train_interpolation: bicubic
aa: rand-m3-n1-mstd0.4-inc1  # weaker RandAugment, to avoid overfitting to noise
epochs: 300
mixup: 0.3
mixup_off_epoch: 150  # half of `epochs`
mixup_prob: 1.0
mixup_mode: batch
mixup_switch_prob: 0.5
cutmix: 0.0
reprob: 0.10  # random erasing, weakened
remode: const

# model structure (kept unchanged)
# Name of the model to build; resolved by the training code, not visible here.
model: "TIMv2_bl_qk_cifar"
# NOTE(review): presumably the number of time steps the model is run for —
# confirm against the model definition.
step: 4
patch_size: 4
in_channels: 3
embed_dim: 384
num_heads: 12
mlp_ratio: 4
# NOTE(review): 0.125 = 1/sqrt(64), while embed_dim / num_heads = 384 / 12 = 32.
# Presumably a fixed attention scale rather than 1/sqrt(head_dim) — confirm.
attn_scale: 0.125
mlp_drop: 0.0
attn_drop: 0.0
depths: 4

# train hyperparam
amp: true
batch_size: 128
val_batch_size: 128
# Exponent-notation floats are written with an explicit decimal point.
# YAML 1.1 loaders such as PyYAML resolve a bare `5e-4` as a string, not a
# float, so the dotted form keeps these values numeric on every loader.
lr: 5.0e-4
min_lr: 1.0e-5
sched: cosine
weight_decay: 5.0e-2
cooldown_epochs: 10
warmup_epochs: 10
# Same value as min_lr.
warmup_lr: 1.0e-5
opt: adamw
smoothing: 0.1
workers: 16
seed: 42
log_interval: 200

# log dir
# NOTE(review): absolute path under one user's home directory; it has to be
# adjusted when the config is used on another machine or account.
output: "/home/shensicheng/log/TIMv2/baseline/svhn/"

# device
# NOTE(review): a bare integer; presumably the index of the GPU to use —
# confirm how the training script interprets this value.
device: 1