# Training configuration for the CUB200 experiment named below
# (experiments_name / experiments_subname).
# All keys live in one flat mapping; key names are consumed by the training
# code and must not be renamed.

# Run environment
seed: 1222
gpus: 2
workers: 10

# Optimizer and learning-rate schedule
opt: adamw
sched: cosine_timm
# Exponent-form floats are written with an explicit mantissa dot: YAML 1.1
# loaders (e.g. PyYAML) resolve a bare `1e-4` as a string, not a float.
lr: 1.0e-4
min_lr: 1.0e-6
warmup_lr: 1.0e-6
weight_decay: 0.05
momentum: 0.9
end_epoch: 300
cooldown_epochs: 10
warmup_epochs: 20
batch_size: 384

# Model
classifier_type: deit_small_patch16_224_return_total_attn
# NOTE(review): ImageNet-pretrained weights are disabled while a checkpoint
# file is given below; presumably that file is loaded instead (confirm).
is_pretrained_imagenet: false
initial_checkpoint: dino_deitsmall16_pretrain.pth
patch_size: 16
num_classes: 200

# Experiment identification and dataset
tag: CUB200_epoch_300
experiments_name: CUB200_experiment_guide
experiments_subname: CUB200_baseline_without_re
dataset_name: CUB200

# Data augmentation
aug: timm_rand
repeated_aug: true
transmix: false
reprob: 0.0
mixup: 0.8
cutmix: 1.0
mixup_prob: 1.0
mixup_switch_prob: 0.5

# Checkpointing and miscellaneous
# NOTE(review): 1200 is larger than end_epoch (300); if this frequency is
# counted in epochs, no periodic checkpoint would ever be written (confirm).
saveckp_freq: 1200
# NOTE(review): baseline is enabled while mixup/cutmix are non-zero; verify
# this combination is what the "baseline" run is meant to use.
baseline: true
sample_rate: 100