# Training length and model-shape hyperparameters.
# NOTE(review): the meanings noted below are inferred from key names only;
# confirm against the code that consumes this config.
epochs: 100
# presumably the number of simulation time steps per sample — TODO confirm
time_step: 4
# NOTE(review): `layer` and the later `depths` key are both 8; check whether
# the consumer reads both or one of them is a leftover alias.
layer: 8
dim: 768
# 768 / 8 = 96 channels per head, assuming `dim` is split evenly across heads.
num_heads: 8
patch_size: 16
mlp_ratio: 4
# Dataset location and input geometry.
# The path is quoted so the leading `~` is unmistakably part of a string.
# YAML does not expand `~`; the consumer has to resolve the home directory
# itself — verify that it does.
data_dir: '~/data/imagenet'
dataset: imagenet
num_classes: 1000
# `img_size` and `img_size_teacher` are separate keys, currently set to the
# same value.
img_size: 224
img_size_teacher: 224
# Input normalization and augmentation settings.
# Every list in this stanza uses the same style: block sequence, 2-space
# indent.
# mean / std: three values each, presumably per-channel (RGB) statistics;
# the numbers match the commonly used ImageNet values.
mean:
  - 0.485
  - 0.456
  - 0.406
std:
  - 0.229
  - 0.224
  - 0.225
crop_pct: 0.875
# scale / ratio: two-element lists, presumably [min, max] bounds for the
# random-resized-crop area fraction and aspect ratio — TODO confirm.
scale:
  - 0.08
  - 1.0
ratio:
  - 0.75
  - 1.33
color_jitter: 0.4
interpolation: bicubic
train_interpolation: bicubic
# NOTE(review): looks like a timm-style RandAugment policy string — confirm.
aa: rand-m9-mstd0.5-inc1
# Mixup / CutMix and random-erasing settings.
# NOTE(review): the key names match timm's training options; semantics are
# assumed to follow timm — confirm against the consuming script.
mixup: 0.8
# NOTE(review): in timm, 0 means mixup is never switched off — confirm.
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_mode: batch
mixup_switch_prob: 0.5
cutmix: 1.0
# presumably random-erasing probability and fill mode — TODO confirm
reprob: 0.25
remode: pixel
# Optimization, learning-rate schedule, and data-loader settings.
amp: true
batch_size: 20
val_batch_size: 20
# Floats are written with an explicit decimal point. YAML 1.1 loaders such
# as PyYAML resolve `1e-5` (no dot) as a string rather than a float, which
# only works when the consumer happens to coerce the value afterwards.
lr: 3.5355e-4
min_lr: 1.0e-5
sched: cosine
weight_decay: 0.05
cooldown_epochs: 10
warmup_epochs: 20
warmup_lr: 1.0e-6
opt: adamw
# presumably the label-smoothing factor — TODO confirm
smoothing: 0.1
workers: 8
# NOTE(review): same value as the earlier `layer` key; check whether both
# are read by the consumer.
depths: 8

# Many Eyes, One Mind (MEOM) distillation parameters.
# Booleans use the canonical lowercase `true` / `false` spelling.
distill: true
distill_type: meom
meom_alpha: 0.5  # Weight for the ANN teacher
meom_beta: 0.5  # Weight for the dynamic teacher
# NOTE(review): the role of `meom_gamma` is not documented in this file;
# confirm which loss term it scales.
meom_gamma: 1.0
# presumably the softmax temperature applied to distillation logits —
# TODO confirm
meom_temperature: 3.0
# presumably disables a plain cross-entropy loss term — TODO confirm
use_ce: false

# Teacher model configuration.
model_teacher_name: vit-b
# Teacher and student embedding widths are both 768, and `student_emb`
# equals the `dim` key set earlier in this file.
# NOTE(review): presumably the two must stay in sync with the respective
# model widths — confirm.
teacher_emb: 768
student_emb: 768
