---
# Run-wide settings. How each value is consumed is decided by the training
# code that loads this file, which is not part of this config.

# Number of training epochs. The `scheduler` section below refers to this
# key through the `$epochs` placeholder (presumably resolved by the loader).
epochs: 60
# Presumably the gradient-clipping threshold -- confirm whether the trainer
# clips by norm or by value.
grad_clip: 5.0
# Presumably the logging interval (likely counted in steps) -- confirm.
log_frequency: 100
# Number of target classes. The same value is repeated under `model`, so
# keep the two in sync when changing it.
num_classes: 10

# Model selection. `name` presumably picks the architecture from a registry
# in the training code -- verify against the loader.
model:
  name: vit
  # Duplicates the top-level `num_classes`; keep both values in sync.
  num_classes: 10

# Loss function, selected by name; no extra arguments are configured here.
# NOTE(review): presumably resolved to torch.nn.CrossEntropyLoss -- confirm.
criterion:
  name: CrossEntropyLoss

# Optimizer selection plus its hyperparameters.
# NOTE(review): the keys other than `name` look like keyword arguments of
# torch.optim.SGD -- confirm how the loader forwards them.
optimizer:
  name: SGD
  # Base learning rate; a scheduler is configured separately in this file.
  lr: 0.01
  momentum: 0.9
  # Keep the decimal point and the signed exponent: YAML 1.1 loaders such
  # as PyYAML only resolve this form as a float (a bare `5e-5` would load
  # as a string there).
  weight_decay: 5.0e-5

# Learning-rate schedule, selected by name.
# NOTE(review): the keys match the arguments of
# torch.optim.lr_scheduler.CosineAnnealingLR -- confirm against the loader.
scheduler:
  name: CosineAnnealingLR
  # `$epochs` is not YAML syntax; a YAML parser reads it as the plain string
  # "$epochs". Presumably the config loader substitutes the top-level
  # `epochs` value -- verify. Do not quote or respell it without checking
  # how that substitution is performed.
  T_max: $epochs
  eta_min: 0.0

# Data pipeline settings. `name` presumably selects a dataset class defined
# in the training code -- verify.
dataset:
  name: DatasetGenerator
  # Batch sizes for training and evaluation, respectively (going by the key
  # names -- confirm how the loader uses them).
  train_batch_size: 128
  eval_batch_size: 128
