# Run-level settings shared by the sections below.
# NOTE(review): the code that reads these keys is not part of this file; the
# meanings noted here are inferred from the key names — confirm against the
# training script.
# Same value as model.num_classes further down.
num_classes: 100
# Referenced as `$epochs` by scheduler.T_max.
epochs: 100
# Presumably a gradient-clipping threshold — TODO confirm (norm vs. value).
grad_clip: 5.0
# Presumably the logging interval — TODO confirm the unit (steps vs. epochs).
log_frequency: 100

# Model section: `name` presumably selects the architecture to build (`vit`).
# NOTE(review): num_classes repeats the top-level `num_classes` as a literal,
# so the two must be kept in sync by hand. If the loader resolves `$key`
# references in this section the same way scheduler.T_max uses `$epochs`,
# this could reference the top-level key instead — confirm before changing.
model:
  name: vit
  num_classes: 100

# Loss function, selected by name; no extra arguments are given here.
# NOTE(review): presumably resolved to torch.nn.CrossEntropyLoss — confirm
# against the loader.
criterion:
  name: CrossEntropyLoss

# Optimizer section: optimizer name plus its hyper-parameters.
# NOTE(review): the key names match keyword arguments of torch.optim.SGD and
# are presumably forwarded unchanged — confirm against the loader.
optimizer:
  name: SGD
  lr: 0.1
  # Keep the decimal point and the signed exponent: some YAML 1.1 loaders
  # (e.g. PyYAML) read a bare `5e-5` as a string rather than a float.
  weight_decay: 5.0e-5
  momentum: 0.9

# Learning-rate schedule section.
# T_max holds the string `$epochs`, not a number: YAML performs no
# substitution itself, so the config loader has to resolve it.
# NOTE(review): presumably it resolves to the top-level `epochs` value —
# confirm against the loader.
scheduler:
  name: CosineAnnealingLR
  T_max: $epochs
  eta_min: 0.0

# Dataset section: a dataset name and separate batch sizes for the training
# and evaluation passes.
# NOTE(review): `DatasetGenerator` is defined outside this file; which data
# it actually loads cannot be told from here — confirm against the project.
dataset:
  name: DatasetGenerator
  train_batch_size: 128
  eval_batch_size: 128
