# Top-level run settings shared by the whole experiment.
# num_classes: presumably the number of target classes — confirm which
# components (model / dataset) actually read it.
num_classes: 10
# epochs: also referenced by scheduler.T_max through a dollar-prefixed
# reference, so changing it here is expected to change the schedule length too.
epochs: 80
# grad_clip: presumably a gradient-clipping threshold; whether it is applied
# by norm or by value is not visible in this file — TODO confirm.
grad_clip: 5.0
# log_frequency: presumably "log every N steps"; the unit (steps vs. batches
# vs. epochs) is decided by the consumer — TODO confirm.
log_frequency: 50

# Model selection. Only a name is given; no constructor arguments are set
# here. Presumably the loader looks the name up in a registry — confirm.
model:
  name: ToyModel

# Loss function selection. Only a name is given, so no extra arguments are
# passed from this file and the loss is expected to use its own defaults.
criterion:
  name: CrossEntropyLoss

# Optimizer selection and hyperparameters. Keys other than `name` are
# presumably forwarded as keyword arguments to the optimizer — confirm.
optimizer:
  name: SGD
  # Initial learning rate.
  lr: 0.025
  # 5.e-4 is a YAML float equal to 0.0005 (same value as 5.0e-4).
  weight_decay: 5.e-4
  momentum: 0.9

# Learning-rate scheduler selection and parameters.
scheduler:
  name: CosineAnnealingLR
  # To YAML this value is just a plain string. Presumably the config loader
  # replaces the dollar-prefixed reference with the top-level `epochs` value
  # (80) — verify against the loader before renaming or quoting it.
  T_max: $epochs
  # Lower bound for the learning rate at the end of the schedule (assumed from
  # the parameter name — confirm against the scheduler's documentation).
  eta_min: 0.0

# Dataset selection and batch sizes. Training and evaluation use separate
# batch-size keys, both currently set to 128.
dataset:
  name: DatasetGenerator
  train_batch_size: 128
  eval_batch_size: 128
