# Device identifier string passed to whatever consumes this config.
# NOTE(review): presumably selects the GPU backend — confirm in the loader.
device: cuda
# Dataset selection; `name` is the only dataset key set here.
dataset:
  name: CIFAR100
# Model selection. `name` has the same value as the nested `pyramidnet` key,
# which presumably holds the settings specific to that architecture.
model:
  type: cifar
  name: pyramidnet
  # NOTE(review): presumably Kaiming (He) initialisation in fan-out mode —
  # confirm the accepted values in the consuming code.
  init_mode: kaiming_fan_out
  pyramidnet:
    depth: 110
    initial_channels: 16
    block_type: bottleneck
    # NOTE(review): presumably the PyramidNet widening factor — confirm.
    alpha: 84
# Hyperparameters grouped by optimizer name. `train.optimizer` is set to
# `admetas` in this file, so this is presumably the mapping that gets read.
optim:
  admetas:
    # NOTE(review): the key is spelled `lamda`, not `lambda`. Keep it as-is:
    # the consumer presumably looks the value up under this exact name.
    lamda: 0.9
    beta: 0.2
# Training-run settings: seed, batching, optimizer choice, output location,
# logging/checkpoint periods and the training data loader.
train:
  seed: 0
  # NOTE(review): looks like a mixed-precision opt level (O0 = no mixed
  # precision) — confirm against the training script.
  precision: O0
  batch_size: 128
  subdivision: 1
  # Same name as the `optim.admetas` hyperparameter mapping in this file.
  optimizer: admetas
  base_lr: 0.05
  # Written with a decimal point so every YAML loader yields a float: a bare
  # `1e-4` resolves to a string under YAML 1.1 loaders such as PyYAML.
  weight_decay: 1.0e-4
  output_dir: experiments_pyramid/admetas/
  log_period: 100
  checkpoint_period: 100
  # NOTE(review): with a ratio of 0.0 presumably no validation split is taken
  # from the training data, and the test set is used instead — confirm.
  val_ratio: 0.0
  use_test_as_val: true
  use_tensorboard: true
  distributed: false
  dataloader:
    num_workers: 2
    drop_last: true
    pin_memory: false
# Batch size and data-loader settings used for validation.
validation:
  batch_size: 256
  dataloader:
    num_workers: 2
    # `drop_last` is off here, whereas `train.dataloader` turns it on.
    drop_last: false
    pin_memory: false
# Learning-rate schedule settings for a 160-epoch run.
scheduler:
  epochs: 160
  type: multistep
  # NOTE(review): presumably the learning rate is multiplied by `lr_decay` at
  # each listed epoch (80 and 120) — confirm in the scheduler implementation.
  milestones: [80, 120]
  lr_decay: 0.1
# Augmentation switches, followed by per-transform parameter mappings.
# Only random crop and random horizontal flip are switched on, and they are
# the only transforms with parameters listed in this section.
augmentation:
  # On/off flags, one per transform.
  use_random_crop: true
  use_random_horizontal_flip: true
  use_cutout: false
  use_random_erasing: false
  use_dual_cutout: false
  use_mixup: false
  use_ricap: false
  use_cutmix: false
  use_label_smoothing: false
  # Parameters for the random-crop transform.
  random_crop:
    padding: 4
    fill: 0
    padding_mode: constant
  # Parameters for the random horizontal flip.
  random_horizontal_flip:
    prob: 0.5
# TensorBoard-related flags; all three are disabled in this config.
# `train.use_tensorboard` is a separate flag set under `train`.
tensorboard:
  train_images: false
  val_images: false
  model_params: false


