# Hydra defaults list: entries are composed in order, so `_self_` placed last
# lets the values in this file override those from the selected group options.
defaults:
  - model: neomlp  # option `neomlp` from the `model` config group
  - data: cifar10  # option `cifar10` from the `data` config group
  - _self_

# Training-loop settings.
# NOTE(review): meanings below are inferred from key names only - confirm
# against the training script.
num_epochs: 300
# NOTE(review): unclear from this file whether this counts steps or epochs.
eval_every: 250

batch_size: 256

num_workers: 8
# Presumably gradient-accumulation steps (1 = no accumulation) - confirm.
num_accum: 1

# Booleans are written in canonical lowercase form (`true` / `false`).
save_ckpt: false

reuse_backbone: false

# Optimizer spec: `_target_` names the class to build and the remaining keys
# are passed as its keyword arguments (Hydra `instantiate` convention).
# The parameters to optimize are not listed here; presumably the caller
# supplies them at instantiation time.
optim:
  _target_: torch.optim.AdamW
  # Mantissas are written with an explicit decimal point so that YAML 1.1
  # loaders (e.g. plain PyYAML) also resolve these as floats, not strings.
  lr: 5.0e-3
  weight_decay: 1.0e-4
  amsgrad: true
  fused: false

# Commented-out alternative that sets no scheduler
# (NOTE(review): confirm the consumer accepts null):
# scheduler: null
# Active setting: project-local WarmupLRScheduler with 1000 warmup steps.
scheduler:
  _target_: experiments.lr_scheduler.WarmupLRScheduler
  warmup_steps: 1000

# No distributed configuration by default.
distributed: null
# Template for a distributed setup. To use it, uncomment these lines and
# remove the `distributed: null` line above (duplicate keys are invalid YAML).
# distributed:
#   world_size: 1
#   rank: 0
#   device_ids: null

# Checkpoint to load; null means none is specified.
# NOTE(review): the expected value format (e.g. a file path) is not visible
# in this file - confirm against the consumer.
load_ckpt: null

# Single switch for mixed precision: both `enabled` flags below interpolate
# `${use_amp}`, so they always follow this value.
use_amp: false
# Presumably keyword arguments for a torch GradScaler - confirm.
gradscaler:
  enabled: ${use_amp}
# Presumably keyword arguments for torch.autocast - confirm.
autocast:
  device_type: cuda
  enabled: ${use_amp}
  # NOTE(review): this loads as the plain string "float16"; the consumer
  # presumably maps it to a torch dtype - confirm.
  dtype: float16

# Gradient clipping switch and threshold.
# NOTE(review): presumably the max norm is only applied when `clip_grad`
# is true - confirm against the training loop.
clip_grad: true
clip_grad_max_norm: 10.0

# Seed value; presumably used to seed the random number generators - confirm.
seed: 42
# Weights & Biases settings; `entity` and `name` are left unset (null) here.
wandb:
  project: neomlp.neoneural-classification
  entity: null
  name: null

# NOTE(review): presumably forwarded to torch.set_float32_matmul_precision
# - confirm against the consumer.
matmul_precision: high

debug: false

# Mixup is off by default.
# NOTE(review): `alpha` and `beta` look like mixup hyperparameters, but how
# they are consumed is not visible in this file - confirm.
mixup: false
alpha: 0.5
beta: 0.8
