# Config composition list (Hydra-style `defaults`): selects the `neomlp` entry
# of the `model` group and the `cifar10` entry of the `data` group.
# `_self_` is listed last, so under Hydra's composition rules the keys set in
# this file take precedence over the same keys coming from those groups.
defaults:
  - model: neomlp
  - data: cifar10
  - _self_

# Training length and evaluation cadence.
num_epochs: 100
# Interpolation: resolves to the value of `num_epochs` unless overridden.
num_finetune_epochs: ${num_epochs}
# NOTE(review): presumably the number of training steps between evaluations —
# confirm in the training loop.
steps_till_eval: 950
# `steps_till_plot` is not set by this file; the line below is commented out.
# steps_till_plot: 2500

# Evaluation switch and dataset sizing.
# NOTE(review): the meaning of each key is inferred from its name only —
# verify against the script that consumes this config.
eval: False
num_train_images: 32768
num_eval_images: 2
num_full_eval_images: 16
# Training and fine-tuning fractions are set to the same value.
train_fraction: 0.005
finetune_fraction: 0.005
num_augmentations: 1

# Runtime / loader knobs.
# NOTE(review): `num_workers` is presumably the data-loader worker count and
# `num_accum` the gradient-accumulation step count (1 would then mean no
# accumulation) — confirm at the point of use.
num_workers: 8
num_tsne_images: 1000
num_accum: 1

# `save_ckpt` defaults to False in this file.
save_ckpt: False

# Optimizer spec. `_target_` names the class to build (torch.optim.AdamW); the
# remaining keys are presumably forwarded to it as keyword arguments by the
# instantiating code (e.g. hydra.utils.instantiate) — confirm at the call site.
optim:
  _target_: torch.optim.AdamW
  # Mantissas are written with an explicit decimal point so that YAML 1.1
  # loaders (e.g. plain PyYAML) also read these values as floats, not strings.
  lr: 5.0e-3
  weight_decay: 5.0e-4
  amsgrad: True
  fused: False

# Learning-rate scheduler spec; `_target_` points at a class under the
# `experiments` package. The commented-out `scheduler: null` line is kept as
# the alternative setting (presumably disabling scheduling — confirm).
# scheduler: null
scheduler:
  _target_: experiments.lr_scheduler.WarmupLRScheduler
  # NOTE(review): presumably counted in optimizer steps — confirm in
  # WarmupLRScheduler.
  warmup_steps: 1000

# Distributed settings are unset (null) by default.
distributed: null
# Commented-out template showing the mapping form of `distributed`:
# distributed:
#   world_size: 1
#   rank: 0
#   device_ids: null

# Unset (null) by default.
# NOTE(review): presumably takes a checkpoint path when set — confirm.
load_ckpt: null

# Mixed-precision settings. `use_amp` is the single switch: both
# `gradscaler.enabled` and `autocast.enabled` interpolate from it.
use_amp: False
# NOTE(review): presumably keyword arguments for torch's GradScaler — confirm
# at the call site.
gradscaler:
  enabled: ${use_amp}
# NOTE(review): presumably keyword arguments for torch.autocast. `dtype` is a
# plain string here, so the consumer would have to map it to a torch dtype —
# confirm.
autocast:
  device_type: cuda
  enabled: ${use_amp}
  dtype: float16

# Gradient-clipping toggle and its max-norm threshold.
# NOTE(review): presumably `clip_grad_max_norm` is only consulted when
# `clip_grad` is True — confirm in the training loop.
clip_grad: True
clip_grad_max_norm: 10.0

# Random seed.
seed: 42
# Weights & Biases settings; `entity` and `name` are left unset (null).
# NOTE(review): presumably forwarded to wandb.init — confirm at the call site.
wandb:
  project: neomlp.neural-datasets
  entity: null
  name: null

# NOTE(review): presumably forwarded to torch.set_float32_matmul_precision —
# confirm at the point of use.
matmul_precision: high

# `debug` defaults to False in this file.
debug: False

# `mixup` defaults to False in this file.
# NOTE(review): `alpha` is presumably the mixup mixing parameter, given its
# placement next to `mixup` — confirm, since the key name alone is ambiguous.
mixup: False
alpha: 0.5


