# Hydra-style defaults list: `config` is listed before `_self_`.
# NOTE(review): if this file is composed by Hydra, that order means the values
# in this file take precedence over those from `config` — confirm the loader.
defaults: [config, _self_]

# Sweep identity and search strategy.
# `program` is the script substituted for ${program} in `command` below.
program: train_resnet.py
# Bayesian search over the `parameters` space, guided by `metric`.
method: bayes
name: train_resnet_cifar100_90

# Command line launched for every run of the sweep. Item order is significant.
command:
  - python
  # Macro placeholders are quoted because they contain `{` / `}`.
  - '${program}'
  # Sampled parameters, passed as `key=value` without leading hyphens.
  - '${args_no_hyphens}'
  # Fixed overrides appended to every run (not part of the search space).
  - wandb.group=9sparse
  - sparsity=0.9
  - dataset=cifar100

# Objective of the search: maximize the metric named `sparse_accuracy`.
# NOTE(review): the training script is expected to log a metric under exactly
# this name — verify against train_resnet.py.
metric:
  name: sparse_accuracy
  goal: maximize


# Search space. Keys are dotted override names handed to the program through
# ${args_no_hyphens}. Entries either enumerate discrete `values` or declare a
# continuous range via `distribution` + `min` / `max`.
# For `log_uniform_values`, `min` / `max` are the actual bounds, not exponents.
parameters:

  # ---- discrete choices ----
  batch_size:
    values:
      - 32
      - 64
      - 128
      - 192
      - 256
      - 384
      - 512
  num_epochs:
    values:
      - 120
      - 150
      - 200
  model.activation_name:
    values:
      - 'ReLU'
      - 'GELU'

  # ---- optimizer.* ----
  optimizer.lr:
    values:
      - 0.1
      - 0.075
      - 0.05
      - 0.02

  optimizer.momentum:
    distribution: uniform
    min: 0.2
    max: 0.99
  optimizer.weight_decay:
    distribution: log_uniform_values
    min: 0.000001
    max: 0.0001

  # ---- lr_scheduler.* ----
  lr_scheduler.step_ratio:
    distribution: uniform
    min: 0.3
    max: 0.5
  lr_scheduler.gamma:
    distribution: uniform
    min: 0.2
    max: 0.5

  # ---- sparsifier.* ----
  # NOTE(review): warmup / freeze look like fractions of the training run —
  # confirm against the sparsifier implementation.
  sparsifier.warmup:
    distribution: uniform
    min: 0.0
    max: 0.15

  sparsifier.freeze:
    distribution: uniform
    min: 0.8
    max: 1.0

  sparsifier.ema.rho:
    distribution: log_uniform_values
    min: 0.001
    max: 0.1

  sparsifier.alphas.default:
    distribution: uniform
    min: 1.0
    max: 10.0

  sparsifier.lambda.beta:
    distribution: log_uniform_values
    min: 0.0001
    max: 0.2

  sparsifier.lambda.cap:
    distribution: log_uniform_values
    min: 0.001
    max: 0.004