# Sweep header.
# NOTE(review): `defaults` looks like a Hydra defaults list (compose `config`
# first, then this file via `_self_`) — confirm which loader consumes this file.
defaults:
  - config
  - _self_

# Sweep display name, the script each trial runs, and the search strategy
# (`bayes` = Bayesian optimization over the `metric` defined in this file).
name: train_resnet_cifar100_90
program: train_resnet.py
method: bayes

# Command line launched for every trial.
# `${program}` expands to the `program` value of this file and
# `${args_no_hyphens}` expands the sampled hyperparameters as `key=value`
# tokens (no leading `--`).
command:
  - python
  - ${program}
  - ${args_no_hyphens}
  # Fixed overrides appended to every run of this sweep. Each override is
  # listed exactly once (the original repeated `sparsity=0.9`).
  - wandb.group=9sparse
  - sparsity=0.9

# Objective for the sweep: maximize the logged value named `sparse_accuracy`.
# NOTE(review): the training script must log a metric under exactly this name
# for the optimizer to see it — confirm against train_resnet.py.
metric:
  goal: maximize
  name: sparse_accuracy


# Search space. Each dotted key is forwarded to the program as a `key=value`
# token through `${args_no_hyphens}` in `command`.
# `values` entries are categorical choices; `min`/`max` entries are sampled
# from the named `distribution` (for `log_uniform_values`, `min`/`max` are the
# actual value bounds and sampling is uniform in log space).
parameters:
  # --- Training setup ---
  batch_size:
    values: [32, 64, 128, 192, 256, 384, 512, 768, 1024]
  num_epochs:
    values: [100, 120, 150, 200]
  model.activation_name:
    values: ['ReLU', 'SiLU', 'GELU']

  # --- Optimizer ---
  optimizer.lr:
    distribution: log_uniform_values
    min: 0.001
    max: 0.1
  optimizer.momentum:
    distribution: uniform
    min: 0.2
    max: 0.99
  optimizer.weight_decay:
    distribution: log_uniform_values
    min: 0.000001
    max: 0.001

  # --- Learning-rate schedule ---
  lr_scheduler.step_ratio:
    distribution: uniform
    min: 0.1
    max: 0.5
  lr_scheduler.gamma:
    distribution: uniform
    min: 0.1
    max: 0.5
  # NOTE(review): -1 is presumably a sentinel (e.g. "no offset") — confirm
  # against the scheduler implementation.
  lr_scheduler.offset_ratio:
    values: [-1, 0.25, 0.4, 0.5]

  # --- Sparsifier ---
  sparsifier.warmup:
    distribution: uniform
    min: 0.0
    max: 0.2
  sparsifier.freeze:
    distribution: uniform
    min: 0.7
    max: 1.0
  sparsifier.ema.rho:
    distribution: log_uniform_values
    min: 0.001
    max: 0.99
  sparsifier.alphas.default:
    distribution: uniform
    min: 0.0
    max: 10.0

  # --- Sparsifier lambda schedule ---
  sparsifier.lambda.mode:
    values: ['constant', 'RM']
  sparsifier.lambda.beta:
    distribution: log_uniform_values
    min: 0.0001
    max: 0.9
  sparsifier.lambda.cap:
    distribution: log_uniform_values
    min: 0.001
    max: 0.003
  # NOTE(review): `log_uniform_values` produces non-integer samples; if `t0`
  # is expected to be an integer, a quantized distribution may be intended —
  # confirm against the sparsifier code.
  sparsifier.lambda.t0:
    distribution: log_uniform_values
    min: 1
    max: 1000
