# Training-level modifiers: epoch range and learning-rate schedule.
training_modifiers:

  # Epoch range for the run: 0 to 300. The modifiers in this file all start
  # and end inside this window.
  - !EpochRangeModifier
    start_epoch: 0
    end_epoch: 300

  # Learning-rate schedule: lr_func "linear" between init_lr 0.2 and
  # final_lr 0.001, active over epochs 0-300.
  - !LearningRateFunctionModifier
    # Shape of the LR curve.
    lr_func: linear
    init_lr: 0.2
    final_lr: 0.001
    # Epoch window in which this modifier is active.
    start_epoch: 0
    end_epoch: 300
    # NOTE(review): cycle_epochs (20) equals the update_frequency used by the
    # pruning modifiers in this file; presumably the LR schedule restarts with
    # each pruning update. Confirm how cycle_epochs / cycle_mul are applied
    # when lr_func is "linear".
    cycle_epochs: 20
    cycle_mul: 1.0

# Pruning schedule, split into consecutive OBSXPruningModifier stages.
pruning_modifiers:

  # Stage covering epochs 0-40: sparsity target goes from 0.4 to 0.6
  # (inter_func "linear"), with update_frequency 20.
  - !OBSXPruningModifier
    # Schedule.
    start_epoch: 0
    end_epoch: 40
    update_frequency: 20
    init_sparsity: 0.4
    final_sparsity: 0.6
    inter_func: linear
    # Parameter selection. The pattern is kept single-quoted so the
    # backslashes in \d are passed through literally.
    # NOTE(review): the "re:" prefix presumably marks the entry as a regex
    # over parameter names -- confirm against the recipe loader.
    params:
      - 're:.*(blocks.\d+.\d+.(conv|conv_(exp|pw|pwl)|se.conv_.*)|conv_head).weight'
    mask_type: unstructured
    global_sparsity: true
    # OBS solver settings.
    # NOTE(review): presumably gradient-sample count, Fisher block size,
    # dampening term and number of recomputations per update -- confirm
    # against the OBSXPruningModifier implementation.
    num_grads: 4096
    fisher_block_size: 16
    damp: 1.0e-8
    num_recomputations: 1

  # Stage covering epochs 40-100: sparsity target goes from 0.6 to 0.75
  # (inter_func "linear"), with update_frequency 20.
  - !OBSXPruningModifier
    # Schedule.
    start_epoch: 40
    end_epoch: 100
    update_frequency: 20
    init_sparsity: 0.6
    final_sparsity: 0.75
    inter_func: linear
    # Parameter selection. The pattern is kept single-quoted so the
    # backslashes in \d are passed through literally.
    # NOTE(review): the "re:" prefix presumably marks the entry as a regex
    # over parameter names -- confirm against the recipe loader.
    params:
      - 're:.*(blocks.\d+.\d+.(conv|conv_(exp|pw|pwl)|se.conv_.*)|conv_head).weight'
    mask_type: unstructured
    global_sparsity: true
    # OBS solver settings.
    # NOTE(review): presumably gradient-sample count, Fisher block size,
    # dampening term and number of recomputations per update -- confirm
    # against the OBSXPruningModifier implementation.
    num_grads: 4096
    fisher_block_size: 16
    damp: 1.0e-8
    num_recomputations: 1

  # Stage covering epochs 100-280: sparsity target goes from 0.75 to 0.9
  # (inter_func "linear"), with update_frequency 20.
  - !OBSXPruningModifier
    # Schedule.
    start_epoch: 100
    end_epoch: 280
    update_frequency: 20
    init_sparsity: 0.75
    final_sparsity: 0.9
    inter_func: linear
    # Parameter selection. The pattern is kept single-quoted so the
    # backslashes in \d are passed through literally.
    # NOTE(review): the "re:" prefix presumably marks the entry as a regex
    # over parameter names -- confirm against the recipe loader.
    params:
      - 're:.*(blocks.\d+.\d+.(conv|conv_(exp|pw|pwl)|se.conv_.*)|conv_head).weight'
    mask_type: unstructured
    global_sparsity: true
    # OBS solver settings.
    # NOTE(review): presumably gradient-sample count, Fisher block size,
    # dampening term and number of recomputations per update -- confirm
    # against the OBSXPruningModifier implementation.
    num_grads: 4096
    fisher_block_size: 16
    damp: 1.0e-8
    num_recomputations: 1

  # Final stage covering epochs 280-300: init_sparsity and final_sparsity are
  # both 0.9, so the sparsity target does not increase during this stage.
  - !OBSXPruningModifier
    # Schedule.
    start_epoch: 280
    end_epoch: 300
    update_frequency: 20
    init_sparsity: 0.9
    final_sparsity: 0.9
    inter_func: linear
    # Parameter selection. The pattern is kept single-quoted so the
    # backslashes in \d are passed through literally.
    # NOTE(review): the "re:" prefix presumably marks the entry as a regex
    # over parameter names -- confirm against the recipe loader.
    params:
      - 're:.*(blocks.\d+.\d+.(conv|conv_(exp|pw|pwl)|se.conv_.*)|conv_head).weight'
    mask_type: unstructured
    global_sparsity: true
    # OBS solver settings.
    # NOTE(review): presumably gradient-sample count, Fisher block size,
    # dampening term and number of recomputations per update -- confirm
    # against the OBSXPruningModifier implementation.
    num_grads: 4096
    fisher_block_size: 16
    damp: 1.0e-8
    num_recomputations: 1
