#config example:
#Authors: Shaokai, Jan
#sample_argument:
#type || default value
#value
#print_freq  int || 10  
#resume      bool || False     *resume from checkpoint
#gpu         str || all      * gpu id to use
#lr          float || 0.1      *learning rate
#arch        str  || resnet18  *architecture
#save_model  str  || saved_model.pt * filename for the saved model
#load_model  str  || load_model.pt  * filename of the model to load
#workers     int  || 16        * number of threads for loading images
#logging     bool || False     * whether to log files
#log_dir     str  || logs      * destination folder to drop log file
#smooth_eps  float|| 0.0       * smoothing rate [0.0, 1.0], set to 0.0 to disable
#alpha       float|| 0.0       * for mixup training, lambda is drawn from Beta(alpha, alpha). Set to 0.0 to disable
#                              * Bag of Tricks uses alpha = 0.2 and increases the number of training epochs by 67%
#epochs      int  || 200       * number of training epochs for chosen stage
#warmup_epochs  int || 0       * number of epochs for lr warmup
#warmup_lr    float || 0.0001  * initial lr before warmup
#optimizer   str  || sgd       * optimizer for the chosen stage
#lr_scheduler str || default,cosine  * learning rate scheduler
#admm_epoch  int  || 20        * number of epochs between successive updates of z and u
#rho        float || 0.001     * regularization strength
#sparsity_type str || [irregular,channel,filter,bn_filter]     * sparsity type
#multi_rho   bool  || False    * whether to use multi rho in admm stage
#verbose    bool  || False     * whether to print convergence info
#masked_progressive bool || False * whether to use masked progressive

general:
  # General settings (presumably shared by the pretrain/admm/retrain stages
  # below -- confirm against the config loader).
  data: 'data/ImageNet'
  pretrained: false
  start_epoch: 0
  momentum: 0.9
  # Written with a decimal point on purpose: YAML 1.1 loaders such as PyYAML
  # resolve a bare `1e-4` as the string '1e-4' rather than a float.
  weight_decay: 1.0e-4
  batch_size: 1024
  world_size: -1
  rank: -1
  dist_url: 'localhost'
  dist_backend: nccl
  # No seed configured; explicit null instead of a bare empty value.
  seed: null
  # gpu id(s) to use; quoted so the comma-separated list stays a string.
  gpu: '0,1'
  multiprocessing_distributed: false
  sparsity_type: channel  # irregular, channel, filter, bn_filter.
  print_freq: 10
  # resume from checkpoint
  resume: false
  # architecture
  arch: resnet50
  # number of threads for loading images
  workers: 16
  # whether to log files
  logging: false
  # destination folder to drop log file
  log_dir: logs
  # smoothing rate in [0.0, 1.0]; 0.0 disables it
  smooth_eps: 0.0
  epsilon: 0.015686275  # (4/255)
  n_repeats: 4
  pgd_steps: 10
  pgd_epsilon: 0.015686275  # (4/255)
  pgd_random_start: true
  pgd_step_size: 0.003921569  # (1/255)
  # mixup parameter, lambda = Beta(alpha, alpha); 0.0 disables mixup
  alpha: 0.0
  verify: false
pretrain:
  # Settings for the pretrain stage.
  lr: 0.1
  epochs: 1
  # initial lr before warmup
  warmup_lr: 0.0001
  # number of epochs for lr warmup
  warmup_epochs: 0
  # NOTE(review): the file name mentions alexnet_bn while general.arch is
  # resnet50 -- confirm this is intended.
  save_model: alexnet_bn_pretrained.pt
  optimizer: sgd
  lr_scheduler: cosine
admm:
  # Settings for the admm stage.
  lr: 0.01
  epochs: 100
  # filename for the model saved by this stage
  save_model: resnet50_admm_channel_50.pt
  # filename of the model loaded by this stage
  load_model: resnet50_pretrained_imagenet.tar
  optimizer: sgd
  lr_scheduler: default
  # how many epochs are required to update z and u
  admm_epoch: 10
  # regularization strength
  rho: 0.001
  # whether to use multi rho in the admm stage
  multi_rho: true
  # whether to use masked progressive
  masked_progressive: false
  # whether to print convergence info
  verbose: false
retrain:
  # Settings for the retrain stage.
  lr: 0.01
  epochs: 100
  # initial lr before warmup
  warmup_lr: 0.0001
  # number of epochs for lr warmup
  warmup_epochs: 0
  # NOTE(review): output name still mentions alexnet although general.arch is
  # resnet50; left unchanged in case other tooling expects this file name.
  save_model: alexnet_retrained.pt
  # Points at the file written by admm.save_model. The previous value
  # (alexnet_admm.pt) did not match the admm stage output nor general.arch.
  # NOTE(review): confirm against how the training script resolves this path.
  load_model: resnet50_admm_channel_50.pt
  optimizer: sgd
  lr_scheduler: default
  # whether to use masked progressive
  masked_progressive: false
resnet18:
  # 20 conv entries; values are floats.
  # Use bnX instead of convX keys only for bn_filter pruning; otherwise use
  # conv1..convN or fc1..fcN.
  prune_ratios:
    conv1.weight: 0.9375
    conv2.weight: 0.9375
    conv3.weight: 0.9375
    conv4.weight: 0.9375
    conv5.weight: 0.9375
    conv6.weight: 0.9375
    conv7.weight: 0.9375
    conv8.weight: 0.9375
    conv9.weight: 0.9375
    conv10.weight: 0.9375
    conv11.weight: 0.9375
    conv12.weight: 0.9375
    conv13.weight: 0.9375
    conv14.weight: 0.9375
    conv15.weight: 0.9375
    conv16.weight: 0.9375
    conv17.weight: 0.9375
    conv18.weight: 0.9375
    conv19.weight: 0.9375
    conv20.weight: 0.9375
resnet50:
  # 53 conv entries plus fc1; values are floats.
  # Use bnX instead of convX keys for bn_filter pruning.
  prune_ratios:
    conv1.weight: 0.5
    conv2.weight: 0.5
    conv3.weight: 0.5
    conv4.weight: 0.5
    conv5.weight: 0.5
    conv6.weight: 0.5
    conv7.weight: 0.5
    conv8.weight: 0.5
    conv9.weight: 0.5
    conv10.weight: 0.5
    conv11.weight: 0.5
    conv12.weight: 0.5
    conv13.weight: 0.5
    conv14.weight: 0.5
    conv15.weight: 0.5
    conv16.weight: 0.5
    conv17.weight: 0.5
    conv18.weight: 0.5
    conv19.weight: 0.5
    conv20.weight: 0.5
    conv21.weight: 0.5
    conv22.weight: 0.5
    conv23.weight: 0.5
    conv24.weight: 0.5
    conv25.weight: 0.5
    conv26.weight: 0.5
    conv27.weight: 0.5
    conv28.weight: 0.5
    conv29.weight: 0.5
    conv30.weight: 0.5
    conv31.weight: 0.5
    conv32.weight: 0.5
    conv33.weight: 0.5
    conv34.weight: 0.5
    conv35.weight: 0.5
    conv36.weight: 0.5
    conv37.weight: 0.5
    conv38.weight: 0.5
    conv39.weight: 0.5
    conv40.weight: 0.5
    conv41.weight: 0.5
    conv42.weight: 0.5
    conv43.weight: 0.5
    conv44.weight: 0.5
    conv45.weight: 0.5
    conv46.weight: 0.5
    conv47.weight: 0.5
    conv48.weight: 0.5
    conv49.weight: 0.5
    conv50.weight: 0.5
    conv51.weight: 0.5
    conv52.weight: 0.5
    conv53.weight: 0.5
    fc1.weight: 0.5

vgg16:
  # 13 conv entries; values are floats.
  # Use bnX instead of convX keys for bn_filter pruning.
  prune_ratios:
    conv1.weight: 0.9375
    conv2.weight: 0.9375
    conv3.weight: 0.9375
    conv4.weight: 0.9375
    conv5.weight: 0.9375
    conv6.weight: 0.9375
    conv7.weight: 0.9375
    conv8.weight: 0.9375
    conv9.weight: 0.9375
    conv10.weight: 0.9375
    conv11.weight: 0.9375
    conv12.weight: 0.9375
    conv13.weight: 0.9375
alexnet_bn:
  # Per-layer prune ratios for the five conv entries; values are floats.
  prune_ratios:
    conv1.weight: 0.16
    conv2.weight: 0.65
    conv3.weight: 0.67
    conv4.weight: 0.66
    conv5.weight: 0.66
