import argparse

# Command-line options for the pruning / finetuning scripts.
#
# Help strings use argparse's ``%(default)s`` specifier so the documented
# default can never drift away from the actual ``default=`` value.
parser = argparse.ArgumentParser(description='Model pruning with model transfer.')

# ---------------------------------------------------------------------------
# Data and output locations
# ---------------------------------------------------------------------------
parser.add_argument(
    '--data_set',
    type=str,
    default='cub200',
    help='Select dataset to finetune. default: %(default)s',
)

parser.add_argument(
    '--data_path',
    type=str,
    default='../datasets/CUB200/CUB_200_2011_new/',
    help='The directory where the input is stored. default: %(default)s',
)

parser.add_argument(
    '--job_dir',
    type=str,
    default='./experiments/',
    help='The directory where the summaries will be stored. '
    'default: %(default)s',
)

parser.add_argument(
    '--rank_path',
    type=str,
    default=None,
    help='The directory where the rank for Hrank method is stored. '
    'default: %(default)s',
)

parser.add_argument(
    '--reset',
    action='store_true',
    help='reset the directory?',
)

# ---------------------------------------------------------------------------
# Hardware and reproducibility
# ---------------------------------------------------------------------------
parser.add_argument(
    '--gpus',
    type=str,
    default='0,1,2,3',
    help='Select gpu_id to use. default: %(default)s',
)

parser.add_argument(
    '--manualSeed',
    type=int,
    default=None,
    help='manual seed',
)

# ---------------------------------------------------------------------------
# Model and pruning configuration
# ---------------------------------------------------------------------------
parser.add_argument(
    '--cfg',
    type=str,
    default='resnet50',
    help='Detail architecture of model. default: %(default)s',
)

parser.add_argument(
    '--prune_rule',
    type=str,
    default='l1_pretrain',
    help='criterion for filter pruning. default: %(default)s '
    'optional: l1_pretrain, random_pretrain, hrank_pretrain, NS_pretrain, '
    'depgraph_pretrain, epruner_pretrain',
)

parser.add_argument(
    '--compress_rate',
    type=str,
    default='1.0*100',
    help='compress rate of each conv (for layer-wise pruning methods)',
)

parser.add_argument(
    '--channel_PR',
    type=float,
    default=None,
    help=
    'global channel pruning rate for the model (for Network Slimming method)',
)

parser.add_argument(
    '--target_flops_PR',
    type=float,
    default=None,
    help='global target pruning rate for the model (for DepGraph method)',
)

parser.add_argument(
    '--use_pretrain',
    action='store_true',
    help='if load the pretrained model.',
)

parser.add_argument(
    '--transfer',
    action='store_true',
    help='if the model is pretrained on A and finetuned on B.',
)

parser.add_argument(
    '--hard_inherit',
    action='store_true',
    help=
    'if the model is pretrained and pruned on A and finetuned with the inherited weight on B.',
)

parser.add_argument(
    '--train_slim',
    action='store_true',
    help='if the model is structured by the preset pruning rate.',
)

# NOTE(review): this help text describes a boolean-style switch, but the
# option is a float defaulting to None. The text is left untouched because the
# intended meaning of the value is not visible here -- confirm and rewrite.
parser.add_argument(
    '--preference_beta',
    type=float,
    default=None,
    help=
    'if the pruned model is reload the initialization weights from the pretrained model.',
)

parser.add_argument(
    '--init_method',
    type=str,
    default='centroids',
    help='Initial method of pruned model. default: %(default)s. '
    'optional: random, centroids, random_project',
)

# ---------------------------------------------------------------------------
# Optimisation hyper-parameters
# ---------------------------------------------------------------------------
parser.add_argument(
    '--train_batch_size',
    type=int,
    default=32,
    help='Batch size for training. default: %(default)s',
)

parser.add_argument(
    '--eval_batch_size',
    type=int,
    default=32,
    help='Batch size for validation. default: %(default)s',
)

parser.add_argument(
    '--train_epochs',
    type=int,
    default=350,
    help='The num of epochs to train. default: %(default)s',
)

parser.add_argument(
    '--finetune_epochs',
    type=int,
    default=200,
    help='The num of epochs to finetune. default: %(default)s',
)

parser.add_argument(
    '--momentum',
    type=float,
    default=0.9,
    help='Momentum for MomentumOptimizer. default: %(default)s',
)

parser.add_argument(
    '--lr_train',
    type=float,
    default=0.01,
    help='Learning rate for pretrain. default: %(default)s',
)

parser.add_argument(
    '--lr',
    type=float,
    default=0.001,
    help='Learning rate for finetune. default: %(default)s',
)

parser.add_argument(
    '--lr_type',
    type=str,
    default='step',
    help='Learning rate schedule. default: %(default)s',
)

# The two decay-step options are kept as raw strings; any splitting into
# individual steps happens in the code that consumes ``args``.
parser.add_argument(
    '--lr_decay_step_finetune',
    type=str,
    default='80,120,160',
    help='the steps of learning rate decay for finetune. '
    'default: %(default)s',
)

parser.add_argument(
    '--lr_decay_step_pretrain',
    type=str,
    default='250,290,320',
    help='the steps of learning rate decay for pretrain. '
    'default: %(default)s',
)

parser.add_argument(
    '--train_weight_decay',
    type=float,
    default=5e-4,
    help='The weight decay of loss for pretrain. default: %(default)s',
)

parser.add_argument(
    '--weight_decay',
    type=float,
    default=5e-4,
    help='The weight decay of loss for finetune. default: %(default)s',
)

# ---------------------------------------------------------------------------
# Checkpoints
# ---------------------------------------------------------------------------
parser.add_argument(
    '--resume_pretrain',
    type=str,
    default=None,
    help='Load the model from the specified checkpoint.',
)

parser.add_argument(
    '--resume_finetune',
    type=str,
    default=None,
    help='Load the model from the specified checkpoint.',
)

# ---------------------------------------------------------------------------
# Data loading and distributed training
# ---------------------------------------------------------------------------
parser.add_argument(
    '--workers',
    default=32,
    type=int,
    help='number of data loading workers',
)

parser.add_argument(
    '--world_size',
    default=1,
    type=int,
    help='number of nodes for distributed training',
)

parser.add_argument(
    '--local_rank',
    default=0,
    type=int,
    help='node rank for distributed training',
)

parser.add_argument(
    '--dist_url',
    default='tcp://localhost:10000',
    type=str,
    help='url used to set up distributed training',
)
parser.add_argument(
    '--dist_backend',
    default='nccl',
    type=str,
    help='distributed backend',
)

parser.add_argument(
    '--multiprocessing_distributed',
    action='store_true',
    help='Use multi-processing distributed training to launch '
    'N processes per node, which has N GPUs. This is the '
    'fastest way to use PyTorch for either single node or '
    'multi node data parallel training',
)

parser.add_argument(
    '--gpu',
    default=None,
    type=int,
    help='GPU id to use.',
)

# NOTE(review): an explicit empty argument list is parsed here, so sys.argv is
# ignored and every option takes its default value. Confirm this is intended
# (it keeps importing this module safe regardless of the host process's
# command line) before switching to ``parser.parse_args()``.
args = parser.parse_args(args=[])