import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from Utils import load
from Utils import generator
from Utils import metrics
from Utils.logger import *
from train import *
from prune import *

def run(args):
    """Run one pre-train -> prune -> post-train experiment configured by ``args``.

    Steps, in order:
      1. Seed PyTorch's RNG with ``args.seed`` and pick a device via ``load.device``.
      2. Build the prune/train/test data loaders for ``args.dataset``.
      3. Build the model, a cross-entropy loss, the optimizer named by
         ``args.optimizer`` and a ``MultiStepLR`` scheduler.
      4. Train for ``args.pre_epochs`` epochs, prune to a target sparsity of
         ``10 ** (-args.compression)``, then train for ``args.post_epochs`` epochs
         with the same optimizer and scheduler objects.
      5. Report train/prune summaries through ``print_and_log`` and, when
         ``args.save`` is truthy, write the model ``state_dict`` under
         ``./saved_models/``.

    Args:
        args: Namespace-like object carrying the experiment configuration; it
            is read by attribute (``args.seed``, ``args.dataset``, ...), and is
            also passed through to ``train_eval_loop``.

    Returns:
        None. Results are printed/logged and optionally saved to disk.
    """
    # Local import: only needed to make sure the checkpoint directory exists.
    import os

    ## Random Seed and Device ##
    torch.manual_seed(args.seed)
    device = load.device(args.gpu)

    ## Data ##
    print('Loading {} dataset.'.format(args.dataset))
    input_shape, num_classes = load.dimension(args.dataset)
    # The prune loader gets an extra size argument scaled by the number of classes.
    prune_loader = load.dataloader(args.dataset, args.prune_batch_size, True, args.workers,
                                   args.prune_dataset_ratio * num_classes)
    train_loader = load.dataloader(args.dataset, args.train_batch_size, True, args.workers)
    test_loader = load.dataloader(args.dataset, args.test_batch_size, False, args.workers)

    ## Model, Loss, Optimizer ##
    print('Creating {}-{} model.'.format(args.model_class, args.model))
    model_factory = load.model(args.model, args.model_class)
    # Only the fully-connected model takes depth/width/nonlinearity keywords.
    model_kwargs = {}
    if args.model == 'fc':
        model_kwargs = {
            'L': args.n_layers,
            'N': args.hidden_dim,
            'nonlinearity': nn.ReLU(),
        }
    model = model_factory(input_shape,
                          num_classes,
                          args.dense_classifier,
                          args.pretrained,
                          **model_kwargs).to(device)
    loss = nn.CrossEntropyLoss()
    opt_class, opt_kwargs = load.optimizer(args.optimizer)
    print(opt_class)

    # Optimizer-specific extras are best-effort: a missing attribute on `args`
    # must not abort the run, but it is reported instead of silently ignored so
    # that a partially configured optimizer is visible in the output.
    try:
        if args.optimizer in ('ammd', 'hmd', 'ahmd'):
            opt_kwargs['delta'] = args.delta
        if 'pathnorm' in args.optimizer:
            opt_kwargs['model'] = model
            # Prepend a leading dimension of 1 when input_shape has fewer than 4 entries.
            opt_kwargs['input_shape'] = (1,) + input_shape if len(input_shape) < 4 else input_shape
            opt_kwargs['update_frequency'] = args.update_frequency
            opt_kwargs['lambda_reg'] = args.lambda_reg
            # opt_kwargs['path_momentum'] = args.path_momentum
            # opt_kwargs['use_path_aware_clipping'] = args.use_path_aware_clipping
            # opt_kwargs['base_clip_norm'] = args.base_clip_norm
    except (AttributeError, TypeError) as err:
        print('Warning: could not set all optimizer-specific options for {!r}: {}'.format(
            args.optimizer, err))
    optimizer = opt_class(generator.parameters(model), lr=args.lr,
                          weight_decay=args.weight_decay, **opt_kwargs)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_drops,
                                                     gamma=args.lr_drop_rate)

    ## Pre-Train ##
    print('Pre-Train for {} epochs.'.format(args.pre_epochs))
    pre_result = train_eval_loop(model, loss, optimizer, scheduler, train_loader,
                                 test_loader, device, args.pre_epochs, args.verbose, args)

    ## Prune ##
    print('Pruning with {} for {} epochs.'.format(args.pruner, args.prune_epochs))
    pruner = load.pruner(args.pruner)(
        generator.masked_parameters(model, args.prune_bias, args.prune_batchnorm, args.prune_residual))
    # `compression` is a base-10 exponent: compression=1 -> sparsity 0.1, 2 -> 0.01, ...
    sparsity = 10**(-float(args.compression))
    prune_loop(model, loss, pruner, prune_loader, device, sparsity,
               args.compression_schedule, args.mask_scope, args.prune_epochs,
               args.reinitialize, args.prune_train_mode, args.shuffle, args.invert)

    ## Post-Train ##
    # The same optimizer and scheduler objects are reused, so their state
    # carries over from the pre-training phase.
    print('Post-Training for {} epochs.'.format(args.post_epochs))
    post_result = train_eval_loop(model, loss, optimizer, scheduler, train_loader,
                                  test_loader, device, args.post_epochs, args.verbose, args)

    ## Display Results ##
    # First and last row of each training phase, labelled by stage.
    frames = [pre_result.head(1), pre_result.tail(1), post_result.head(1), post_result.tail(1)]
    train_result = pd.concat(frames, keys=['Init.', 'Pre-Prune', 'Post-Prune', 'Final'])
    prune_result = metrics.summary(model,
                                   pruner.scores,
                                   metrics.flop(model, input_shape, device),
                                   lambda p: generator.prunable(p, args.prune_batchnorm, args.prune_residual))
    # Per-row 'sparsity' weighted by 'size' / 'flops' gives the totals that are
    # reported against the unweighted sums below.
    total_params = int((prune_result['sparsity'] * prune_result['size']).sum())
    possible_params = prune_result['size'].sum()
    total_flops = int((prune_result['sparsity'] * prune_result['flops']).sum())
    possible_flops = prune_result['flops'].sum()
    print_and_log(args.logger, "Train results:\n{}".format(train_result))
    print_and_log(args.logger, "Prune results:\n{}".format(prune_result))
    print_and_log(args.logger, "Parameter Sparsity: {}/{} ({:.4f})".format(
        total_params, possible_params, total_params / possible_params))
    print_and_log(args.logger, "FLOP Sparsity: {}/{} ({:.4f})".format(
        total_flops, possible_flops, total_flops / possible_flops))

    ## Save Results and Model ##
    if args.save:
        print('Saving results.')
        # post_result.to_csv(args.logging_file.replace('.log', '.csv'))
        # pre_result.to_pickle("{}/pre-train.pkl".format(args.result_dir))
        # post_result.to_pickle("{}/post-train.pkl".format(args.result_dir))
        # prune_result.to_pickle("{}/compression.pkl".format(args.result_dir))
        # Create the target directory first so a finished run is not lost to a
        # missing-folder error at the very last step.
        save_dir = "./saved_models"
        os.makedirs(save_dir, exist_ok=True)
        torch.save(model.state_dict(), f"{save_dir}/{args.dataset}_{args.model}_{args.hidden_dim}_{args.n_layers}_{args.pruner}_{args.compression}_{args.seed}.pt")
        # torch.save(optimizer.state_dict(),"{}/optimizer.pt".format(args.result_dir))
        # torch.save(scheduler.state_dict(),"{}/scheduler.pt".format(args.result_dir))


