import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from Utils import load
from copy import deepcopy
import logging
from logger import set_logger
from Utils import generator
from Utils import metrics
from train import *
from prune import *

def run(args):
    if not args.save:
        print("This experiment requires an expid.")
        quit()

    ## Random Seed and Device ##
    device = load.device(args.gpu)

    # Set logger
    set_logger(args.result_dir)
    logger = logging.getLogger()
    logger.info(f"Result folder path: {args.result_dir}")
    
    ## Data ##
    logger.info('Loading {} dataset.'.format(args.dataset))
    input_shape, num_classes = load.dimension(args.dataset) 
    # prune_loader = load.dataloader(args.dataset, args.prune_batch_size, True, args.workers, args.prune_dataset_ratio * num_classes)
    prune_loader = None
    trn_loader, val_loader = load.dataloader(args.dataset, args.train_batch_size, True, args.workers)
    tst_loader = load.dataloader(args.dataset, args.test_batch_size, False, args.workers)

    loss = nn.CrossEntropyLoss()
    opt_class, opt_kwargs = load.optimizer(args.optimizer)
    
    ## Create N matching tickets##
    logger.info('Creating {} model.'.format(args.model))
    for i in range(args.num_models):
        model = load.model(args.model, args.model_class)(input_shape, 
                                                        num_classes, 
                                                        args.dense_classifier,
                                                        args.pretrained).to(device)
        optimizer = opt_class(generator.parameters(model), lr=args.lr, weight_decay=args.weight_decay, **opt_kwargs)
        scheduler = torch.optim.lr_scheduler.ConstantLR(optimizer, factor=1., total_iters=0)
        if args.matching_epochs > 0:
            # print(len(trn_loader), len(val_loader))
            train_eval_loop(args, args.matching_epochs, logger, model, loss, optimizer, scheduler, trn_loader, val_loader, tst_loader, device, args.matching_epochs, args.verbose)
        
        ## Save Matching ticket ##
        torch.save(model.state_dict(),"{}/matching_ticket_e{:03d}_s{:03d}.pt".format(args.result_dir, args.matching_epochs, i+1))
        logger.info(f'Saved the matching ticket at epoch {args.matching_epochs}!')

    ## Train-Prune Loop ##
    logger.info('{} compression ratio for {} iterations'.format(args.imp_ratio, args.imp_iter))
        
    # Create N models
    models, params = [], []
    logger.info(f'Creating {args.num_models} models')
    for i in range(args.num_models):
        # models.append(deepcopy(model))
        model = load.model(args.model, args.model_class)(input_shape, 
                                                     num_classes, 
                                                     args.dense_classifier,
                                                     args.pretrained).to(device)
        model.load_state_dict(torch.load("{}/matching_ticket_e{:03d}_s{:03d}.pt".format(args.result_dir, args.matching_epochs, i+1), map_location=device))
        models.append(model)
        params += model.parameters()

    for l in range(args.imp_iter):
        logger.info(f'=====Start IMP iteration {l+1}/{args.imp_iter}=====')
        
        optimizer = opt_class(params, lr=args.lr, weight_decay=args.weight_decay, **opt_kwargs)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 
                                                                        args.pre_epochs, 
                                                                        T_mult=1, 
                                                                        eta_min=0.)
        # Pre Train Model
        best_models = train_eval_loop_ens(args, logger, models, loss, optimizer, scheduler, trn_loader, val_loader,
                        tst_loader, device, args.pre_epochs, args.verbose, data_diversity=False)

        for i in range(args.num_models):
            torch.save(models[i].state_dict(),"{}/ckpt_r{:.2f}_i{:03d}_s{:03d}.pt".format(args.result_dir, args.imp_ratio, l, i+1))
        
        # Prune Model
        pruners = []
        for i, model in enumerate(models):
            pruner = load.pruner(args.pruner)(generator.masked_parameters(model, args.prune_bias, args.prune_batchnorm, args.prune_residual))
            pruners.append(pruner)
            sparsity = args.imp_ratio ** (l+1)
            prune_loop(args, model, loss, pruner, prune_loader, device, sparsity, 
                    args.compression_schedule, args.mask_scope, args.prune_epochs, args.reinitialize, args.prune_train_mode, args.shuffle, args.invert)
            
            ## Logging pruning results
            logger.info(f"(Model{i} prune stats)")
            prune_result = metrics.summary(model, 
                                            pruner.scores,
                                            metrics.flop(model, input_shape, device),
                                            lambda p: generator.prunable(p, args.prune_batchnorm, args.prune_residual))
            total_params = int((prune_result['sparsity'] * prune_result['size']).sum())
            possible_params = prune_result['size'].sum()
            total_flops = int((prune_result['sparsity'] * prune_result['flops']).sum())
            possible_flops = prune_result['flops'].sum()
            logger.info("Parameter Sparsity: {}/{} ({:.4f})".format(total_params, possible_params, total_params / possible_params))
            logger.info("FLOP Sparsity: {}/{} ({:.4f})".format(total_flops, possible_flops, total_flops / possible_flops))
            
            
        logger.info(f"Pruning sparsity: {sparsity}")
        
        if args.imp_type == 'none':
            pass
        elif args.imp_type == 'weight':
            # Rewind Model's Weights
            for i, model in enumerate(models):
                model_dict = model.state_dict()
                original_dict = torch.load("{}/matching_ticket_e{:03d}_s{:03d}.pt".format(args.result_dir, args.matching_epochs, i+1), map_location=device)
                original_weights = dict(filter(lambda v: (v[0].endswith(('.weight', '.bias'))), original_dict.items()))
                model_dict.update(original_weights)
                model.load_state_dict(model_dict)
        elif args.imp_type == 'lr':
            pass
        
        
    # Train Model
    logger.info(f"=====Start post-training!=====")
    optimizer = opt_class(params, lr=args.lr, weight_decay=args.weight_decay, **opt_kwargs)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 
                                                                     args.post_epochs, 
                                                                     T_mult=1, 
                                                                     eta_min=0.)
    best_models = train_eval_loop_ens(args, logger, models, loss, optimizer, scheduler, trn_loader, val_loader,
                                    tst_loader, device, args.post_epochs, args.verbose, data_diversity=False )
    

    for i in range(args.num_models):
        torch.save(models[i].state_dict(),"{}/ckpt_r{:.2f}_i{:03d}_s{:03d}.pt".format(args.result_dir, args.imp_ratio, args.imp_iter, i+1))