import argparse
import os
import shutil
import random
import distutils.util
import numpy as np
import pandas as pd
import sys
import yaml

from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.optim as optim

# Module-level setup: read the environment config, make the project sources
# importable, import the project-local helpers, and pick the compute device.
#
# Alternative config location kept from the original for reference:
# config_file = './../../env.yml'
# The path is relative, so it is resolved against the current working directory.
config_file = './env.yml'
with open(config_file, 'r') as stream:
    yamlfile = yaml.safe_load(stream)
    # 'root_dir' is used in main() to locate the dataset; 'src_dir' is added to
    # sys.path below. Both keys are required (a missing key raises KeyError).
    root_dir = yamlfile['root_dir']
    src_dir = yamlfile['src_dir']

# These sys.path entries must be appended before the project-local imports
# below (presumably those packages live under src_dir -- verify against the
# repository layout).
sys.path.append(src_dir)
sys.path.append(os.path.join(src_dir, 'attack'))
sys.path.append(os.path.join(src_dir, 'models'))
from attack.dsq_attack import system_attack
from utils import mkdir_p, AverageMeter, accuracy, print_acc_conf, TrainRecorder
from cifar_utils import transform_train, transform_train_aug, transform_test, Cifardata, DistillCifardata, WarmUpLR, \
    ModelwNorm
from cifar100.models.model_selector import get_network

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class CifardataIdx(data.Dataset):
    """Dataset over in-memory arrays that also reports each sample's position.

    Every item is a ``(image, label, idx)`` triple, where ``idx`` is the
    sample's integer position in the dataset (``0 .. len(labels) - 1``), so a
    shuffling loader can still tell which samples ended up in a batch.

    Attributes:
        data: indexable collection of image arrays.
        labels: indexable collection of labels; its length defines the
            dataset length.
        transform: callable applied to the PIL image of each sample.
        indexs: ``np.arange(len(labels))`` with integer dtype.
    """

    def __init__(self, data, labels, transform):
        self.data, self.labels = data, labels
        self.transform = transform
        # One integer id per sample, aligned with ``labels``.
        self.indexs = np.arange(len(labels), dtype=int)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        # Move axis 0 to the end and cast to uint8 before handing the array to
        # PIL (presumably channel-first storage -> HxWxC; confirm with the data).
        pixels = self.data[index].transpose(1, 2, 0).astype(np.uint8)
        pil_img = Image.fromarray(pixels)
        target = self.labels[index]
        return self.transform(pil_img), target, self.indexs[index]


def train(trainloader, testloader, model, criterion, optimizer, warmup_scheduler, epoch, args):
    """Run one training epoch and record a score after every optimisation step.

    After each batch update the model is evaluated with ``score(model,
    testloader)``; that single value is recorded once for every sample of the
    batch, together with the sample's dataset index.

    Args:
        trainloader: iterable yielding ``(inputs, targets, idxs)`` batches.
        testloader: loader passed unchanged to ``score``.
        model: network being trained.
        criterion: loss callable applied as ``criterion(outputs, targets)``.
        optimizer: optimiser stepped once per batch.
        warmup_scheduler: scheduler stepped once per batch while
            ``epoch <= args.warmup``.
        epoch: current epoch number.
        args: namespace providing ``warmup``.

    Returns:
        ``(avg_loss, avg_top1, vals, indexes)``: the two running averages from
        the meters, a float32 array with one recorded score per seen sample,
        and an integer array with the matching sample indices in visiting order.
    """
    # Per-sample score values and the dataset indices they belong to.
    vals = []
    indexes = []

    model.train()

    losses = AverageMeter()
    top1 = AverageMeter()

    for inputs, targets, idxs in trainloader:
        # score() below switches the model to eval mode, so train mode has to
        # be re-entered at the start of every batch.
        model.train()

        if epoch <= args.warmup:
            warmup_scheduler.step()

        inputs = inputs.to(device, torch.float)
        targets = targets.to(device, torch.long)

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert the batch indices to plain Python ints once; iterating the
        # tensor directly would yield 0-d tensors that numpy then has to
        # convert element by element.
        batch_ids = torch.as_tensor(idxs).tolist()
        vals += [score(model, testloader)] * len(batch_ids)
        indexes += batch_ids

        # measure accuracy and record loss
        prec1, _ = accuracy(outputs.data, targets.data, topk=(1, 5))
        batch_size = inputs.size()[0]
        losses.update(loss.item(), batch_size)
        top1.update(prec1.item() / 100.0, batch_size)

    vals = np.array(vals, dtype=np.float32)
    indexes = np.array(indexes, dtype=int)

    return (losses.avg, top1.avg, vals, indexes)


def score(model, testloader):
    """Return the mean cross-entropy of ``model`` over ``testloader``.

    The per-batch sums come from ``my_xe_score`` (negative log of the
    probability assigned to the true class) and are divided by the total
    number of samples seen.

    Side effects: the model is moved to the module-level ``device`` and left
    in eval mode; callers that keep training must call ``model.train()``.

    Args:
        model: network to evaluate; called as ``model(X)``.
        testloader: iterable yielding ``(X, y_true)`` tensor batches.

    Returns:
        The average cross-entropy as a Python float.

    Raises:
        ZeroDivisionError: if ``testloader`` yields no samples.
    """
    total_xe = 0.0
    n = 0

    # Device placement does not change between batches, so do it once.
    model = model.to(device)
    model.eval()

    with torch.no_grad():
        for X, y_true in testloader:
            y_hat = model(X.to(device))
            # Normalise over the class axis (dim=1). my_xe_score indexes the
            # result as probs[row, label], so each row must be a distribution
            # over classes; dim=0 would normalise across the batch instead.
            y_prob = F.softmax(y_hat, dim=1).cpu().numpy()
            y = y_true.cpu().numpy()

            n += y_true.size(0)
            total_xe += my_xe_score(y_prob, y)

    return float(total_xe / n)


def my_xe_score(probs, y):
    """Sum the negative log-probabilities assigned to the true labels.

    Args:
        probs: 2-D array indexed as ``probs[row, class]``.
        y: integer class label for each row of ``probs``.

    Returns:
        The summed ``-log(p_true)`` over all rows; each probability is clipped
        from below at ``1e-12`` so a zero probability gives a finite value.
    """
    rows = np.arange(len(y))
    picked = probs[rows, y]
    floored = np.clip(picked, 1e-12, None)
    return (-np.log(floored)).sum()


def test(testloader, model, criterion):
    """Evaluate ``model`` on ``testloader`` and return average loss and top-1 accuracy.

    The model is switched to eval mode and left there. Gradients are disabled
    for the whole pass, since nothing is back-propagated here and building the
    autograd graph would only cost memory.

    Args:
        testloader: iterable yielding ``(inputs, targets)`` batches.
        model: network to evaluate.
        criterion: loss callable applied as ``criterion(outputs, targets)``.

    Returns:
        ``(avg_loss, avg_top1)`` as accumulated by the two meters; the top-1
        value is recorded as ``prec1 / 100``.
    """
    model.eval()

    losses = AverageMeter()
    top1 = AverageMeter()

    with torch.no_grad():
        for inputs, targets in testloader:
            inputs = inputs.to(device, torch.float)
            targets = targets.to(device, torch.long)
            outputs = model(inputs)

            loss = criterion(outputs, targets)

            prec1, _ = accuracy(outputs.data, targets.data, topk=(1, 5))
            batch_size = inputs.size()[0]
            losses.update(loss.item(), batch_size)
            top1.update(prec1.item() / 100.0, batch_size)

    return (losses.avg, top1.avg)


def save_checkpoint(state, is_best, acc, checkpoint):
    """Write ``state`` to ``<checkpoint>/model_last.pth.tar``.

    When ``is_best`` is true the file is additionally copied to
    ``<checkpoint>/model_best.pth.tar``.

    Args:
        state: object serialised with ``torch.save``.
        is_best: whether this state should also become the "best" checkpoint.
        acc: accepted for interface compatibility; not used.
        checkpoint: directory that receives the files; created (including
            parents) when it does not exist yet.
    """
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(checkpoint, exist_ok=True)
    filepath = os.path.join(checkpoint, 'model_last.pth.tar')
    torch.save(state, filepath)
    if is_best:
        shutil.copyfile(filepath, os.path.join(checkpoint, 'model_best.pth.tar'))


def get_learning_rate(optimizer):
    """Return the ``'lr'`` entry of every parameter group of ``optimizer``, in order."""
    return [group['lr'] for group in optimizer.param_groups]


def shuffle_backward(l, order):
    """Scatter ``l`` back to the positions named by ``order``.

    Element ``l[i]`` is written to slot ``order[i]`` of a fresh float32 array
    of length ``len(l)``; slots never named in ``order`` stay zero.
    """
    restored = np.zeros(len(l), dtype=np.float32)
    for src, dst in enumerate(order):
        restored[dst] = l[src]
    return restored


def main():
    """Train a CIFAR-100 classifier and record per-sample importance scores.

    Steps:
      1. Parse the command-line options.
      2. Load the pre-partitioned ``.npy`` arrays from
         ``<root_dir>/cifar100/data/partition``.
      3. Train the selected network with SGD (per-batch warm-up plus
         multi-step learning-rate decay) for ``--classifier_epochs`` epochs,
         evaluating on the train and test partitions after every epoch.
      4. Maintain a running average, over epochs, of the per-sample score
         differences returned by ``train``.
      5. Write the best and last checkpoints, ``train_record.csv`` and
         ``imp.npy`` into the checkpoint directory.
    """
    parser = argparse.ArgumentParser(description='setting for cifar100')
    parser.add_argument('--model', type=str, default='mobilenetv3_small_50')
    parser.add_argument('--classifier_epochs', type=int, default=200, help='classifier epochs')
    parser.add_argument('--attack_epochs', type=int, default=150, help='attack epochs in NN attack')
    parser.add_argument('--print_epoch', type=int, default=5,
                        help='print model training stats per print_epoch_splitai during splitai training')
    parser.add_argument('--batch_size', type=int, default=256, help='batch size')
    parser.add_argument('--warmup', type=int, default=1, help='warm up epochs')
    parser.add_argument('--num_worker', type=int, default=1, help='number workers')
    parser.add_argument('--num_class', type=int, default=100, help='num class')
    parser.add_argument('--data_aug', type=distutils.util.strtobool, default=True, help='turn on data augmentation')
    parser.add_argument('--run_idx', type=int, default=100, help='idx running')
    parser.add_argument('--save_path', default='save_checkpoints/', type=str, help='folder to save the checkpoints')
    parser.add_argument('--load_path', default='save_checkpoints/', type=str, help='folder to load the checkpoints')

    args = parser.parse_args()
    print(dict(args._get_kwargs()))

    batch_size = args.batch_size
    num_class = args.num_class
    classifier_epochs = args.classifier_epochs
    warmup = args.warmup
    num_worker = args.num_worker

    DATASET_PATH = os.path.join(root_dir, 'cifar100', 'data')
    partition_dir = os.path.join(DATASET_PATH, 'partition')
    checkpoint_path = os.path.join(args.save_path, 'cifar100', args.model, 'shapley',
                                   'aug' if args.data_aug else 'no_aug', 'original', str(args.run_idx))
    print(checkpoint_path)

    train_data_tr_attack = np.load(os.path.join(partition_dir, 'tr_data.npy'))
    train_label_tr_attack = np.load(os.path.join(partition_dir, 'tr_label.npy'))
    train_data_te_attack = np.load(os.path.join(partition_dir, 'te_data.npy'))
    train_label_te_attack = np.load(os.path.join(partition_dir, 'te_label.npy'))
    train_data = np.load(os.path.join(partition_dir, 'train_data.npy'))
    train_label = np.load(os.path.join(partition_dir, 'train_label.npy'))
    test_data = np.load(os.path.join(partition_dir, 'test_data.npy'))
    test_label = np.load(os.path.join(partition_dir, 'test_label.npy'))
    ref_data = np.load(os.path.join(partition_dir, 'ref_data.npy'))
    ref_label = np.load(os.path.join(partition_dir, 'ref_label.npy'))
    all_test_data = np.load(os.path.join(partition_dir, 'all_test_data.npy'))
    all_test_label = np.load(os.path.join(partition_dir, 'all_test_label.npy'))

    # print first 20 labels for each subset, for checking with other experiments
    print(train_label_tr_attack[:20])
    print(train_label_te_attack[:20])
    print(test_label[:20])
    print(ref_label[:20])

    # The training set uses the index-reporting dataset so that train() can
    # attribute scores to individual samples; augmentation is optional.
    if args.data_aug:
        trainset = CifardataIdx(train_data, train_label, transform_train_aug)
    else:
        trainset = CifardataIdx(train_data, train_label, transform_train)
    # Evaluation views: the same training data without augmentation, plus the
    # test and reference partitions.
    traintestset = Cifardata(train_data, train_label, transform_test)
    testset = Cifardata(test_data, test_label, transform_test)
    refset = Cifardata(ref_data, ref_label, transform_test)

    trset = Cifardata(train_data_tr_attack, train_label_tr_attack, transform_test)
    teset = Cifardata(train_data_te_attack, train_label_te_attack, transform_test)
    alltestset = Cifardata(all_test_data, all_test_label, transform_test)

    # trloader, teloader and refloader are built but not consumed below.
    trloader = torch.utils.data.DataLoader(trset, batch_size=batch_size, shuffle=False, num_workers=num_worker)
    teloader = torch.utils.data.DataLoader(teset, batch_size=batch_size, shuffle=False, num_workers=num_worker)
    alltestloader = torch.utils.data.DataLoader(alltestset, batch_size=batch_size, shuffle=False,
                                                num_workers=num_worker)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_worker)
    traintestloader = torch.utils.data.DataLoader(traintestset, batch_size=batch_size, shuffle=False,
                                                  num_workers=num_worker)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_worker)
    refloader = torch.utils.data.DataLoader(refset, batch_size=batch_size, shuffle=False, num_workers=num_worker)

    best_acc = 0.00
    # Honour --num_class instead of a hard-coded 100 (the default is still 100).
    model_1 = get_network(arch=args.model, num_classes=num_class)
    model = ModelwNorm(model_1)

    criterion = nn.CrossEntropyLoss()
    model = model.to(device, torch.float)
    criterion = criterion.to(device, torch.float)

    iter_per_epoch = len(trainloader)
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    train_scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[60, 120, 160], gamma=0.2
    )  # learning rate decay
    warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * warmup)

    print("training sets: {:d}".format(len(trainset)))

    # Create the output directory up front so the saves after training cannot
    # fail on a missing directory (e.g. when no epoch was ever flagged as best).
    os.makedirs(checkpoint_path, exist_ok=True)

    trer = TrainRecorder()
    imp_scores = np.zeros((len(trainset)), dtype=np.float32)
    best_epoch = 0
    # Defined before the loop so the final checkpoint below is still
    # well-formed when classifier_epochs is 0.
    epoch = 0
    test_acc = 0.0
    for epoch in range(1, classifier_epochs + 1):
        if epoch > 1:
            train_scheduler.step(epoch)

        # The per-step score inside train() is computed on the "all test" loader.
        training_loss, training_acc, vals, idxs = \
            train(trainloader, alltestloader, model, criterion, optimizer, warmup_scheduler, epoch, args)
        # eval
        train_loss, train_acc = test(traintestloader, model, criterion)
        test_loss, test_acc = test(testloader, model, criterion)
        # Update the importance scores as a running mean over epochs:
        #   new = old * (epoch - 1) / epoch + delta / epoch
        # imp_scores[idxs] puts the old scores in this epoch's visiting order,
        # and shuffle_backward() restores dataset order afterwards.
        # NOTE(review): delta = vals - np.roll(vals, 1) is non-zero only for the
        # first sample of each batch (vals is constant within a batch), and
        # delta[0] compares against the last value of the same epoch. Confirm
        # this is the intended marginal-contribution definition.
        imp_scores = (imp_scores[idxs] * (epoch-1)) / epoch + (vals - np.roll(vals, 1)) / epoch
        imp_scores = shuffle_backward(imp_scores, idxs)
        # record
        trer.update(train_loss, train_acc, test_loss, test_acc, training_loss, training_acc)
        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        if is_best:
            best_epoch = epoch
            # save the best
            save_checkpoint({
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best, best_acc, checkpoint=checkpoint_path)

        lr = get_learning_rate(optimizer)
        print(
            'Epoch: [{:d} | {:d}]: learning rate:{:.4f}. acc: training|train|test: {:.4f}|{:.4f}|{:.4f}. loss: training|train|test: {:.4f}|{:.4f}|{:.4f}'.format(
                epoch, classifier_epochs, lr[0],
                training_acc, train_acc, test_acc, training_loss, train_loss, test_loss
            )
        )
        sys.stdout.flush()
    # save the record
    trer.save(checkpoint_path, 'train_record.csv')
    # save importance score
    print("Imp Score:", imp_scores)
    np.save(os.path.join(checkpoint_path, 'imp.npy'), imp_scores)
    # save the last
    save_checkpoint({
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'acc': test_acc,
        'best_acc': best_acc,
        'optimizer': optimizer.state_dict(),
    }, False, best_acc, checkpoint=checkpoint_path)

    print("best acc: {:.4f}".format(best_acc))
    print("Final saved epoch {:d} acc: {:.4f}".format(best_epoch, best_acc))


if __name__ == '__main__':
    main()
