import json
import os
import torch
from PIL import Image
import os, random, math
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset
import models
import copy
import torch
import numpy as np
from torchvision import datasets, transforms

class MyEncoder(json.JSONEncoder):
    """JSON encoder that also understands NumPy scalars and arrays.

    The standard encoder rejects NumPy types such as ``np.int64`` or
    ``np.float32``; this subclass converts them to their plain Python
    equivalents before serialization.
    """

    def default(self, obj):
        """Return a JSON-serializable version of ``obj``.

        :param obj: object the base encoder could not serialize
        :return: ``int``/``float``/``bool``/``list`` equivalent of a NumPy value
        :raises TypeError: (from the base class) for any other unsupported type
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            # tolist() recursively converts the elements to Python scalars.
            return obj.tolist()
        return super().default(obj)

def mnist_iid(dataset, num_users):
    """
    Split a dataset into equally sized, disjoint I.I.D. client shares.

    Each client receives ``int(len(dataset) / num_users)`` indices drawn
    without replacement via ``np.random.choice``; leftover indices stay
    unassigned.
    :param dataset: any object supporting ``len()``
    :param num_users: number of clients
    :return: dict mapping client id -> set of sample indices
    """
    total = len(dataset)
    per_user = int(total / num_users)
    remaining = list(range(total))
    dict_users = {}
    for user in range(num_users):
        picked = set(np.random.choice(remaining, per_user, replace=False))
        dict_users[user] = picked
        # drop what was just handed out so the shares stay disjoint
        remaining = list(set(remaining) - picked)
    return dict_users


def mnist_noniid(dataset, num_users):
    """
    Sample non-I.I.D client data from an MNIST-style dataset.

    The samples are sorted by label and cut into 25 equally sized shards;
    every client then receives 5 randomly chosen shards, so each client only
    sees a few classes. Samples that do not fill a whole shard are left
    unassigned.
    :param dataset: object supporting ``len()`` with a ``targets`` attribute
        (tensor, array or list of labels)
    :param num_users: number of clients (at most 5, since 5 * 5 = 25 shards)
    :return: dict mapping client id -> integer array of sample indices
    :raises ValueError: if there are not enough shards for ``num_users``
    """
    num_shards, shards_per_user = 25, 5
    num_imgs = len(dataset) // num_shards
    if num_users * shards_per_user > num_shards:
        raise ValueError(
            'mnist_noniid supports at most %d users (%d shards, %d per user), got %d'
            % (num_shards // shards_per_user, num_shards, shards_per_user, num_users))

    idx_shard = list(range(num_shards))
    # Only the samples that fit into whole shards take part in the split.
    labels = np.asarray(dataset.targets)[:num_shards * num_imgs]

    # sort sample indices by label
    idxs = labels.argsort()

    # divide and assign 5 shards/client
    dict_users = {}
    for i in range(num_users):
        rand_set = set(np.random.choice(idx_shard, shards_per_user, replace=False))
        idx_shard = list(set(idx_shard) - rand_set)
        # Concatenating the integer slices directly keeps an integer dtype
        # (starting from an empty float array would yield float indices).
        dict_users[i] = np.concatenate(
            [idxs[rand * num_imgs:(rand + 1) * num_imgs] for rand in rand_set], axis=0)
    return dict_users


def cifar_iid(dataset, num_users):
    """
    Partition a dataset uniformly at random into disjoint client shares.

    Every client gets ``int(len(dataset) / num_users)`` indices sampled
    without replacement from the indices not yet assigned.
    :param dataset: any object supporting ``len()``
    :param num_users: number of clients
    :return: dict mapping client id -> set of sample indices
    """
    share = int(len(dataset) / num_users)
    pool = [*range(len(dataset))]
    assignments = {}
    for client_id in range(num_users):
        assignments[client_id] = set(np.random.choice(pool, share, replace=False))
        # remove the freshly assigned indices from the candidate pool
        pool = list(set(pool) - assignments[client_id])
    return assignments


def cifar_noniid(dataset, num_users):
    """
    Sample non-I.I.D client data from a CIFAR10-style dataset.

    The samples are sorted by label and cut into 25 equally sized shards
    (2000 images each for the 50,000-image CIFAR10 training set); every
    client then receives 5 randomly chosen shards. Samples that do not fill a
    whole shard are left unassigned.
    :param dataset: object supporting ``len()`` with a ``targets`` attribute
        (list or array of labels)
    :param num_users: number of clients (at most 5, since 5 * 5 = 25 shards)
    :return: dict mapping client id -> integer array of sample indices
    :raises ValueError: if there are not enough shards for ``num_users``
    """
    num_shards, shards_per_user = 25, 5
    # Derive the shard size from the dataset instead of hard-coding 2000 so
    # that datasets of other sizes do not crash when indices meet labels.
    num_imgs = len(dataset) // num_shards
    if num_users * shards_per_user > num_shards:
        raise ValueError(
            'cifar_noniid supports at most %d users (%d shards, %d per user), got %d'
            % (num_shards // shards_per_user, num_shards, shards_per_user, num_users))

    idx_shard = list(range(num_shards))
    labels = np.array(dataset.targets)[:num_shards * num_imgs]

    # sort sample indices by label
    idxs = labels.argsort()

    # divide and assign 5 shards/client
    dict_users = {}
    for i in range(num_users):
        rand_set = set(np.random.choice(idx_shard, shards_per_user, replace=False))
        idx_shard = list(set(idx_shard) - rand_set)
        # Concatenating the integer slices directly keeps an integer dtype.
        dict_users[i] = np.concatenate(
            [idxs[rand * num_imgs:(rand + 1) * num_imgs] for rand in rand_set], axis=0)
    return dict_users


def cifar100_noniid(dataset, num_users):
    """
    Sample non-I.I.D client data from a CIFAR100-style dataset.

    The samples are sorted by label and cut into 500 equally sized shards
    (100 images each for the 50,000-image CIFAR100 training set); every
    client then receives 100 randomly chosen shards. Samples that do not fill
    a whole shard are left unassigned.
    :param dataset: object supporting ``len()`` with a ``targets`` attribute
        (list or array of labels)
    :param num_users: number of clients (at most 5, since 5 * 100 = 500 shards)
    :return: dict mapping client id -> integer array of sample indices
    :raises ValueError: if there are not enough shards for ``num_users``
    """
    num_shards, shards_per_user = 500, 100
    # Derive the shard size from the dataset instead of hard-coding 100 so
    # that datasets of other sizes do not crash when indices meet labels.
    num_imgs = len(dataset) // num_shards
    if num_users * shards_per_user > num_shards:
        raise ValueError(
            'cifar100_noniid supports at most %d users (%d shards, %d per user), got %d'
            % (num_shards // shards_per_user, num_shards, shards_per_user, num_users))

    idx_shard = list(range(num_shards))
    labels = np.array(dataset.targets)[:num_shards * num_imgs]

    # sort sample indices by label
    idxs = labels.argsort()

    # divide and assign 100 shards/client
    dict_users = {}
    for i in range(num_users):
        rand_set = set(np.random.choice(idx_shard, shards_per_user, replace=False))
        idx_shard = list(set(idx_shard) - rand_set)
        # Concatenating the integer slices directly keeps an integer dtype.
        dict_users[i] = np.concatenate(
            [idxs[rand * num_imgs:(rand + 1) * num_imgs] for rand in rand_set], axis=0)
    return dict_users


class DatasetSplit(Dataset):
    """View of a dataset restricted to a given list of sample indices.

    Item ``k`` of the split is item ``idxs[k]`` of the wrapped dataset, with
    both the image and the label returned as tensors.
    """

    def __init__(self, dataset, idxs):
        """
        :param dataset: indexable dataset yielding ``(image, label)`` pairs
        :param idxs: iterable of sample indices (converted with ``int``, so
            float or NumPy index values are accepted)
        """
        self.dataset = dataset
        self.idxs = [int(i) for i in idxs]

    def __len__(self):
        return len(self.idxs)

    def __getitem__(self, item):
        image, label = self.dataset[self.idxs[item]]
        return self._to_tensor(image), self._to_tensor(label)

    @staticmethod
    def _to_tensor(value):
        """Return ``value`` as a new tensor that shares no memory with it."""
        if isinstance(value, torch.Tensor):
            # Same result as torch.tensor(value), but without the
            # "To copy construct from a tensor ..." UserWarning per sample.
            return value.detach().clone()
        return torch.tensor(value)


def get_dataset(args):
    """ Returns train and test datasets and a user group which is a dict where
    the keys are the user index and the values are the corresponding data for
    each of those users.

    :param args: namespace providing ``dataset`` (one of ``'cifar10'``,
        ``'cifar100'``, ``'mnist'``, ``'fmnist'``), ``iid`` (truthy for an
        I.I.D. split) and ``num_users``
    :return: tuple ``(train_dataset, test_dataset, user_groups)``
    :raises ValueError: if ``args.dataset`` is not a supported name
    """
    data_dir = '/youtu-face-identify-public/jiezhang/data'
    name = args.dataset

    if name in ('cifar10', 'cifar100'):
        if name == 'cifar10':
            dataset_cls, noniid_sampler = datasets.CIFAR10, cifar_noniid
        else:
            dataset_cls, noniid_sampler = datasets.CIFAR100, cifar100_noniid
        iid_sampler = cifar_iid

        # Augmentation is applied to the training split only.
        train_transform = transforms.Compose(
            [
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
            ])
        test_transform = transforms.Compose([
            transforms.ToTensor(),
        ])

        # The CIFAR datasets are expected to be present under data_dir already
        # (no download is requested here).
        train_dataset = dataset_cls(data_dir, train=True,
                                    transform=train_transform)
        test_dataset = dataset_cls(data_dir, train=False,
                                   transform=test_transform)
    elif name in ('mnist', 'fmnist'):
        dataset_cls = datasets.MNIST if name == 'mnist' else datasets.FashionMNIST
        iid_sampler, noniid_sampler = mnist_iid, mnist_noniid

        apply_transform = transforms.Compose([
            transforms.ToTensor()])

        train_dataset = dataset_cls(data_dir, train=True, download=True,
                                    transform=apply_transform)
        test_dataset = dataset_cls(data_dir, train=False, download=True,
                                   transform=apply_transform)
    else:
        # Fail with a clear message instead of an UnboundLocalError at return.
        raise ValueError('Unsupported dataset: %r' % (name,))

    # sample training data amongst users
    sampler = iid_sampler if args.iid else noniid_sampler
    user_groups = sampler(train_dataset, args.num_users)

    return train_dataset, test_dataset, user_groups


def mean_average_weights(w):
    """
    Compute the element-wise mean of several state dicts.

    ``w`` is a sequence of state dicts sharing the same keys. The first dict
    is deep-copied and the others are accumulated into the copy, so the inputs
    are left untouched. If processing a key raises, the error, the key and the
    number of models are printed and the loop moves on to the next key.
    """
    n_models = len(w)
    averaged = copy.deepcopy(w[0])
    for name in averaged.keys():
        try:
            for idx in range(1, n_models):
                averaged[name] += w[idx][name]
            is_counter = 'num_batches_tracked' in name
            averaged[name] = (averaged[name].true_divide(n_models) if is_counter
                              else torch.div(averaged[name], n_models))
        except Exception as err:
            print(err)
            print(name)
            print(n_models)
    return averaged

def mean_average_weights_baseline(w):
    """
    Compute the element-wise mean of the state dicts stored at position 0 of
    each entry of ``w``.

    The first state dict is deep-copied and the others are accumulated into
    the copy, so the inputs are left untouched. If processing a key raises,
    the error, the key and the number of entries are printed and the loop
    moves on to the next key.
    """
    count = len(w)
    result = copy.deepcopy(w[0][0])
    for param_name in result.keys():
        try:
            for position in range(1, count):
                result[param_name] += w[position][0][param_name]
            if 'num_batches_tracked' not in param_name:
                result[param_name] = torch.div(result[param_name], count)
            else:
                result[param_name] = result[param_name].true_divide(count)
        except Exception as exc:
            print(exc)
            print(param_name)
            print(count)
    return result

def federated_average_weights(w, net_dataidx_map):
    """
    Returns the sample-count-weighted average of the weights (FedAvg).

    :param w: sequence of state dicts; ``w[i]`` belongs to client ``i``
    :param net_dataidx_map: dict mapping client id -> dict whose values are
        sample counts (their sum is the client's number of samples)
    :return: new state dict where every entry is the sum over clients of
        ``(n_samples[i] / n_total) * w[i][key]``; the inputs are not modified
    """
    n_samples = {client: sum(counts.values())
                 for client, counts in net_dataidx_map.items()}
    n_total = sum(n_samples.values())
    # Plain Python floats keep the tensor arithmetic independent of whether
    # the counts were Python ints or NumPy integers.
    coeffs = [float(n_samples[i] / n_total) for i in range(len(w))]

    # The deep copy only serves as a container of the right type and key
    # order; every value is overwritten below.
    w_avg = copy.deepcopy(w[0])
    for key in w_avg.keys():
        w_avg[key] = coeffs[0] * w[0][key]
        for i in range(1, len(w)):
            w_avg[key] += coeffs[i] * w[i][key]
    return w_avg

def federated_average_weights_expand(w, net_dataidx_map):
    """
    Returns the sample-count-weighted average of the weights when every
    client contributes two consecutive entries of ``w``.

    Each client's sample count is listed twice, in the iteration order of
    ``net_dataidx_map``, so ``w[2k]`` and ``w[2k + 1]`` are both weighted by
    the count of the k-th client.
    :param w: sequence of state dicts, two per client
    :param net_dataidx_map: dict mapping client id -> dict whose values are
        sample counts (their sum is the client's number of samples)
    :return: new state dict with the weighted average; the inputs are not
        modified
    """
    n_samples = []
    for value_dict in net_dataidx_map.values():
        client_total = sum(value_dict.values())
        n_samples.extend((client_total, client_total))
    n_total = sum(n_samples)
    # Plain Python floats keep the tensor arithmetic independent of whether
    # the counts were Python ints or NumPy integers.
    coeffs = [float(n_samples[i] / n_total) for i in range(len(w))]

    # The deep copy only serves as a container of the right type and key
    # order; every value is overwritten below.
    w_avg = copy.deepcopy(w[0])
    for key in w_avg.keys():
        w_avg[key] = coeffs[0] * w[0][key]
        for i in range(1, len(w)):
            w_avg[key] += coeffs[i] * w[i][key]
    return w_avg


def setup_seed(seed):
    """Seed Python's, NumPy's and PyTorch's (CPU and CUDA) random number
    generators with ``seed`` and switch cuDNN to deterministic mode."""
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        torch.cuda.manual_seed,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    cudnn.deterministic = True


def kldiv(logits, targets, T=1.0, reduction='batchmean'):
    """Temperature-scaled KL divergence between two sets of logits.

    Both inputs are divided by ``T`` and normalized over dim 1 (``targets``
    with softmax, ``logits`` with log-softmax); the ``F.kl_div`` result is
    multiplied by ``T * T``.
    """
    log_probs = F.log_softmax(logits / T, dim=1)
    target_probs = F.softmax(targets / T, dim=1)
    divergence = F.kl_div(log_probs, target_probs, reduction=reduction)
    return divergence * (T * T)


def pack_images(images, col=None, channel_last=False, padding=1):
    """Tile a batch of images into a single (C, H', W') grid image.

    ``images`` is an (N, C, H, W) array, an (N, H, W, C) array when
    ``channel_last`` is set, or a list/tuple of per-image arrays. Tiles are
    placed row by row with ``padding`` zero-valued pixels between them. When
    ``col`` is None the grid uses ``ceil(sqrt(N))`` columns.
    """
    if isinstance(images, (list, tuple)):
        images = np.stack(images, 0)
    if channel_last:
        # move channels in front of the spatial axes: NHWC -> NCHW
        images = images.transpose(0, 3, 1, 2)
    assert len(images.shape) == 4
    assert isinstance(images, np.ndarray)

    count, channels, height, width = images.shape
    n_cols = int(math.ceil(math.sqrt(count))) if col is None else col
    n_rows = int(math.ceil(count / n_cols))

    canvas_shape = (
        channels,
        height * n_rows + padding * (n_rows - 1),
        width * n_cols + padding * (n_cols - 1),
    )
    canvas = np.zeros(canvas_shape, dtype=images.dtype)
    for position, tile in enumerate(images):
        grid_row, grid_col = divmod(position, n_cols)
        top = grid_row * (height + padding)
        left = grid_col * (width + padding)
        canvas[:, top:top + height, left:left + width] = tile
    return canvas


def save_image_batch(imgs, output, col=None, size=None, pack=True):
    """Save a batch of images either as one packed grid or as separate files.

    :param imgs: image batch; a torch tensor is clamped to [0, 1], scaled by
        255 and converted to a ``uint8`` NumPy array first
    :param output: target path; its directory is created when missing
    :param col: number of grid columns, forwarded to ``pack_images``
    :param size: optional size of the packed image: a list/tuple is passed to
        ``Image.resize`` as is, a number is the length of the longer side
        (aspect ratio preserved). Only used when ``pack`` is true
    :param pack: if true, write a single grid image to ``output``; otherwise
        write one file per image named ``<output without .png>-<idx>.png``
    """
    if isinstance(imgs, torch.Tensor):
        imgs = (imgs.detach().clamp(0, 1).cpu().numpy() * 255).astype('uint8')
    base_dir = os.path.dirname(output)
    if base_dir != '':
        os.makedirs(base_dir, exist_ok=True)
    if pack:
        # (C, H, W) grid -> (H, W, C); squeeze drops a single-channel axis.
        imgs = pack_images(imgs, col=col).transpose(1, 2, 0).squeeze()
        imgs = Image.fromarray(imgs)
        if size is not None:
            if isinstance(size, (list, tuple)):
                imgs = imgs.resize(size)
            else:
                w, h = imgs.size
                max_side = max(h, w)
                scale = float(size) / float(max_side)
                _w, _h = int(w * scale), int(h * scale)
                imgs = imgs.resize((_w, _h))
        imgs.save(output)
    else:
        # Remove only a trailing ".png". str.strip('.png') would strip any of
        # the characters '.', 'p', 'n', 'g' from BOTH ends and mangle names
        # such as "ping.png".
        suffix = '.png'
        output_filename = output[:-len(suffix)] if output.endswith(suffix) else output
        for idx, img in enumerate(imgs):
            if img.shape[0] == 1:
                # single-channel image: save the 2-D plane
                img = Image.fromarray(img[0])
            else:
                img = Image.fromarray(img.transpose(1, 2, 0))
            img.save(output_filename + '-%d.png' % (idx))


class LabeledImageDataset(torch.utils.data.Dataset):
    """Image dataset read from a ``root/<integer label>/<image file>`` layout.

    Every sub-directory of ``root`` is a category whose name is the integer
    label of all files inside it.
    """

    def __init__(self, root, transform=None):
        """
        :param root: directory containing one sub-directory per integer label
        :param transform: optional callable applied to each opened image
        :raises ValueError: if a sub-directory name is not an integer
        """
        self.root = os.path.abspath(root)
        # Skip stray files (e.g. ".DS_Store") and sort so that the
        # index -> sample mapping does not depend on os.listdir order.
        self.categories = sorted(
            int(name) for name in os.listdir(self.root)
            if os.path.isdir(os.path.join(self.root, name)))
        images = []
        targets = []
        for c in self.categories:
            category_dir = os.path.join(self.root, str(c))
            _images = [os.path.join(category_dir, f)
                       for f in sorted(os.listdir(category_dir))]
            images.extend(_images)
            targets.extend([c] * len(_images))
        self.images = images
        self.targets = targets
        self.transform = transform

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; the image is passed
        through ``transform`` when one was given."""
        img, target = Image.open(self.images[idx]), self.targets[idx]
        if self.transform:
            img = self.transform(img)
        return img, target

    def __len__(self):
        return len(self.images)


def _collect_all_images(root, postfix=('png', 'jpg', 'jpeg', 'JPEG')):
    """Recursively list the files under ``root`` whose names end with one of
    the given suffixes.

    :param root: directory to walk
    :param postfix: a single suffix string or an iterable of suffixes
        (matched with ``str.endswith``, case-sensitive)
    :return: list of file paths, each listed once, ordered by directory walk
        with directory and file names sorted
    """
    suffixes = (postfix,) if isinstance(postfix, str) else tuple(postfix)
    images = []
    for dirpath, dirnames, files in os.walk(root):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirnames.sort()
        # endswith() with a tuple tests all suffixes at once, so a file that
        # matches several suffixes is still collected only once.
        images.extend(os.path.join(dirpath, f)
                      for f in sorted(files) if f.endswith(suffixes))
    return images


class UnlabeledImageDataset(torch.utils.data.Dataset):
    """Dataset of every image file found below ``root``; items are images
    without labels."""

    def __init__(self, root, transform=None):
        absolute_root = os.path.abspath(root)
        self.root = absolute_root
        # recursive scan of the directory tree for image files
        self.images = _collect_all_images(absolute_root)
        self.transform = transform

    def __getitem__(self, idx):
        """Open image ``idx`` and apply ``transform`` if one was given."""
        picture = Image.open(self.images[idx])
        if not self.transform:
            return picture
        return self.transform(picture)

    def __len__(self):
        return len(self.images)

    def __repr__(self):
        template = 'Unlabeled data:\n\troot: %s\n\tdata mount: %d\n\ttransforms: %s'
        return template % (self.root, len(self), self.transform)


class ImagePool(object):
    """Directory-backed pool of image batches.

    Each ``add`` call writes one batch as separate image files under ``root``
    and increments an internal batch counter used in the file names.
    """

    def __init__(self, root):
        pool_dir = os.path.abspath(root)
        os.makedirs(pool_dir, exist_ok=True)
        self.root = pool_dir
        self._idx = 0

    def add(self, imgs, targets=None):
        """Save ``imgs`` unpacked, one file per image; ``targets`` is accepted
        but not used."""
        batch_path = os.path.join(self.root, "%d.png" % (self._idx))
        save_image_batch(imgs, batch_path, pack=False)
        self._idx += 1

    def get_dataset(self, transform=None, labeled=True):
        """Return an ``UnlabeledImageDataset`` over the pool directory;
        ``labeled`` is accepted but not used."""
        return UnlabeledImageDataset(self.root, transform=transform)


class DeepInversionHook():
    '''
    Forward hook that compares the statistics of a module's input with the
    module's stored running statistics.
    After every forward pass, ``r_feature`` holds the sum of the l2 distances
    between running and per-channel batch variance and between running and
    per-channel batch mean.
    '''

    def __init__(self, module):
        self.hook = module.register_forward_hook(self.hook_fn)
        self.module = module

    def hook_fn(self, module, input, output):  # hook_fn(module, input, output) -> None
        # per-channel statistics of the first positional input
        features = input[0]
        n_channels = features.shape[1]
        batch_mean = features.mean([0, 2, 3])
        # channels first, then flatten everything else to one row per channel
        per_channel = features.permute(1, 0, 2, 3).contiguous().view([n_channels, -1])
        batch_var = per_channel.var(1, unbiased=False)
        # l2 gap between the module's running statistics and the batch statistics
        var_gap = torch.norm(module.running_var.data - batch_var, 2)
        mean_gap = torch.norm(module.running_mean.data - batch_mean, 2)
        self.r_feature = var_gap + mean_gap

    def remove(self):
        # detach the hook from the module
        self.hook.remove()


class KLDiv(nn.Module):
    """Module wrapper around ``kldiv`` with a fixed temperature and reduction."""

    def __init__(self, T=1.0, reduction='batchmean'):
        """
        :param T: temperature passed to ``kldiv``
        :param reduction: reduction mode passed to ``kldiv``
        """
        super(KLDiv, self).__init__()
        self.reduction = reduction
        self.T = T

    def forward(self, logits, targets):
        """Return ``kldiv(logits, targets)`` using the stored settings."""
        settings = {'T': self.T, 'reduction': self.reduction}
        return kldiv(logits, targets, **settings)


def test(model, test_loader, device="cuda:0"):
    """Evaluate ``model`` on ``test_loader`` and print the results.

    :param model: classifier returning per-class scores of shape (batch, classes)
    :param test_loader: loader yielding ``(data, target)`` batches; its
        ``dataset`` attribute must support ``len()``
    :param device: device the batches are moved to before the forward pass
    :return: tuple ``(accuracy in percent, average cross-entropy loss)``,
        both averaged over ``len(test_loader.dataset)``
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # sum up batch loss; reduction='sum' replaces the deprecated
            # size_average=False argument
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = torch.max(output, 1)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
    n_samples = len(test_loader.dataset)
    test_loss /= n_samples
    acc = 100. * correct / n_samples
    print('\n Test_set: Average loss: {:.4f}, Accuracy: {:.4f}\n'
          .format(test_loss, acc))
    return acc, test_loss


# The name matches pytest's default ``test*`` pattern; mark the helper so it
# is not collected as a test case when imported into a test module.
test.__test__ = False


def record_net_data_stats(y_train, net_dataidx_map):
    """Count, for every client, how many of its samples fall in each class.

    ``y_train`` is a label array indexed by the sample indices stored in
    ``net_dataidx_map`` (client id -> indices). Returns a dict mapping
    client id -> {class label: number of samples}.
    """

    def _class_histogram(sample_idxs):
        classes, counts = np.unique(y_train[sample_idxs], return_counts=True)
        return dict(zip(classes, counts))

    return {client: _class_histogram(sample_idxs)
            for client, sample_idxs in net_dataidx_map.items()}

#
# def partition_data(dataset, partition, n_parties, beta=0.4):
#     if dataset == 'mnist':
#         train_dataset = datasets.MNIST('/youtu-face-identify-public/jiezhang/data', train=True,
#                                        transform=transforms.Compose(
#                                            [transforms.ToTensor()]))
#
#         test_dataset = datasets.MNIST('/youtu-face-identify-public/jiezhang/data', train=False,
#                                       transform=transforms.Compose([
#                                           transforms.ToTensor(),
#                                       ]))
#     elif dataset == 'cifar10':
#         train_dataset = datasets.CIFAR10('/youtu-face-identify-public/jiezhang/data', train=True,
#                                          transform=transforms.Compose(
#                                              [
#                                                  transforms.RandomCrop(32, padding=4),
#                                                  transforms.RandomHorizontalFlip(),
#                                                  transforms.ToTensor(),
#                                              ]))
#
#         test_dataset = datasets.CIFAR10('/youtu-face-identify-public/jiezhang/data', train=False,
#                                         transform=transforms.Compose([
#                                             transforms.ToTensor(),
#                                         ]))
#     y_train = np.array(train_dataset.targets)
#     n_train = y_train.shape[0]
#
#     if partition == "iid":
#         idxs = np.random.permutation(n_train)
#         batch_idxs = np.array_split(idxs, n_parties)
#         net_dataidx_map = {i: batch_idxs[i] for i in range(n_parties)}
#
#     elif partition == "dirichlet":
#         min_size = 0
#         min_require_size = 10
#         K = 10
#         N = y_train.shape[0]
#         net_dataidx_map = {}
#
#         while min_size < min_require_size:
#             idx_batch = [[] for _ in range(n_parties)]
#             for k in range(K):
#                 idx_k = np.where(y_train == k)[0]
#                 np.random.shuffle(idx_k)
#                 proportions = np.random.dirichlet(np.repeat(beta, n_parties))
#                 # Balance
#                 proportions = np.array([p * (len(idx_j) < N / n_parties) for p, idx_j in zip(proportions, idx_batch)])
#                 proportions = proportions / proportions.sum()
#
#                 proportions = (np.cumsum(proportions) * len(idx_k)).astype(int)[:-1]
#
#                 idx_batch = [idx_j + idx.tolist() for idx_j, idx in zip(idx_batch, np.split(idx_k, proportions))]
#                 min_size = min([len(idx_j) for idx_j in idx_batch])
#
#         for j in range(n_parties):
#             np.random.shuffle(idx_batch[j])
#             net_dataidx_map[j] = idx_batch[j]
#
#     elif "shards_0" < partition <= "shards_9":
#         num = eval(partition[7:])
#         K = 10
#         if num == 10:
#             net_dataidx_map = {i: np.ndarray(0, dtype=np.int64) for i in range(n_parties)}
#             for i in range(10):
#                 idx_k = np.where(y_train == i)[0]
#                 np.random.shuffle(idx_k)
#                 split = np.array_split(idx_k, n_parties)
#                 for j in range(n_parties):
#                     net_dataidx_map[j] = np.append(net_dataidx_map[j], split[j])
#         else:
#             times = [0 for i in range(10)]
#             contain = []
#             for i in range(n_parties):
#                 current = [i % K]
#                 times[i % K] += 1
#                 j = 1
#                 while j < num:
#                     ind = random.randint(0, K - 1)
#                     if ind not in current:
#                         j = j + 1
#                         current.append(ind)
#                         times[ind] += 1
#                 contain.append(current)
#             net_dataidx_map = {i: np.ndarray(0, dtype=np.int64) for i in range(n_parties)}
#             for i in range(K):
#                 idx_k = np.where(y_train == i)[0]
#                 np.random.shuffle(idx_k)
#                 split = np.array_split(idx_k, times[i])
#                 ids = 0
#                 for j in range(n_parties):
#                     if i in contain[j]:
#                         net_dataidx_map[j] = np.append(net_dataidx_map[j], split[ids])
#                         ids += 1
#     traindata_cls_counts = record_net_data_stats(y_train, net_dataidx_map)
#     return train_dataset, test_dataset, net_dataidx_map, traindata_cls_counts
