import math
import torch
from torch.utils.data.sampler import Sampler
import torch.distributed as dist
from copy import deepcopy
import matplotlib.pyplot as plt
import seaborn as sns


class DistributedSampler(Sampler):
    """Sampler that restricts data loading to a subset of the dataset.

    It is especially useful in conjunction with
    :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
    process can pass a DistributedSampler instance as a DataLoader sampler,
    and load a subset of the original dataset that is exclusive to it.

    .. note::
        Dataset is assumed to be of constant size.

    Arguments:
        dataset: Dataset used for sampling.
        num_replicas (optional): Number of processes participating in
            distributed training. Defaults to ``dist.get_world_size()``.
        rank (optional): Rank of the current process within num_replicas.
            Defaults to ``dist.get_rank()``.
        shuffle (optional): If true (default), sampler will shuffle the indices
        batchsize (optional): Stored as ``self.batchsize``. This class does
            not read it; it is kept on the instance for subclasses.

    Raises:
        RuntimeError: If ``num_replicas`` or ``rank`` is omitted and the
            distributed package is not available.
    """

    def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True, batchsize=32):
        if num_replicas is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            num_replicas = dist.get_world_size()
        if rank is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            rank = dist.get_rank()
        self.dataset = dataset
        self.num_replicas = num_replicas
        self.rank = rank
        self.epoch = 0
        # Every replica yields the same number of samples; round up so that
        # the whole dataset is covered.
        self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
        self.total_size = self.num_samples * self.num_replicas
        self.shuffle = shuffle
        self.batchsize = batchsize

    def __iter__(self):
        """Return an iterator over the dataset indices assigned to this rank."""
        if self.shuffle:
            # deterministically shuffle based on epoch
            g = torch.Generator()
            g.manual_seed(self.epoch)
            indices = torch.randperm(len(self.dataset), generator=g).tolist()
        else:
            indices = list(range(len(self.dataset)))

        # Add extra samples to make the list evenly divisible. The shortfall
        # can exceed the dataset size (tiny dataset, many replicas), so cycle
        # through the indices as often as needed instead of slicing once.
        shortfall = self.total_size - len(indices)
        if shortfall > 0 and indices:
            repeats = math.ceil(shortfall / len(indices))
            indices += (indices * repeats)[:shortfall]
        assert len(indices) == self.total_size

        # subsample: rank r takes every num_replicas-th index starting at r
        indices = indices[self.rank : self.total_size : self.num_replicas]
        assert len(indices) == self.num_samples

        return iter(indices)

    def __len__(self):
        """Return the number of samples yielded per replica."""
        return self.num_samples

    def set_epoch(self, epoch):
        """Set the epoch number, which seeds the shuffle in ``__iter__``."""
        self.epoch = epoch

    def __str__(self):
        return "DistributedSampler(num_replicas={num_replicas},rank={rank},shuffle={shuffle})".format(
            num_replicas=self.num_replicas, rank=self.rank, shuffle=self.shuffle
        )


class DecentralizedNonIIDSampler(DistributedSampler):
    """Non-IID sampler: each rank receives one contiguous slice of the
    dataset indices after they have been grouped by class label.

    The dataset must expose ``classes`` (its length gives the number of
    labels) and ``targets`` (one integer label per sample; a tensor or
    anything ``torch.as_tensor`` can convert).
    """

    def __iter__(self):
        """Return an iterator over this rank's label-grouped indices."""
        # as_tensor leaves tensor targets untouched and also accepts
        # list/array targets, for which `targets == label` would otherwise
        # collapse to a single bool.
        targets = torch.as_tensor(self.dataset.targets)

        # Concatenate the sample indices class by class (label 0 first).
        indices = []
        for label in range(len(self.dataset.classes)):
            indices += torch.nonzero(targets == label).flatten().tolist()

        # Add extra samples to make the list evenly divisible, cycling
        # through the indices when the shortfall exceeds their number.
        shortfall = self.total_size - len(indices)
        if shortfall > 0 and indices:
            repeats = math.ceil(shortfall / len(indices))
            indices += (indices * repeats)[:shortfall]
        assert len(indices) == self.total_size

        # subsample: contiguous block, so a rank sees few distinct labels
        indices = indices[
            self.rank * self.num_samples : (self.rank + 1) * self.num_samples
        ]
        assert len(indices) == self.num_samples

        if self.shuffle:
            # Only the order within this rank's block depends on the epoch;
            # the block's contents are the same for every epoch.
            g = torch.Generator()
            g.manual_seed(self.epoch)
            order = torch.randperm(len(indices), generator=g).tolist()
            indices = [indices[j] for j in order]

        return iter(indices)

    def __str__(self):
        return "DecentralizedNonIIDSampler(num_replicas={num_replicas},rank={rank},shuffle={shuffle})".format(
            num_replicas=self.num_replicas, rank=self.rank, shuffle=self.shuffle
        )


class DecentralizedMixedSampler(DistributedSampler):
    """Sampler mixing an IID share and a label-grouped (non-IID) share.

    A fixed random permutation of the dataset (seed 0) is split in two: the
    first part is dealt out round-robin to all ranks (IID), the rest is
    grouped by class label and cut into one contiguous block per rank.

    Arguments:
        noniid_percent: Used as a fraction; ``1 - noniid_percent`` of the
            dataset (rounded down to a multiple of ``num_replicas``) forms
            the IID share.
        *args, **kwargs: Forwarded to :class:`DistributedSampler`.
    """

    def __init__(self, noniid_percent, *args, **kwargs):
        super(DecentralizedMixedSampler, self).__init__(*args, **kwargs)
        self.noniid_percent = noniid_percent

    def __iter__(self):
        """Return an iterator over this rank's mixed IID / non-IID indices."""
        # The IID / non-IID split uses a fixed seed, so it is identical on
        # every rank and in every epoch.
        g = torch.Generator()
        g.manual_seed(0)
        all_indices = torch.randperm(len(self.dataset), generator=g).tolist()

        iid_count = int((1 - self.noniid_percent) * len(all_indices))
        # Make the IID share evenly divisible among the replicas.
        iid_count -= iid_count % self.num_replicas
        iid_indices = all_indices[:iid_count]
        # Built once; membership tests below are O(1).
        noniid_pool = set(all_indices[iid_count:])

        # as_tensor leaves tensor targets untouched and also accepts
        # list/array targets.
        targets = torch.as_tensor(self.dataset.targets)

        # Group the non-IID share by label. Filtering the ascending index
        # list keeps a well-defined order that every rank reproduces,
        # rather than relying on set iteration order.
        indices = []
        for label in range(len(self.dataset.classes)):
            members = torch.nonzero(targets == label).flatten().tolist()
            indices += [idx for idx in members if idx in noniid_pool]

        # Add extra samples to make the total evenly divisible, cycling
        # through the indices when the shortfall exceeds their number.
        shortfall = self.total_size - iid_count - len(indices)
        if shortfall > 0 and indices:
            repeats = math.ceil(shortfall / len(indices))
            indices += (indices * repeats)[:shortfall]
        assert len(indices) + iid_count == self.total_size

        # subsample: contiguous block of the label-grouped share
        num_noniid_samples_per_node = self.num_samples - iid_count // self.num_replicas
        block_start = self.rank * num_noniid_samples_per_node
        indices = indices[block_start : block_start + num_noniid_samples_per_node]
        # Add iid part
        indices += iid_indices[self.rank : iid_count : self.num_replicas]
        assert len(indices) == self.num_samples, (len(indices), self.num_samples)

        if self.shuffle:
            # Epoch-dependent reordering of this rank's own samples only.
            g = torch.Generator()
            g.manual_seed(self.epoch)
            order = torch.randperm(len(indices), generator=g).tolist()
            indices = [indices[j] for j in order]

        return iter(indices)


class NONIIDLTSampler(DistributedSampler):
    """NONIID + Long-Tail sampler.

    alpha: alpha controls the noniidness.
        - alpha = 0 refers to completely noniid (contiguous label-grouped
          block per rank)
        - any non-zero alpha (e.g. alpha = 1) refers to iid (round-robin).

    beta: beta controls the long-tailness.
        - Class i keeps a fraction ``beta ** i`` of its samples.

    Remaining positional and keyword arguments are forwarded to
    :class:`DistributedSampler`.

    NOTE: ``num_samples`` and ``total_size`` are recomputed inside
    ``__iter__`` once the long-tail selection is known, so ``len(sampler)``
    reflects the reduced size only after the first iteration has started.
    """

    def __init__(self, alpha, beta, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.alpha = alpha
        self.beta = beta
        assert 0 <= beta <= 1
        assert alpha >= 0

    def __iter__(self):
        """Return an iterator over this rank's long-tailed sample indices."""
        # as_tensor leaves tensor targets untouched and also accepts
        # list/array targets.
        targets = torch.as_tensor(self.dataset.targets)

        indices = []
        for label in range(len(self.dataset.classes)):
            label_indices = torch.nonzero(targets == label).flatten().tolist()
            # A fresh generator per class, seeded with the epoch: which
            # samples survive the cut changes per epoch, how many does not.
            g = torch.Generator()
            g.manual_seed(self.epoch)
            order = torch.randperm(len(label_indices), generator=g).tolist()
            keep = int(len(label_indices) * self.beta ** label)
            # Keep the first `keep` shuffled samples, discard the rest.
            indices += [label_indices[j] for j in order[:keep]]

        # Adjust after removing data points.
        self.num_samples = int(math.ceil(len(indices) * 1.0 / self.num_replicas))
        self.total_size = self.num_samples * self.num_replicas

        # Add extra samples to make the list evenly divisible, cycling
        # through the indices when the shortfall exceeds their number.
        shortfall = self.total_size - len(indices)
        if shortfall > 0 and indices:
            repeats = math.ceil(shortfall / len(indices))
            indices += (indices * repeats)[:shortfall]
        assert len(indices) == self.total_size

        if self.alpha:
            # IID: round-robin over the label-grouped list
            indices = indices[self.rank : self.total_size : self.num_replicas]
        else:
            # NONIID: one contiguous label-grouped block per rank
            indices = indices[
                self.rank * self.num_samples : (self.rank + 1) * self.num_samples
            ]
        assert len(indices) == self.num_samples

        if self.shuffle:
            # Rank- and epoch-dependent order of this rank's own samples.
            g = torch.Generator()
            g.manual_seed(self.rank ** 3 + self.epoch)
            order = torch.randperm(len(indices), generator=g).tolist()
            indices = [indices[j] for j in order]

        return iter(indices)

    def __str__(self):
        return "NONIIDLTSampler"


class DirichletSampler(DistributedSampler):
    """Sampler that splits every class across workers by Dirichlet draws.

    For each label, a sample from ``Dirichlet(alpha, ..., alpha)`` (one
    component per worker) decides which fraction of that label's samples
    each worker receives. The allocation is computed on the first iteration
    and cached in ``self.indices``.

    Arguments:
        alpha: Concentration parameter passed to the Dirichlet distribution.
        *args, **kwargs: Forwarded to :class:`DistributedSampler`.
    """

    # torch.manual_seed rejects seeds outside the 64-bit range, so the
    # growing retry seed is wrapped to stay below this bound.
    _SEED_MODULUS = 2 ** 63

    def __init__(self, alpha, *args, **kwargs):
        super(DirichletSampler, self).__init__(*args, **kwargs)
        self.alpha = alpha
        self.indices = []

    def visualize_proportions(self, proportions):
        """Show a heatmap of ``proportions`` using seaborn/matplotlib."""
        plt.figure(figsize=(10, 6))
        sns.heatmap(proportions, annot=True, cmap='YlGnBu', cbar=True)
        plt.title("Proportions of Samples for Each Label and Worker")
        plt.xlabel("Worker")
        plt.ylabel("Label")
        plt.show()

    def _draw_proportions(self, nlabels, n_samples_per_label):
        """Draw the per-label worker proportions, retrying with new seeds
        until every worker is estimated to receive at least ``batchsize``
        samples.

        Returns a tensor with one row per label and one column per worker.

        Raises:
            ValueError: If the requirement cannot be met by any draw.
        """
        # The estimated amounts sum to at most nlabels * n_samples_per_label,
        # so below this threshold the retry loop could never succeed.
        if nlabels * n_samples_per_label < self.batchsize * self.num_replicas:
            raise ValueError(
                "Cannot give each of {} workers at least {} samples from "
                "{} labels with {} samples each".format(
                    self.num_replicas, self.batchsize, nlabels, n_samples_per_label
                )
            )

        dirichlet_seed = 0
        while True:
            # fork_rng restores the global RNG state afterwards, so seeding
            # here does not disturb the caller's random stream.
            with torch.random.fork_rng():
                # Fixed Data Allocation
                torch.manual_seed(dirichlet_seed)

                dirichlet_dist = torch.distributions.dirichlet.Dirichlet(
                    torch.full((self.num_replicas,), self.alpha))

                # For each label, draw a sample from the Dirichlet distribution
                # to determine its distribution across workers
                proportions = torch.stack([dirichlet_dist.sample() for _ in range(nlabels)])

            amount_all_workers = [
                sum(int(prop * n_samples_per_label) for prop in proportions[:, worker])
                for worker in range(self.num_replicas)
            ]
            if min(amount_all_workers) >= self.batchsize:
                print(dirichlet_seed, amount_all_workers, sum(amount_all_workers))
                return proportions

            # Same seed sequence as before (0, 8, 1008, 1030302008, ...),
            # wrapped so it never leaves the range torch.manual_seed accepts.
            dirichlet_seed = (
                dirichlet_seed + (dirichlet_seed + 2) ** 3
            ) % self._SEED_MODULUS

    def _build_worker_indices(self):
        """Compute the list of dataset indices allocated to this rank,
        repeated until it holds at least ``num_samples`` entries."""
        nlabels = len(self.dataset.classes)
        n_samples_per_label = len(self.dataset) // nlabels
        proportions = self._draw_proportions(nlabels, n_samples_per_label)

        g = torch.Generator()
        g.manual_seed(0)
        # The permutation itself is not used, but drawing it advances `g`.
        # It is kept so the per-class shuffles below (and therefore the
        # resulting allocation) stay the same as before.
        torch.randperm(len(self.dataset), generator=g)

        # as_tensor leaves tensor targets untouched and also accepts
        # list/array targets.
        targets = torch.as_tensor(self.dataset.targets)

        # Initialize list to store indices for each worker
        indices_per_worker = [[] for _ in range(self.num_replicas)]

        for class_idx in range(nlabels):
            # Get indices for the current class, capped at n_samples_per_label
            label_indices = (targets == class_idx).nonzero(as_tuple=False).flatten().tolist()
            label_indices = label_indices[:n_samples_per_label]
            # Shuffle the indices
            order = torch.randperm(len(label_indices), generator=g).tolist()
            label_indices = [label_indices[j] for j in order]

            # Distribute indices among workers according to the Dirichlet
            # proportions; truncation may leave a few samples unassigned.
            start = 0
            for worker, prop in enumerate(proportions[class_idx]):
                count = int(prop * len(label_indices))
                indices_per_worker[worker].extend(label_indices[start : start + count])
                start += count

        # Select the indices for this worker and shuffle them once per rank.
        own = indices_per_worker[self.rank]
        g = torch.Generator()
        g.manual_seed(self.rank)
        order = torch.randperm(len(own), generator=g).tolist()
        own = [own[j] for j in order]

        if not own and self.num_samples > 0:
            # Doubling an empty list would never terminate.
            raise RuntimeError(
                "Dirichlet allocation left rank {} without any samples".format(self.rank)
            )
        # Repeat the allocation until it can supply num_samples entries.
        while len(own) < self.num_samples:
            own = own + own
        return own

    def __iter__(self):
        """Return an iterator over ``num_samples`` indices for this rank."""
        if not self.indices:
            self.indices = self._build_worker_indices()

        # Work on a copy so the cached allocation keeps its order.
        indices = list(self.indices)
        if self.shuffle:
            # Rank- and epoch-dependent order of this rank's own samples.
            g = torch.Generator()
            g.manual_seed(self.rank ** 3 + self.epoch)
            order = torch.randperm(len(indices), generator=g).tolist()
            indices = [indices[j] for j in order]

        indices = indices[:self.num_samples]
        assert len(indices) == self.num_samples, [len(indices), self.num_samples]

        return iter(indices)

    def __str__(self):
        return "DirichletSampler"

