import math

import torch
import torch.distributed as dist

from torch.utils.data.sampler import Sampler


class DistributedSampler(Sampler):
    """Sampler that restricts data loading to a subset of the dataset.

    It is especially useful in conjunction with
    :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
    process can pass a DistributedSampler instance as a DataLoader sampler,
    and load a subset of the original dataset that is exclusive to it.

    .. note::
        Dataset is assumed to be of constant size.

    Arguments:
        dataset: Dataset used for sampling.
        num_samples (optional): controls how many samples to assign to each replica.
            If nothing is specified, the entire dataset is split evenly amongst
            replicas (rounded up). When ``num_samples * num_replicas`` is smaller
            than the dataset, only the first ``num_samples * num_replicas``
            indices of the (possibly shuffled) order are used; when it is
            larger, indices are repeated cyclically to fill the gap.
        num_replicas (optional): Number of processes participating in
            distributed training. Defaults to ``dist.get_world_size()``.
        rank (optional): Rank of the current process within num_replicas.
            Defaults to ``dist.get_rank()``.
        shuffle (optional): If true (default), sampler will shuffle the indices.

    Raises:
        RuntimeError: if ``num_replicas`` or ``rank`` is omitted and the
            distributed package is not available.
    """

    def __init__(self, dataset, num_samples=None, num_replicas=None, rank=None, shuffle=True):
        if num_replicas is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            num_replicas = dist.get_world_size()
        if rank is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            rank = dist.get_rank()
        self.dataset = dataset
        self.num_replicas = num_replicas
        self.rank = rank
        self.epoch = 0

        if num_samples is None:
            # Even split, rounded up so that every sample is covered.
            self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
        else:
            self.num_samples = num_samples

        self.total_size = self.num_samples * self.num_replicas
        self.shuffle = shuffle

    @staticmethod
    def _fit_to_size(indices, size):
        """Return ``indices`` truncated or cyclically padded to exactly ``size`` items."""
        shortfall = size - len(indices)
        if shortfall < 0:
            # A negative bound in ``indices[:shortfall]`` would select a prefix
            # to append rather than shorten the list, so truncate explicitly.
            return indices[:size]
        if shortfall > 0:
            if not indices:
                raise ValueError(
                    "cannot draw {} samples from an empty dataset".format(size)
                )
            # Repeat the whole list as often as needed: a single pass is not
            # enough when more than len(indices) extra samples are required.
            repeats = -(-shortfall // len(indices))  # ceiling division
            indices = indices + (indices * repeats)[:shortfall]
        return indices

    def __iter__(self):
        """Yield the dataset indices assigned to this replica for the current epoch."""
        if self.shuffle:
            # deterministically shuffle based on epoch
            g = torch.Generator()
            g.manual_seed(self.epoch)
            indices = torch.randperm(len(self.dataset), generator=g).tolist()
        else:
            indices = list(range(len(self.dataset)))

        # make the index list exactly num_samples * num_replicas long
        indices = self._fit_to_size(indices, self.total_size)
        assert len(indices) == self.total_size

        # subsample: replica r takes positions r, r + num_replicas, ...
        indices = indices[self.rank : self.total_size : self.num_replicas]
        assert len(indices) == self.num_samples

        return iter(indices)

    def __len__(self):
        """Return the number of samples drawn by this replica per epoch."""
        return self.num_samples

    def set_epoch(self, epoch):
        """Set the epoch used to seed the shuffle in ``__iter__``."""
        self.epoch = epoch

    def __str__(self):
        return "DistributedSampler(num_replicas={num_replicas},rank={rank},shuffle={shuffle})".format(
            num_replicas=self.num_replicas, rank=self.rank, shuffle=self.shuffle
        )


class DecentralizedNonIIDSampler(DistributedSampler):
    """Sampler that hands each replica a contiguous slice of label-sorted data.

    Indices are grouped by label (label 0 first, then 1, ...) using
    ``dataset.targets`` and ``len(dataset.classes)``; replica ``rank`` then
    receives positions ``[rank * num_samples, (rank + 1) * num_samples)`` of
    that list. Samples whose target is not in ``range(len(dataset.classes))``
    are never selected.

    If ``num_samples * num_replicas`` is smaller than the number of grouped
    indices, the tail of the label-sorted list (i.e. the highest labels) is
    dropped; if it is larger, indices are repeated cyclically from the start.

    When ``shuffle`` is true, the order *within* a replica's slice is permuted
    deterministically based on the epoch; the slice membership does not change.
    """

    def __iter__(self):
        """Yield the dataset indices assigned to this replica for the current epoch."""
        nlabels = len(self.dataset.classes)
        # as_tensor leaves tensors untouched and lets list/array targets work:
        # comparing a plain list with an int would yield a single bool.
        targets = torch.as_tensor(self.dataset.targets)

        indices = []
        for label in range(nlabels):
            indices += torch.nonzero(targets == label).flatten().tolist()

        # make the index list exactly num_samples * num_replicas long
        shortfall = self.total_size - len(indices)
        if shortfall < 0:
            # A negative bound in ``indices[:shortfall]`` would select a prefix
            # to append rather than shorten the list, so truncate explicitly.
            indices = indices[: self.total_size]
        elif shortfall > 0:
            if not indices:
                raise ValueError(
                    "cannot draw {} samples: no sample matches any label".format(self.total_size)
                )
            # One pass over the data is not enough when shortfall > len(indices).
            repeats = -(-shortfall // len(indices))  # ceiling division
            indices += (indices * repeats)[:shortfall]
        assert len(indices) == self.total_size

        # subsample: contiguous block so that each replica sees few labels
        indices = indices[
            self.rank * self.num_samples : (self.rank + 1) * self.num_samples
        ]
        assert len(indices) == self.num_samples

        if self.shuffle:
            # deterministically shuffle within the replica based on epoch
            g = torch.Generator()
            g.manual_seed(self.epoch)
            order = torch.randperm(len(indices), generator=g).tolist()
            indices = [indices[i] for i in order]

        return iter(indices)

    def __str__(self):
        return "DecentralizedNonIIDSampler(num_replicas={num_replicas},rank={rank},shuffle={shuffle})".format(
            num_replicas=self.num_replicas, rank=self.rank, shuffle=self.shuffle
        )


class NONIIDLTSampler(DistributedSampler):
    """NONIID + Long-Tail sampler.

    Indices are grouped by label using ``dataset.targets`` and
    ``len(dataset.classes)``; for label ``i`` only the first
    ``int(count_i * beta ** i)`` indices are kept, the rest are discarded.

    Arguments:
        noniid: controls the noniidness. If true, replica ``rank`` takes a
            contiguous block of the label-sorted indices; otherwise replicas
            take interleaved (strided) indices.
        beta: controls the long-tailness, must be in ``[0, 1]``.
            Class ``i`` keeps a fraction ``beta ** i`` of its data.
        dataset: Dataset used for sampling.
        num_samples (optional): samples per replica. If omitted, it is
            recomputed on every iteration as the even split (rounded up) of
            the indices that survive the long-tail selection. Until the first
            iteration ``len(sampler)`` still reflects the full dataset size.
            If given, the kept indices are truncated or cyclically repeated
            to ``num_samples * num_replicas`` entries.
        num_replicas (optional): Number of participating processes.
        rank (optional): Rank of the current process within num_replicas.
        shuffle (optional): If true (default), the indices of a replica are
            permuted with a seed that depends on the rank and the epoch.

    Taken from: https://github.com/epfml/byzantine-robust-noniid-optimizer/
    """

    def __init__(self, noniid, beta, dataset, num_samples=None, num_replicas=None, rank=None, shuffle=True):
        # The parent constructor resolves num_replicas/rank and sets
        # dataset, epoch, num_samples, total_size and shuffle.
        super().__init__(
            dataset,
            num_samples=num_samples,
            num_replicas=num_replicas,
            rank=rank,
            shuffle=shuffle,
        )
        # Remember whether the per-replica size must track the long-tailed data.
        self._auto_num_samples = num_samples is None

        self.noniid = noniid
        self.beta = beta
        assert beta >= 0 and beta <= 1

    def __iter__(self):
        """Yield the dataset indices assigned to this replica for the current epoch."""
        nlabels = len(self.dataset.classes)
        # as_tensor leaves tensors untouched and lets list/array targets work:
        # comparing a plain list with an int would yield a single bool.
        targets = torch.as_tensor(self.dataset.targets)

        # The dataset are not shuffled across nodes.
        indices = []
        for i in range(nlabels):
            label_indices = torch.nonzero(targets == i).flatten().tolist()
            label_selected = int(len(label_indices) * self.beta ** i)
            # discard the rest of label_indices[label_selected:]
            indices += label_indices[:label_selected]

        # Adjust after removing data points (only when the caller did not
        # pin the per-replica size).
        if getattr(self, "_auto_num_samples", False):
            self.num_samples = int(math.ceil(len(indices) * 1.0 / self.num_replicas))
        self.total_size = self.num_samples * self.num_replicas

        # make the index list exactly num_samples * num_replicas long
        shortfall = self.total_size - len(indices)
        if shortfall < 0:
            # A negative bound in ``indices[:shortfall]`` would select a prefix
            # to append rather than shorten the list, so truncate explicitly.
            indices = indices[: self.total_size]
        elif shortfall > 0:
            if not indices:
                raise ValueError(
                    "cannot draw {} samples: no sample was selected".format(self.total_size)
                )
            # One pass over the data is not enough when shortfall > len(indices).
            repeats = -(-shortfall // len(indices))  # ceiling division
            indices += (indices * repeats)[:shortfall]
        assert len(indices) == self.total_size

        if self.noniid:
            # NONIID
            indices = indices[
                self.rank * self.num_samples : (self.rank + 1) * self.num_samples
            ]
        else:
            # IID
            indices = indices[self.rank : self.total_size : self.num_replicas]

        assert len(indices) == self.num_samples

        if self.shuffle:
            # Seed depends on both rank and epoch.
            g = torch.Generator()
            g.manual_seed(self.rank ** 3 + self.epoch)
            order = torch.randperm(len(indices), generator=g).tolist()
            indices = [indices[i] for i in order]

        return iter(indices)

    def __str__(self):
        return "NONIIDLTSampler"


def train_iid_sampler_callback(rank, n, examples_per_worker):
    """Return a factory that builds the shuffled IID training sampler for one worker.

    Arguments:
        rank: rank of the worker among the ``n`` replicas.
        n: number of replicas the dataset is split across.
        examples_per_worker: number of samples assigned to each replica.

    Returns:
        A callable taking a dataset and returning a :class:`DistributedSampler`.
    """

    def make_sampler(x):
        return DistributedSampler(
            dataset=x,
            num_samples=examples_per_worker,
            num_replicas=n,
            rank=rank,
            shuffle=True,
        )

    return make_sampler


def train_noniid_sampler_callback(rank, n_good, examples_per_worker):
    """Return a factory that builds the non-IID training sampler for one worker.

    Workers with ``rank < n_good`` get a :class:`NONIIDLTSampler` in non-IID
    mode with ``beta=1.0``. Workers with ``rank >= n_good`` (the Byzantine
    workers) get a shuffled :class:`DistributedSampler` whose rank is wrapped
    into ``range(n_good)``. In both cases the data is split across ``n_good``
    replicas.

    Arguments:
        rank: rank of the worker.
        n_good: number of non-Byzantine workers.
        examples_per_worker: number of samples assigned to each replica.

    Returns:
        A callable taking a dataset and returning the sampler.
    """
    is_byzantine = rank >= n_good

    def make_sampler(x):
        if is_byzantine:
            # Byzantine workers
            return DistributedSampler(
                dataset=x,
                num_samples=examples_per_worker,
                num_replicas=n_good,
                rank=rank % (n_good),
                shuffle=True,
            )
        return NONIIDLTSampler(
            noniid=True,
            beta=1.0,
            num_samples=examples_per_worker,
            num_replicas=n_good,
            rank=rank,
            shuffle=True,
            dataset=x,
        )

    return make_sampler


def test_iid_sampler_callback():
    """Return a factory that builds the evaluation sampler for the IID setting.

    The produced :class:`DistributedSampler` uses a single replica (rank 0)
    and does not shuffle.
    """

    def make_sampler(x):
        return DistributedSampler(
            dataset=x,
            num_replicas=1,
            rank=0,
            shuffle=False,
        )

    return make_sampler


def test_noniid_sampler_callback():
    """Return a factory that builds the evaluation sampler for the non-IID setting.

    The produced :class:`NONIIDLTSampler` uses a single replica (rank 0),
    ``beta=1.0`` and no shuffling.
    """

    def make_sampler(x):
        # (the noniid argument isn't important since there's only 1 replica);
        # both samplers included for completeness
        return NONIIDLTSampler(
            noniid=False,
            beta=1.0,
            num_replicas=1,
            rank=0,
            shuffle=False,
            dataset=x,
        )

    return make_sampler
