import torch
from torch.utils.data import Dataset
import torchvision
from torchvision import transforms
import os

class MNISTNoisy(Dataset):
    """MNIST dataset whose training targets are swapped for stored noisy labels.

    The class wraps a ``torchvision.datasets.MNIST`` instance (kept in
    ``self.dataset``) that converts images with ``transforms.ToTensor()``.
    For the training split, the wrapped dataset's ``targets`` are replaced by
    the object loaded from ``noisy_labels.pt``, which is expected to sit in
    the same directory as this file. The test split keeps its original targets.
    """

    def __init__(self, train=True, download=False):
        """Build the wrapped MNIST dataset and, for training, load noisy labels.

        Args:
            train: Select the training split when true; also controls whether
                the noisy labels are loaded.
            download: Forwarded to ``torchvision.datasets.MNIST``.
        """
        self.name = 'mnist_noisy'
        self.train = train

        this_file = os.path.abspath(__file__)
        # Keep everything before the first 'data/' in this file's absolute
        # path and re-append 'data/' to obtain the datasets directory.
        datasets_root = this_file.partition('data/')[0] + 'data/'
        self.dataset = torchvision.datasets.MNIST(
            root=datasets_root + '/mnist/data/',
            train=train,
            download=download,
            transform=transforms.ToTensor(),
        )

        if not train:
            # Only the training split gets its labels overridden.
            return

        labels_file = os.path.join(os.path.dirname(this_file), 'noisy_labels.pt')
        self.dataset.targets = torch.load(labels_file)

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        wrapped = self.dataset
        return len(wrapped)

    def __getitem__(self, idx):
        """Return the wrapped dataset's item at position ``idx``."""
        item = self.dataset[idx]
        return item

if __name__ == "__main__":
    # Regenerate the noisy-label files stored next to this script: pick 10% of
    # the training indices and give each of them a label that differs from the
    # clean one. The seed is fixed to 42 via set_seed.
    from data import MNIST
    from data.utils import set_seed

    clean_trainset = MNIST(train=True, download=True)
    num_samples = len(clean_trainset)

    # The order of the random calls below (one randperm, then one randint per
    # redraw) determines the generated labels; do not reorder them.
    set_seed(42)
    flip_idx = torch.randperm(num_samples)[:int(num_samples*0.1)]

    clean_labels = clean_trainset.dataset.targets
    noisy_labels = clean_labels.clone()
    for i in flip_idx:
        # Redraw from {0, ..., 9} until the label differs from the clean one.
        while noisy_labels[i] == clean_labels[i]:
            noisy_labels[i] = torch.randint(0, 10, (1,))

    out_dir = os.path.dirname(os.path.abspath(__file__))
    outputs = (
        (flip_idx, 'flipped_indices.pt'),
        (noisy_labels, 'noisy_labels.pt'),
    )
    for tensor, filename in outputs:
        torch.save(tensor, os.path.join(out_dir, filename))