import os
import pickle
import random
import tarfile
import urllib.request

import numpy as np
import pandas as pd
import torch
import torchvision
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset, TensorDataset


def train_val_split(labels, n_labeled, positive_label_list):
    '''
    Draw the labeled-positive and unlabeled index sets for PU training.

    Every sample index ends up in the unlabeled set.  For each class listed
    in ``positive_label_list`` the indices of that class are shuffled and the
    first ``int(n_labeled / len(positive_label_list))`` of them are also put
    in the labeled set.  All randomness comes from NumPy's global generator
    (``np.random``), so seed it beforehand for reproducible splits.

    :param labels: per-sample class labels (converted with ``np.array``)
    :param n_labeled: total number of labeled positives requested; it is
        divided evenly between the positive classes
    :param positive_label_list: class labels that count as positive
    :return: ``(train_labeled_idxs, train_unlabeled_idxs, prior)``; the two
        index collections are shuffled lists and ``prior`` is the fraction of
        all samples whose class is in ``positive_label_list``
    '''
    labels = np.array(labels)
    class_values = np.unique(labels)
    per_class_quota = int(n_labeled / len(positive_label_list))

    labeled_pool = []
    unlabeled_pool = []
    positive_total = 0

    for cls in class_values:
        members = np.nonzero(labels == cls)[0]
        np.random.shuffle(members)

        # The unlabeled pool keeps every sample, positives included.
        unlabeled_pool.extend(members)
        if cls not in positive_label_list:
            continue
        labeled_pool.extend(members[:per_class_quota])
        positive_total += len(members)

    # Mix the classes so neither list is grouped by label.
    np.random.shuffle(labeled_pool)
    np.random.shuffle(unlabeled_pool)

    return labeled_pool, unlabeled_pool, positive_total / len(labels)


def normalise_fashionmnist(x, mean, std):
    '''
    Standardise ``x`` as ``(x - mean) / std`` in float32.

    Unlike ``normalise`` in this file, no 255 rescaling is applied to
    ``mean`` or ``std``; they are used exactly as given.

    :param x: array-like input; it is copied, so the caller's data is not
        modified
    :param mean: value(s) subtracted from ``x``
    :param std: value(s) ``x`` is divided by after the subtraction
    :return: a new float32 NumPy array with the shape of ``x``
    '''
    values = np.array(x, dtype=np.float32)
    shift = np.array(mean, dtype=np.float32)
    scale = np.array(std, dtype=np.float32)
    np.subtract(values, shift, out=values)
    np.divide(values, scale, out=values)
    return values


def _3D_to_4(x):
    '''
    Insert a singleton axis at position 1.

    :param x: array of shape (len, rows, cols), e.g. (len, 28, 28)
    :return: the same data reshaped to (len, 1, rows, cols)
    '''
    count = x.shape[0]
    rows = x.shape[1]
    cols = x.shape[2]
    return x.reshape(count, 1, rows, cols)


def normalise(x, mean, std):
    '''
    Standardise 0-255 scaled values with statistics given on a 0-1 scale.

    Computes ``(x - 255 * mean) * (1 / (255 * std))`` in float32.

    :param x: array-like input; it is copied, so the caller's data is not
        modified
    :param mean: mean(s) on the 0-1 scale
    :param std: standard deviation(s) on the 0-1 scale
    :return: a new float32 NumPy array with the shape of ``x``
    '''
    pixels = np.array(x, dtype=np.float32)
    mean = np.array(mean, dtype=np.float32)
    std = np.array(std, dtype=np.float32)
    np.subtract(pixels, mean * 255, out=pixels)
    np.multiply(pixels, 1.0 / (255 * std), out=pixels)
    return pixels


def transpose(x, source='NHWC', target='NCHW'):
    '''
    Permute the axes of ``x`` from the ``source`` layout to the ``target``
    layout.

    Layout strings name one axis per character:
    N: batch size
    H: height
    W: width
    C: channel

    :param x: object whose ``transpose`` method accepts a list of axes
    :param source: current axis order of ``x``
    :param target: requested axis order
    :return: the result of ``x.transpose`` with the computed permutation
    '''
    # For every axis of the target layout, look up where it sits in source.
    axis_order = list(map(source.index, target))
    return x.transpose(axis_order)


class FashionMNIST_labeled(torchvision.datasets.FashionMNIST):
    '''
    FashionMNIST (optionally restricted to ``indexs``) whose images are
    normalised once, at construction time.

    After ``__init__`` ``self.data`` is a float32 NumPy array with a
    singleton axis inserted at position 1, and ``__getitem__`` reads
    directly from that array.
    '''

    def __init__(self,
                 root,
                 indexs=None,
                 train=True,
                 transform=None,
                 target_transform=None,
                 download=True):
        '''
        :param root: dataset directory, forwarded to torchvision
        :param indexs: optional indices selecting a subset of samples;
            ``None`` keeps every sample
        :param train: forwarded to torchvision
        :param transform: optional callable applied to each image in
            ``__getitem__``
        :param target_transform: optional callable applied to each target
            in ``__getitem__``
        :param download: forwarded to torchvision
        '''
        super().__init__(root,
                         train=train,
                         transform=transform,
                         target_transform=target_transform,
                         download=download)
        if indexs is not None:
            self.data = self.data[indexs]
            self.targets = np.array(self.targets)[indexs]

        # NOTE(review): these mean/std values look like statistics of pixels
        # scaled to [0, 1], but normalise_fashionmnist applies them without
        # the 255 rescaling that normalise() performs -- confirm the value
        # range of self.data at this point.
        standardised = normalise_fashionmnist(self.data,
                                              mean=(0.2860, ),
                                              std=(0.3530, ))
        self.data = _3D_to_4(standardised)

    def __getitem__(self, index):
        '''
        Return ``(img, target)`` for ``index``, each passed through its
        transform when one is configured.
        '''
        sample = self.data[index]
        label = self.targets[index]

        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return sample, label


class FashionMNIST_unlabeled(FashionMNIST_labeled):
    '''
    Same images as ``FashionMNIST_labeled`` for the given ``indexs``, but
    with every stored target replaced by -1.
    '''

    def __init__(self,
                 root,
                 indexs,
                 train=True,
                 transform=None,
                 target_transform=None,
                 download=True):
        '''
        :param root: dataset directory
        :param indexs: indices selecting the samples to keep
        :param train: forwarded to the parent class
        :param transform: optional callable applied to each image
        :param target_transform: optional callable applied to each (-1)
            target
        :param download: forwarded to the parent class
        '''
        super().__init__(root,
                         indexs,
                         train=train,
                         transform=transform,
                         target_transform=target_transform,
                         download=download)
        # Hide the true classes: every sample is marked -1.
        self.targets = np.array([-1] * len(self.targets))


def get_fashionMNIST_data(num_labeled,
                          positive_label_list,
                          root,
                          transform_train=None,
                          transform_val=None):
    '''
    Build the FashionMNIST datasets used for PU learning.

    Targets are mapped to +1 when the class is in ``positive_label_list``
    and to -1 otherwise.

    :param num_labeled: number of labeled positives, passed to
        ``train_val_split``
    :param positive_label_list: class labels treated as positive
    :param root: dataset directory (data is downloaded there if needed)
    :param transform_train: transform for the three train-split datasets
    :param transform_val: transform for the test dataset
    :return: ``(train_labeled_dataset, train_unlabeled_dataset, val_dataset,
        test_dataset, prior)``; ``val_dataset`` is the whole train split
        (no index subset) built with ``transform_train``
    '''

    def to_pu_target(label):
        # +1 for positive classes, -1 for everything else.
        return 1 if label in positive_label_list else -1

    full_train = torchvision.datasets.FashionMNIST(root,
                                                   train=True,
                                                   download=True)
    labeled_idxs, unlabeled_idxs, prior = train_val_split(
        full_train.targets, num_labeled, positive_label_list)

    train_labeled_dataset = FashionMNIST_labeled(
        root,
        labeled_idxs,
        train=True,
        transform=transform_train,
        target_transform=to_pu_target)
    train_unlabeled_dataset = FashionMNIST_unlabeled(
        root,
        unlabeled_idxs,
        train=True,
        transform=transform_train,
        target_transform=to_pu_target)
    val_dataset = FashionMNIST_labeled(root,
                                       train=True,
                                       transform=transform_train,
                                       target_transform=to_pu_target)
    test_dataset = FashionMNIST_labeled(root,
                                        train=False,
                                        transform=transform_val,
                                        download=True,
                                        target_transform=to_pu_target)

    return (train_labeled_dataset, train_unlabeled_dataset, val_dataset,
            test_dataset, prior)


class CIFAR10_labeled(torchvision.datasets.CIFAR10):
    '''
    CIFAR10 (optionally restricted to ``indexs``) whose images are
    normalised and re-laid-out once, at construction time.

    After ``__init__`` ``self.data`` is a float32 NumPy array produced by
    ``normalise`` (mean 0.5, std 0.5 per channel) followed by ``transpose``
    with its default 'NHWC' -> 'NCHW' permutation.
    '''

    def __init__(self,
                 root,
                 indexs=None,
                 train=True,
                 transform=None,
                 target_transform=None,
                 download=False):
        '''
        :param root: dataset directory, forwarded to torchvision
        :param indexs: optional indices selecting a subset of samples;
            ``None`` keeps every sample
        :param train: forwarded to torchvision
        :param transform: optional callable applied to each image in
            ``__getitem__``
        :param target_transform: optional callable applied to each target
            in ``__getitem__``
        :param download: forwarded to torchvision
        '''
        super().__init__(root,
                         train=train,
                         transform=transform,
                         target_transform=target_transform,
                         download=download)
        if indexs is not None:
            self.data = self.data[indexs]
            self.targets = np.array(self.targets)[indexs]

        # Normalise while channels are the trailing axis (so the 3-element
        # mean/std broadcast over it), then switch to channels-first.
        scaled = normalise(self.data,
                           mean=(0.5, 0.5, 0.5),
                           std=(0.5, 0.5, 0.5))
        self.data = transpose(scaled)

    def __getitem__(self, index):
        '''
        Return ``(img, target)`` for ``index``, each passed through its
        transform when one is configured.
        '''
        sample = self.data[index]
        label = self.targets[index]

        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return sample, label


class CIFAR10_unlabeled(CIFAR10_labeled):
    '''
    Same images as ``CIFAR10_labeled`` for the given ``indexs``, but with
    every stored target replaced by -1.
    '''

    def __init__(self,
                 root,
                 indexs,
                 train=True,
                 transform=None,
                 target_transform=None,
                 download=False):
        '''
        :param root: dataset directory
        :param indexs: indices selecting the samples to keep
        :param train: forwarded to the parent class
        :param transform: optional callable applied to each image
        :param target_transform: optional callable applied to each (-1)
            target
        :param download: forwarded to the parent class
        '''
        super().__init__(root,
                         indexs,
                         train=train,
                         transform=transform,
                         target_transform=target_transform,
                         download=download)
        # Hide the true classes: every sample is marked -1.
        self.targets = np.array([-1] * len(self.targets))


def get_cifar10_data(num_labeled,
                     positive_label_list,
                     root,
                     transform_train=None,
                     transform_val=None):
    '''
    Build the CIFAR10 datasets used for PU learning.

    Targets are mapped to +1 when the class is in ``positive_label_list``
    and to -1 otherwise.

    :param num_labeled: number of labeled positives, passed to
        ``train_val_split``
    :param positive_label_list: class labels treated as positive
    :param root: dataset directory (data is downloaded there if needed)
    :param transform_train: transform for the three train-split datasets
    :param transform_val: transform for the test dataset
    :return: ``(train_labeled_dataset, train_unlabeled_dataset, val_dataset,
        test_dataset, prior)``; ``val_dataset`` is the whole train split
        (no index subset) built with ``transform_train``
    '''

    def to_pu_target(label):
        # +1 for positive classes, -1 for everything else.
        return 1 if label in positive_label_list else -1

    # This first instantiation also performs the download, which the
    # train-split wrappers below rely on (their download defaults to False).
    full_train = torchvision.datasets.CIFAR10(root,
                                              train=True,
                                              download=True)
    labeled_idxs, unlabeled_idxs, prior = train_val_split(
        full_train.targets, num_labeled, positive_label_list)

    train_labeled_dataset = CIFAR10_labeled(root,
                                            labeled_idxs,
                                            train=True,
                                            transform=transform_train,
                                            target_transform=to_pu_target)
    train_unlabeled_dataset = CIFAR10_unlabeled(
        root,
        unlabeled_idxs,
        train=True,
        transform=transform_train,
        target_transform=to_pu_target)
    val_dataset = CIFAR10_labeled(root,
                                  train=True,
                                  transform=transform_train,
                                  target_transform=to_pu_target)
    test_dataset = CIFAR10_labeled(root,
                                   train=False,
                                   transform=transform_val,
                                   download=True,
                                   target_transform=to_pu_target)

    return (train_labeled_dataset, train_unlabeled_dataset, val_dataset,
            test_dataset, prior)


class STL10_labeled(torchvision.datasets.STL10):
    '''
    STL10 (optionally restricted to ``indexs``) whose images are normalised
    once, at construction time.

    After ``__init__`` ``self.data`` is a float32 NumPy array in the same
    'NCHW' axis order it started in; class ids live in ``self.labels``.
    '''

    def __init__(self,
                 root,
                 indexs=None,
                 split='train+unlabeled',
                 transform=None,
                 target_transform=None,
                 download=False):
        '''
        :param root: dataset directory, forwarded to torchvision
        :param indexs: optional indices selecting a subset of samples;
            ``None`` keeps every sample
        :param split: forwarded to torchvision
        :param transform: optional callable applied to each image in
            ``__getitem__``
        :param target_transform: optional callable applied to each label
            in ``__getitem__``
        :param download: forwarded to torchvision
        '''
        super().__init__(root,
                         split=split,
                         transform=transform,
                         target_transform=target_transform,
                         download=download)
        if indexs is not None:
            self.data = self.data[indexs]
            self.labels = np.array(self.labels)[indexs]

        # Move channels to the trailing axis so the 3-element mean/std
        # broadcast over them, normalise, then restore channels-first.
        channels_last = transpose(self.data, source='NCHW', target='NHWC')
        self.data = channels_last
        scaled = normalise(self.data,
                           mean=(0.5, 0.5, 0.5),
                           std=(0.5, 0.5, 0.5))
        self.data = transpose(scaled)

    def __getitem__(self, index):
        '''
        Return ``(img, target)`` for ``index``, each passed through its
        transform when one is configured.
        '''
        sample = self.data[index]
        label = self.labels[index]

        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return sample, label


class STL10_unlabeled(STL10_labeled):
    '''
    Same images as ``STL10_labeled`` for the given ``indexs``, but with
    every stored label replaced by -1.
    '''

    def __init__(self,
                 root,
                 indexs,
                 split='train+unlabeled',
                 transform=None,
                 target_transform=None,
                 download=False):
        '''
        :param root: dataset directory
        :param indexs: indices selecting the samples to keep
        :param split: forwarded to the parent class
        :param transform: optional callable applied to each image
        :param target_transform: optional callable applied to each (-1)
            label
        :param download: forwarded to the parent class
        '''
        super().__init__(root,
                         indexs,
                         split=split,
                         transform=transform,
                         target_transform=target_transform,
                         download=download)
        # Hide the true classes: every sample is marked -1.
        self.labels = np.array([-1] * len(self.labels))


def get_stl10_data(num_labeled,
                   positive_label_list,
                   root,
                   transform_train=None,
                   transform_val=None):
    '''
    Build the STL10 datasets used for PU learning.

    Targets are mapped to +1 when the class is in ``positive_label_list``
    and to -1 otherwise.

    :param num_labeled: number of labeled positives, passed to
        ``train_val_split``
    :param positive_label_list: class labels treated as positive
    :param root: dataset directory (data is downloaded there if needed)
    :param transform_train: transform for the labeled, unlabeled and
        validation datasets
    :param transform_val: transform for the test dataset
    :return: ``(train_labeled_dataset, train_unlabeled_dataset, val_dataset,
        test_dataset, prior)``; the two training sets are drawn from the
        'train+unlabeled' split, ``val_dataset`` is the whole 'train' split
        and ``test_dataset`` the 'test' split
    '''

    def to_pu_target(label):
        # +1 for positive classes, -1 for everything else.
        return 1 if label in positive_label_list else -1

    # This first instantiation also performs the download, which the
    # wrappers below rely on (their download defaults to False).
    full_pool = torchvision.datasets.STL10(root,
                                           split='train+unlabeled',
                                           download=True)
    # NOTE(review): the prior is computed over every label of the
    # 'train+unlabeled' split; if that split carries a placeholder label for
    # its unlabeled images, those samples enlarge the denominator -- confirm
    # this is intended.
    labeled_idxs, unlabeled_idxs, prior = train_val_split(
        full_pool.labels, num_labeled, positive_label_list)

    train_labeled_dataset = STL10_labeled(root,
                                          labeled_idxs,
                                          split='train+unlabeled',
                                          transform=transform_train,
                                          target_transform=to_pu_target)
    train_unlabeled_dataset = STL10_unlabeled(
        root,
        unlabeled_idxs,
        split='train+unlabeled',
        transform=transform_train,
        target_transform=to_pu_target)
    val_dataset = STL10_labeled(root,
                                split='train',
                                transform=transform_train,
                                target_transform=to_pu_target)
    test_dataset = STL10_labeled(root,
                                 split='test',
                                 transform=transform_val,
                                 download=True,
                                 target_transform=to_pu_target)

    return (train_labeled_dataset, train_unlabeled_dataset, val_dataset,
            test_dataset, prior)


def get_loaders(train_labeled_dataset,
                train_unlabeled_dataset,
                val_dataset,
                test_dataset,
                batch_size=512,
                eval_batch_size=1024):
    '''
    Wrap the PU datasets in DataLoaders.

    Training loaders shuffle and drop the last incomplete batch; evaluation
    loaders keep dataset order and every sample.

    :param train_labeled_dataset: labeled-positive training set
    :param train_unlabeled_dataset: unlabeled training set
    :param val_dataset: dataset served twice: shuffled as ``train_loader``
        and in order as ``val_loader``
    :param test_dataset: held-out test set
    :param batch_size: batch size of the three shuffled training loaders
    :param eval_batch_size: batch size of ``val_loader`` and ``test_loader``
        (previously fixed at 1024, which remains the default)
    :return: ``(p_loader, x_loader, train_loader, val_loader, test_loader)``
    '''

    def training_loader(dataset):
        # drop_last keeps every training batch at exactly batch_size samples.
        return DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          drop_last=True)

    def evaluation_loader(dataset):
        return DataLoader(dataset=dataset,
                          batch_size=eval_batch_size,
                          shuffle=False)

    p_loader = training_loader(train_labeled_dataset)
    x_loader = training_loader(train_unlabeled_dataset)
    train_loader = training_loader(val_dataset)
    val_loader = evaluation_loader(val_dataset)
    test_loader = evaluation_loader(test_dataset)

    return p_loader, x_loader, train_loader, val_loader, test_loader


def load_image_dataset(dataset_name,
                       num_labeled,
                       batchsize,
                       positive_label_list,
                       root='../data',
                       with_bias=False,
                       resample_model=""):
    '''
    Build the PU datasets for ``dataset_name``, wrap them in loaders and
    print a short summary.

    :param dataset_name: one of "cifar10", "fashionmnist" or "stl10"
    :param num_labeled: number of labeled positive samples
    :param batchsize: batch size forwarded to ``get_loaders``
    :param positive_label_list: class labels treated as positive
    :param root: dataset directory
    :param with_bias: accepted but not used by this function
    :param resample_model: accepted but not used by this function
    :return: ``(p_loader, x_loader, train_loader, val_loader, test_loader,
        train_labeled_dataset, dim, prior)`` where ``dim`` is the number of
        values per labeled sample (a float, from a true division)
    :raises ValueError: if ``dataset_name`` is not a known dataset
    '''
    print("==================")
    print("loading data...")

    if dataset_name not in ("cifar10", "fashionmnist", "stl10"):
        raise ValueError("dataset name {} is unknown.".format(dataset_name))

    builders = {
        "cifar10": get_cifar10_data,
        "fashionmnist": get_fashionMNIST_data,
        "stl10": get_stl10_data,
    }
    build_datasets = builders[dataset_name]
    (train_labeled_dataset, train_unlabeled_dataset, val_dataset,
     test_dataset, prior) = build_datasets(
         num_labeled=num_labeled,
         positive_label_list=positive_label_list,
         root=root)

    loaders = get_loaders(train_labeled_dataset, train_unlabeled_dataset,
                          val_dataset, test_dataset, batchsize)
    p_loader, x_loader, train_loader, val_loader, test_loader = loaders

    labeled_data = train_labeled_dataset.data
    dim = labeled_data.size / len(labeled_data)

    print("load data success!")
    print("==================")
    summary = (
        ('    # train data: ', len(x_loader.dataset)),
        ('    # labeled train data: ', len(p_loader.dataset)),
        ('    # val data: ', len(val_loader.dataset)),
        ('    # test data: ', len(test_loader.dataset)),
        ('    prior: ', prior),
        ('    dim: ', dim),
    )
    for caption, value in summary:
        print(caption, value)

    return (p_loader, x_loader, train_loader, val_loader, test_loader,
            train_labeled_dataset, dim, prior)
