import numpy as np
import os
import sys
import random
import torch
import torchvision
import torchvision.transforms as transforms
from utils.dataset_utils import check, separate_data, split_data, save_file


# Seed Python's `random` module and NumPy's global RNG with a fixed value at
# import time, so any code that draws from these generators starts from the
# same state on every run.
random.seed(1)
np.random.seed(1)
# num_clients = 20
# Number of label classes; passed to generate_mnist() by the __main__ block.
# NOTE(review): presumably the class count of the EMNIST "byclass" split
# loaded in generate_mnist() -- confirm against the torchvision docs.
num_classes = 62
# Root directory for raw and generated data. Sub-paths are appended by plain
# string concatenation, so the trailing "/" is required.
dir_path = "emnist/"


# Allocate data to users
def generate_mnist(dir_path, num_clients, num_classes, niid, balance, partition, alpha):
    """Download EMNIST ("byclass" split), partition it across clients and save it.

    The train and test splits are downloaded under ``dir_path + "rawdata"``,
    transformed, merged into a single image array and label array, and then
    handed to the project helpers ``separate_data``, ``split_data`` and
    ``save_file``.

    Args:
        dir_path: Root output directory. Sub-paths are appended by string
            concatenation, so it must end with a path separator.
        num_clients: Number of clients; also used in the output directory name.
        num_classes: Number of label classes, forwarded to the helpers.
        niid: Forwarded to ``check``/``separate_data``/``save_file`` and used
            in the output directory name.
        balance: Forwarded to the helpers and used in the output directory name.
        partition: Forwarded to the helpers.
        alpha: Forwarded to the helpers and used in the output directory name.
            NOTE(review): the exact meaning of niid/balance/partition/alpha is
            defined by ``utils.dataset_utils`` -- see that module.

    Returns:
        None. Returns early, without downloading anything, when ``check(...)``
        is truthy (presumably: a matching dataset was already generated).
    """
    # exist_ok avoids the race between a separate existence test and creation.
    os.makedirs(dir_path, exist_ok=True)

    # Setup directory for train/test data
    out_dir = dir_path + f"{num_clients}/{balance}_{niid}_{alpha}/"
    config_path = out_dir + "config.json"
    train_path = out_dir + "train/"
    test_path = out_dir + "test/"

    if check(config_path, train_path, test_path, num_clients, num_classes, alpha, niid, balance, partition):
        return

    # FIX HTTP Error 403: Forbidden -- install a global opener that sends a
    # browser-like User-Agent header with the download requests.
    import urllib.request
    opener = urllib.request.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    urllib.request.install_opener(opener)

    # Get EMNIST data: convert to tensors, then normalize with mean 0.5 / std 0.5.
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5], [0.5])])
    raw_root = dir_path + "rawdata"

    trainset = torchvision.datasets.EMNIST(
        root=raw_root, train=True, split="byclass", download=True, transform=transform)
    testset = torchvision.datasets.EMNIST(
        root=raw_root, train=False, split="byclass", download=True, transform=transform)

    # A batch size equal to the dataset length makes each loader yield exactly
    # one batch holding every transformed sample and its label.
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=len(trainset.data), shuffle=False)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=len(testset.data), shuffle=False)
    train_images, train_labels = next(iter(trainloader))
    test_images, test_labels = next(iter(testloader))

    # Merge train and test (train first) so the helpers can re-split the data.
    # Concatenating whole arrays avoids building one Python object per sample.
    dataset_image = np.concatenate([
        train_images.cpu().detach().numpy(),
        test_images.cpu().detach().numpy(),
    ])
    dataset_label = np.concatenate([
        train_labels.cpu().detach().numpy(),
        test_labels.cpu().detach().numpy(),
    ])

    X, y, statistic = separate_data(
        (dataset_image, dataset_label), num_clients, num_classes, alpha, niid, balance, partition)

    train_data, test_data = split_data(X, y)

    save_file(config_path, train_path, test_path, train_data, test_data,
              num_clients, num_classes, statistic, alpha, niid, balance, partition)


if __name__ == "__main__":
    # Positional command-line arguments:
    #   1: "noniid" sets niid=True; any other value sets it to False
    #   2: "balance" sets balance=True; any other value sets it to False
    #   3: partition value, or "-" for None
    #   4: number of clients (optional, default 20)
    #   5: alpha (optional, default 0.1)
    cli_args = sys.argv[1:]
    if len(cli_args) < 3:
        sys.exit(
            f"usage: {sys.argv[0]} <noniid|iid> <balance|-> <partition|-> "
            "[num_clients] [alpha]"
        )

    niid = cli_args[0] == "noniid"
    balance = cli_args[1] == "balance"
    partition = cli_args[2] if cli_args[2] != "-" else None
    # Optional arguments fall back to their defaults when omitted or empty;
    # indexing sys.argv directly would raise IndexError when they are omitted.
    num_clients = int(cli_args[3]) if len(cli_args) > 3 and cli_args[3] else 20
    alpha = float(cli_args[4]) if len(cli_args) > 4 and cli_args[4] else 0.1

    generate_mnist(dir_path, num_clients, num_classes, niid, balance, partition, alpha)