import argparse
import os
import errno
import time
import random

import numpy as np

import torch
import torch.nn as nn
import torch.utils.data

from datasets import GetDataset

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Command-line interface
def _build_parser():
    """Create the argument parser holding every option this script reads."""
    cli = argparse.ArgumentParser(description='Train StoNet Using ASGMCMC')

    # Basic setting: which data set to use and where results are written.
    cli.add_argument(
        '--dataset_name_id', type=int, default=0,
        help='index of data set')
    cli.add_argument(
        '--base_dataset_path', type=str, default='./data/',
        help='folder path of data sets')
    cli.add_argument(
        '--base_path', type=str, default='./result/',
        help='base path for saving result')
    cli.add_argument(
        '--model_path', type=str, default='post_stonet/',
        help='folder name for saving model')
    # The two flags below are parsed as integers (e.g. 1 / 0) and used for
    # their truthiness only.
    cli.add_argument(
        '--regression_flag', type=int, default=True,
        help='true for regression and false for classification')
    cli.add_argument(
        '--confidence_interval_flag', type=int, default=True,
        help='whether to store result to compute confidence interval')

    # Model architecture.
    cli.add_argument(
        '--layer', type=int, default=2,
        help='number of hidden layer')
    cli.add_argument(
        '--unit', type=int, nargs='+', default=[1000, 100],
        help='number of hidden unit in each layer')

    # Training setting.
    cli.add_argument(
        '--save_interval', type=int, default=50,
        help='epoch interval for model saving')
    cli.add_argument(
        '--nepoch', type=int, default=5001,
        help='total number of training epochs')
    cli.add_argument(
        '--batch_train', type=int, default=50,
        help='batch size in training')
    cli.add_argument(
        '--lr', type=float, default=0.001,
        help='step size of sgd')
    return cli


parser = _build_parser()

# Parsed once at import time; the rest of the file reads options from `args`.
args = parser.parse_args()


class Net(nn.Module):
    """Fully connected network with tanh activations on the hidden layers.

    The first layer is registered as ``fc`` and the following ones as
    ``fc2``, ``fc3``, ... up to ``fc<num_hidden + 1>`` (the output layer),
    so ``state_dict`` keys follow that naming.

    Args:
        input_dim: Number of input features.
        output_dim: Number of outputs of the final linear layer.
        num_hidden: Number of hidden layers to build.
        hidden_dim: Widths of the hidden layers. Only the first
            ``num_hidden`` entries are used; extra entries are ignored.

    Raises:
        ValueError: If ``hidden_dim`` is empty or has fewer than
            ``num_hidden`` entries.
    """

    def __init__(self, input_dim, output_dim, num_hidden=2, hidden_dim=(64, 64)):
        super(Net, self).__init__()
        hidden_dim = list(hidden_dim)
        if not hidden_dim or len(hidden_dim) < num_hidden:
            raise ValueError(
                'hidden_dim must provide at least %d layer width(s), got %r'
                % (max(num_hidden, 1), hidden_dim))

        self.num_hidden = num_hidden
        self.fc = nn.Linear(input_dim, hidden_dim[0])

        # Kept as a plain list plus add_module() (rather than nn.ModuleList)
        # so the registered parameter names stay 'fc2', 'fc3', ...
        self.fc_list = []
        for i in range(num_hidden - 1):
            layer = nn.Linear(hidden_dim[i], hidden_dim[i + 1])
            self.fc_list.append(layer)
            self.add_module('fc' + str(i + 2), layer)

        # The output layer must take the width of the last hidden layer that
        # was actually built, which is not hidden_dim[-1] when hidden_dim has
        # more entries than num_hidden.
        last_width = hidden_dim[max(num_hidden, 1) - 1]
        output_layer = nn.Linear(last_width, output_dim)
        self.fc_list.append(output_layer)
        self.add_module('fc' + str(num_hidden + 1), output_layer)

    def forward(self, x):
        """Apply tanh after every hidden layer; the output layer is linear."""
        x = torch.tanh(self.fc(x))
        for layer in self.fc_list[:-1]:
            x = torch.tanh(layer(x))
        return self.fc_list[-1](x)




def _evaluate(net, x, y, loss_func, regression_flag):
    """Run ``net`` on ``x`` and return ``(loss, accuracy)``.

    ``loss`` is the tensor returned by ``loss_func``. ``accuracy`` is ``None``
    when ``regression_flag`` is truthy; otherwise it is the number of rows
    whose arg-max output equals ``y`` divided by the number of rows in ``x``.
    """
    output = net(x)
    loss = loss_func(output, y)
    if regression_flag:
        return loss, None
    prediction = output.data.max(1)[1]
    accuracy = prediction.eq(y.data).sum().item() / x.shape[0]
    return loss, accuracy


def _report(split, loss, accuracy):
    """Print the loss (and the accuracy, when given) for one data split."""
    if accuracy is None:
        print(split + " loss: ", loss)
    else:
        print(split + " loss: ", loss, split + ' accuracy: ', accuracy)


def main():
    """Train the network with Adam on 20 train/test splits of one data set.

    Options are read from the module-level ``args``. For every split id
    ``0..19`` (used both as the ``train_test_split`` random state and as the
    RNG seed) the function:

    1. loads the data set, holds out 20% as test data and divides the rest
       into a "proper" training half and a calibration half;
    2. standardises features and targets with scalers fitted on the proper
       training half only;
    3. trains ``Net`` with mini-batch Adam, printing train/test metrics after
       every epoch and saving the weights every ``save_interval`` epochs;
    4. pickles the metric curves to ``result.txt`` and, when
       ``confidence_interval_flag`` is set, the data and scalers to
       ``data.txt``.

    Output goes to ``<base_path><dataset>/data_split_<id>/<model_path>``.
    """
    import pickle

    save_interval = args.save_interval

    dataset_names = ['Wine',
                     'CCPP',
                     'Protein',
                     'Year'
                     ]
    dataset_name = dataset_names[args.dataset_name_id]
    base_dataset_path = args.base_dataset_path

    num_hidden = args.layer
    hidden_dim = args.unit
    regression_flag = args.regression_flag
    num_epochs = args.nepoch
    base_path = args.base_path
    model_path = args.model_path
    lr = args.lr
    subn = args.batch_train
    confidence_interval_flag = args.confidence_interval_flag

    test_ratio = 0.2
    random_state_train_test = np.arange(20)

    for random_state in random_state_train_test:
        # np.arange yields numpy integers; random.seed() only accepts built-in
        # types (it raises TypeError for numpy.int64 on Python >= 3.11).
        seed = int(random_state)
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(seed)

        # Plain string concatenation is kept so the on-disk layout is
        # unchanged: file names are appended directly to PATH, so model_path
        # is expected to end with a path separator.
        PATH = base_path + dataset_name + '/' + 'data_split_' + str(seed) + '/' + model_path

        X, y = GetDataset(dataset_name, base_dataset_path)
        X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                            y,
                                                            test_size=test_ratio,
                                                            random_state=seed)

        X_train = np.asarray(X_train)
        Y_train = np.asarray(Y_train)
        X_test = np.asarray(X_test)
        Y_test = np.asarray(Y_test)

        n_train = X_train.shape[0]

        print("Size: train (%d, %d), test (%d, %d)" % (X_train.shape[0], X_train.shape[1], X_test.shape[0], X_test.shape[1]))

        # Re-seed so the train/calibration permutation depends only on `seed`.
        np.random.seed(seed)
        idx = np.random.permutation(n_train)

        # Two equally sized halves; with an odd n_train one sample is unused.
        n_half = n_train // 2
        idx_train, idx_cal = idx[:n_half], idx[n_half:2 * n_half]

        # Scalers are fitted on the proper training half only and then
        # applied to the whole training split and to the test split.
        scalerX = StandardScaler()
        scalerX = scalerX.fit(X_train[idx_train])
        X_train = scalerX.transform(X_train)
        X_test = scalerX.transform(X_test)

        # expand_dims(…, 1) turns the targets into a single column, as the
        # scaler requires 2-D input (assumes y is 1-D — TODO confirm).
        scalerY = StandardScaler()
        scalerY = scalerY.fit(np.expand_dims(Y_train[idx_train], 1))
        Y_train = scalerY.transform(np.expand_dims(Y_train, 1))
        Y_test = scalerY.transform(np.expand_dims(Y_test, 1))

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        x_train = torch.FloatTensor(X_train[idx_train]).to(device)
        x_cal = torch.FloatTensor(X_train[idx_cal]).to(device)
        x_test = torch.FloatTensor(X_test).to(device)

        y_train = torch.FloatTensor(Y_train[idx_train]).to(device)
        y_cal = torch.FloatTensor(Y_train[idx_cal]).to(device)
        y_test = torch.FloatTensor(Y_test).to(device)

        print(x_train.shape, x_cal.shape, x_test.shape, y_train.shape, y_cal.shape, y_test.shape)

        ntrain = x_train.shape[0]
        dim = x_train.shape[1]

        # NOTE(review): in the classification branch the targets are still
        # the standardised float columns built above; that looks incompatible
        # with CrossEntropyLoss / arg-max accuracy — confirm before relying
        # on regression_flag=0.
        if regression_flag:
            output_dim = 1
            loss_func = nn.MSELoss()
        else:
            output_dim = int((y_test.max() + 1).item())
            loss_func = nn.CrossEntropyLoss()
            train_accuracy_path = np.zeros(num_epochs)
            test_accuracy_path = np.zeros(num_epochs)
        train_loss_path = np.zeros(num_epochs)
        test_loss_path = np.zeros(num_epochs)
        time_used_path = np.zeros(num_epochs)

        # Re-seed right before building the model so the initial weights
        # depend only on `seed`.
        np.random.seed(seed)
        torch.manual_seed(seed)
        net = Net(dim, output_dim, num_hidden, hidden_dim)
        net.to(device)

        os.makedirs(PATH, exist_ok=True)

        optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=0)

        # Metrics of the untrained network, reported as epoch -1.
        with torch.no_grad():
            print('epoch: ', -1)
            train_loss, train_accuracy = _evaluate(net, x_train, y_train, loss_func, regression_flag)
            _report('train', train_loss, train_accuracy)
            test_loss, test_accuracy = _evaluate(net, x_test, y_test, loss_func, regression_flag)
            _report('test', test_loss, test_accuracy)

        # training
        index = np.arange(ntrain)
        for epoch in range(num_epochs):
            start_time = time.process_time()
            np.random.shuffle(index)
            # Only full batches are used; the last ntrain % subn shuffled
            # samples are skipped in this epoch.
            for iter_index in range(ntrain // subn):
                subsample = index[(iter_index * subn):((iter_index + 1) * subn)]
                output = net(x_train[subsample, ])

                loss = loss_func(output, y_train[subsample, ])

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # print and save result at the end of each epoch
            with torch.no_grad():
                print('epoch: ', epoch)

                train_loss, train_accuracy = _evaluate(net, x_train, y_train, loss_func, regression_flag)
                # .item() stores a plain Python float instead of relying on
                # implicit tensor -> NumPy conversion.
                train_loss_path[epoch] = train_loss.item()
                if not regression_flag:
                    train_accuracy_path[epoch] = train_accuracy
                _report('train', train_loss, train_accuracy)

                test_loss, test_accuracy = _evaluate(net, x_test, y_test, loss_func, regression_flag)
                test_loss_path[epoch] = test_loss.item()
                if not regression_flag:
                    test_accuracy_path[epoch] = test_accuracy
                _report('test', test_loss, test_accuracy)

            if epoch % save_interval == 0:
                torch.save(net.state_dict(), PATH + 'model' + str(epoch) + '.pt')

            # The recorded time covers training, evaluation and checkpointing.
            end_time = time.process_time()
            time_used_path[epoch] = end_time - start_time

        # Metric curves: [train loss, test loss, (train acc, test acc,) time].
        results = [train_loss_path, test_loss_path]
        if not regression_flag:
            results += [train_accuracy_path, test_accuracy_path]
        results.append(time_used_path)
        with open(PATH + 'result.txt', 'wb') as f:
            pickle.dump(results, f)

        if confidence_interval_flag:
            with open(PATH + 'data.txt', 'wb') as f:
                pickle.dump(
                    [X_train, Y_train, X_test, Y_test, x_train, x_cal, x_test, y_train, y_cal, y_test, scalerX, scalerY], f)


# Run the training loop only when this file is executed as a script.
if __name__ == '__main__':
    main()




