import os
import sys
import numpy as np
import random
import pandas as pd
import math, time
import itertools
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
import datetime
import torch
from torch import nn, optim
from torch.autograd import Variable
import torch.utils.data as data_utils
from torch.utils.data.dataset import Dataset
from sklearn.preprocessing import StandardScaler
import gurobipy as gp
import logging
import copy
from collections import defaultdict
import joblib
import gurobipy as gp
from gurobipy import GRB
import time, datetime

import sys
import ip_model_both_tricks as ip_model_wholeFile
from ip_model_both_tricks import IPOfunc
import linear_relax as LP_relax_file


# ---- Problem dimensions and penalty settings, mirrored from linear_relax ----
nurse_num = LP_relax_file.nurse_num
total_day_num = LP_relax_file.day_num
day_num = LP_relax_file.day_num
shift_num = LP_relax_file.shift_num
day_shift_num = LP_relax_file.day_shift_num
day_work_shift_num = LP_relax_file.day_work_shift_num
decision_num = LP_relax_file.decision_num
t_decision_num = LP_relax_file.t_decision_num
penaltyTerm = LP_relax_file.penaltyTerm
extra_serve_patient_num = LP_relax_file.extra_serve_patient_num
minimum_relax_day = LP_relax_file.minimum_relax_day
maximum_relax_day = LP_relax_file.maximum_relax_day

# ---- Command line: extra_payment startmark endmark timeLim cutLim ----
extra_payment = int(sys.argv[1])
LP_relax_file.set_extra_payment(extra_payment)
startmark = int(sys.argv[2])
endmark = int(sys.argv[3])
timeLim = float(sys.argv[4])  # 0: no time limit; >0: timeLim
cutLim = int(sys.argv[5])  # 0: no cut limit; 1: cut limit
ip_model_wholeFile.set_timeLim_and_cutLim(timeLim, cutLim)

# ---- Experiment constants ----
train_case_num = 70
test_case_num = 30
warm_start_val = 25000  # an alternative value of 3000 was tried previously

featureNum = 8
warm_start_epoch_criterion = 12
stop_epoch_criterion = 20

# ---- Output locations ----
# The data root is the parent of the current working directory. The
# run-specific sub-directory name is assembled once and reused for both the
# mkdir call and default_path.
dataset_path = os.path.abspath(os.path.dirname(os.getcwd()))
_run_subdir = ''.join((
    'day_num=', str(total_day_num),
    '/penalty=', str(penaltyTerm),
    ', extra_serve=', str(extra_serve_patient_num),
    ', extra_payment=', str(extra_payment),
))
LP_relax_file.mkdir(dataset_path, _run_subdir)
default_path = os.path.join(dataset_path, _run_subdir + '/')
LP_relax_file.mkdir(default_path, 'T1S3_T2S1_LCGT')


def weight_init(m):
    """Initialise the parameters of a single module in place.

    Intended for use with ``nn.Module.apply``. Behaviour per module type:

    * ``nn.Linear``: Xavier-normal weights, bias set to 0.
    * ``nn.Conv2d``: Kaiming-normal weights (``fan_out``, ReLU gain); the
      bias is left untouched.
    * ``nn.BatchNorm2d``: weight set to 1, bias set to 0.

    Any other module type is ignored. Parameters that are absent (a layer
    built with ``bias=False`` or ``affine=False`` stores ``None``) are
    skipped instead of raising.
    """
    if isinstance(m, nn.Linear):
        nn.init.xavier_normal_(m.weight)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)

    elif isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    elif isinstance(m, nn.BatchNorm2d):
        if m.weight is not None:
            nn.init.constant_(m.weight, 1)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)

def make_fc(num_layers, num_features, num_targets=1,
            activation_fn = nn.ReLU,intermediate_size=2*featureNum, regularizers = True):
    """Assemble a fully connected network as an ``nn.Sequential``.

    The stack is: an input layer ``num_features -> intermediate_size``,
    then ``num_layers - 2`` hidden layers of width ``intermediate_size``
    (none when that count is not positive), then an output layer
    ``intermediate_size -> num_targets``. Every linear layer, including the
    output layer, is followed by a fresh ``activation_fn()`` instance.

    ``regularizers`` is accepted but not used.
    """
    hidden_count = max(num_layers - 2, 0)
    # Consecutive entries of `widths` are the (in, out) sizes of each layer.
    widths = [num_features] + [intermediate_size] * (hidden_count + 1) + [num_targets]

    modules = []
    for fan_in, fan_out in zip(widths, widths[1:]):
        modules.extend((nn.Linear(fan_in, fan_out), activation_fn()))
    return nn.Sequential(*modules)
        

class MyCustomDataset():
    """Index-aligned pairing of a feature container and a target container.

    Implements ``__len__`` and ``__getitem__`` so it can be handed to a
    ``DataLoader``. The length is taken from ``value``.
    """

    def __init__(self, feature, value):
        self.feature, self.value = feature, value

    def __len__(self):
        # The targets define how many samples exist.
        return len(self.value)

    def __getitem__(self, idx):
        # Return the (feature, target) pair stored at position idx.
        sample = (self.feature[idx], self.value[idx])
        return sample
        

class Intopt:
    """Two-stage trainer for a fully connected predictor.

    Stage 1 (epochs below ``warm_start_epoch_criterion``) minimises the MSE
    between the network output and the targets. Stage 2 (remaining epochs)
    feeds the network output through ``IPOfunc`` and back-propagates the
    resulting objective value plus ``ip_model_wholeFile.incur_penalty``.

    ``fit`` communicates with the driver through three module-level globals
    that it writes: ``stop_epoch``, ``train_loss`` and ``bestTrainCorrReg``.
    It also writes the best stage-2 weights to
    ``'T1S3_T2S1_LCGT_ep<extra_payment>_model.pkl'`` in the working directory.

    Parameters
    ----------
    c, G, A, b, h2, h3, h4, h5 :
        NumPy arrays describing the optimisation problem; they are forwarded
        unchanged to the ``LP_relax_file`` helpers and, converted to float
        tensors, to ``IPOfunc``.
    penalty :
        Flat sequence holding ``decision_num`` penalty entries per instance.
    serve_patient_num :
        Forwarded to ``IPOfunc``.
    n_features, num_layers :
        Input width and depth passed to ``make_fc``.
    smoothing, thr, max_iter, damping :
        Forwarded to ``IPOfunc``.
    method, mu0, target_size :
        Stored on the instance; not read by any method of this class.
    epochs :
        Maximum number of epochs ``fit`` runs.
    optimizer :
        Optimizer class (not instance); called as
        ``optimizer(model.parameters(), **hyperparams)``.
    batch_size :
        DataLoader batch size used for both training and validation.
    **hyperparams :
        Extra keyword arguments for the optimizer (e.g. ``lr``).
    """

    def __init__(self, c, G, A, b, h2, h3, h4, h5, penalty, serve_patient_num, n_features, num_layers=5, smoothing=False, thr=0.1, max_iter=None, method=1, mu0=None,
                 damping=1e-7, target_size=1, epochs=8, optimizer=optim.Adam,
                 batch_size=day_work_shift_num, **hyperparams):
        self.c = c
        self.G = G
        self.A = A
        self.b = b
        self.h2 = h2
        self.h3 = h3
        self.h4 = h4
        self.h5 = h5
        self.penalty = penalty
        self.serve_patient_num = serve_patient_num
        self.target_size = target_size
        self.n_features = n_features
        self.damping = damping
        self.num_layers = num_layers

        self.smoothing = smoothing
        self.thr = thr
        self.max_iter = max_iter
        self.method = method
        self.mu0 = mu0

        self.batch_size = batch_size
        self.hyperparams = hyperparams
        self.epochs = epochs

        # Keep the optimizer class so a fresh optimizer can be built whenever
        # the network itself is replaced (see _rebuild_model).
        self._optimizer_factory = optimizer
        self.model = make_fc(num_layers=self.num_layers, num_features=n_features)
        self.optimizer = optimizer(self.model.parameters(), **hyperparams)

    def _rebuild_model(self):
        """Swap in a freshly initialised network and bind a new optimizer to it.

        An optimizer only updates the parameter objects it was constructed
        with, so replacing ``self.model`` without rebuilding the optimizer
        would leave the new network untrained. The learning rate currently in
        effect is carried over to the new optimizer.
        """
        current_lr = self.optimizer.param_groups[0].get('lr')
        self.model = make_fc(num_layers=self.num_layers, num_features=self.n_features)
        self.optimizer = self._optimizer_factory(self.model.parameters(), **self.hyperparams)
        if current_lr is not None:
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = current_lr

    def _forward_positive(self, feature):
        """Clear gradients and return a strictly positive, finite prediction.

        While the squeezed network output contains a value <= 0, a NaN or an
        infinity, the network is re-initialised and evaluated again.
        """
        self.optimizer.zero_grad()
        op = self.model(feature).squeeze()
        while torch.min(op) <= 0 or torch.isnan(op).any() or torch.isinf(op).any():
            self._rebuild_model()
            op = self.model(feature).squeeze()
        return op

    def _penalty_for(self, instance_index):
        """Return a float copy of the ``decision_num`` penalties of one instance.

        Raises ``IndexError`` when ``self.penalty`` does not hold a complete
        slice for ``instance_index``.
        """
        start = instance_index * decision_num
        chunk = np.array(self.penalty[start:start + decision_num], dtype=float)
        if chunk.shape[0] != decision_num:
            raise IndexError("penalty vector has no complete slice for instance {}".format(instance_index))
        return chunk

    def _warm_start_epoch(self, train_df, criterion):
        """Run one MSE epoch over ``train_df``; return the summed batch losses."""
        running_loss = 0
        train_dl = data_utils.DataLoader(train_df, batch_size=self.batch_size, shuffle=False)
        for batch_feature, batch_value in train_dl:
            op = self._forward_positive(batch_feature)
            loss = criterion(op, batch_value)
            running_loss += loss.item()
            loss.backward()
            self.optimizer.step()
        return running_loss

    def _mean_correction_loss(self, train_df):
        """Average ``correction_single_obj`` over ``train_df`` without training.

        The sum over batches is divided by ``train_case_num``.
        """
        train_dl = data_utils.DataLoader(train_df, batch_size=self.batch_size, shuffle=False)
        # NOTE(review): the instance index is never advanced, so every batch
        # uses the first penalty slice. The driver in this file passes a
        # constant vector (np.full), so results are unaffected there; confirm
        # before using per-instance penalties.
        instance_num = 0
        accumulated = 0
        for batch_feature, batch_value in train_dl:
            op = self._forward_positive(batch_feature)
            penaltyVector = self._penalty_for(instance_num)
            loss_IP = LP_relax_file.correction_single_obj(self.c, self.A, self.b, self.G, batch_value, op, self.h2, self.h3, self.h4, self.h5, penaltyVector)
            accumulated = accumulated + loss_IP
        return accumulated / train_case_num

    def fit(self, feature, value):
        """Train the network on ``(feature, value)`` in two stages.

        Per-epoch scores are kept in ``grad_list``: the summed MSE for
        warm-start epochs (replaced by the mean correction loss on the last
        warm-start epoch when the MSE is at most ``warm_start_val``) and the
        mean correction loss for stage-2 epochs. Training stops early when

        * the score equals the previous epoch's score,
        * a stage-2 score is not lower than the previous score, or
        * the score of epoch ``warm_start_epoch_criterion - 2`` exceeds
          ``warm_start_val``.

        After every improving stage-2 epoch the weights are saved to disk and
        the global ``bestTrainCorrReg`` is updated. The globals ``stop_epoch``
        and ``train_loss`` are updated as training progresses.
        """
        global stop_epoch, train_loss, bestTrainCorrReg

        logging.info("Intopt")
        train_df = MyCustomDataset(feature, value)

        criterion = nn.MSELoss(reduction='mean')
        grad_list = np.zeros(self.epochs)
        # NOTE(review): the return value is not used; the call is kept in case
        # actual_obj has side effects inside linear_relax - confirm and drop.
        LP_relax_file.actual_obj(self.c, self.A, self.b, self.G, value, self.h2, self.h3, self.h4, self.h5, train_case_num)

        for e in range(self.epochs):
            cur_loss_IP = 0
            total_loss = 0
            epoch_start_time = time.time()

            if e < warm_start_epoch_criterion:
                # ---- Stage 1: plain MSE regression ----
                total_loss = self._warm_start_epoch(train_df, criterion)
                grad_list[e] = total_loss
                if e < warm_start_epoch_criterion - 1:
                    print("{} ->".format(total_loss), end=" ")
                else:
                    print("{} ->".format(total_loss))

                stop_epoch = e
                # On the last warm-start epoch, if the MSE is low enough, record
                # the correction loss so stage 2 has a comparable baseline.
                # `e` is tested first so grad_list is never indexed out of
                # range when epochs < warm_start_epoch_criterion.
                if e == warm_start_epoch_criterion - 1 and grad_list[e] <= warm_start_val:
                    cur_loss_IP = self._mean_correction_loss(train_df)
                    print("cur_loss_IP: ", cur_loss_IP)
                    grad_list[e] = cur_loss_IP

            else:
                # ---- Stage 2: loss taken from the IPOfunc solution ----
                lr = 1e-5
                if e == warm_start_epoch_criterion:
                    print(lr)
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = lr

                train_dl = data_utils.DataLoader(train_df, batch_size=self.batch_size, shuffle=False)
                # NOTE(review): never advanced; see _mean_correction_loss.
                instance_num = 0
                batchCnt = 0

                for batch_feature, batch_value in train_dl:
                    op = self._forward_positive(batch_feature)
                    penaltyVector = self._penalty_for(instance_num)

                    c_torch = torch.from_numpy(self.c).float()
                    G_torch = torch.from_numpy(self.G).float()
                    A_torch = torch.from_numpy(self.A).float()
                    b_torch = torch.from_numpy(self.b).float()
                    h2_torch = torch.from_numpy(self.h2).float()
                    h3_torch = torch.from_numpy(self.h3).float()
                    h4_torch = torch.from_numpy(self.h4).float()
                    h5_torch = torch.from_numpy(self.h5).float()
                    penalty_torch = torch.from_numpy(penaltyVector).float()

                    x_sol = IPOfunc(A=A_torch, b=b_torch, G=G_torch, h2=h2_torch, h3=h3_torch, h4=h4_torch, h5=h5_torch, c=c_torch, real_patient=batch_value, penalty=penalty_torch, serve_patient_num=self.serve_patient_num, max_iter=self.max_iter, thr=self.thr, damping=self.damping,
                                    smoothing=self.smoothing)(op)
                    # incur_penalty is a module-level value of ip_model_both_tricks,
                    # read right after the IPOfunc call.
                    incur_penalty = ip_model_wholeFile.incur_penalty
                    loss = (x_sol * c_torch).sum() + incur_penalty

                    if batchCnt % 30 == 0:
                        print("LP_loss: ", loss, end=" ")
                    loss_IP = LP_relax_file.correction_single_obj(self.c, self.A, self.b, self.G, batch_value, op, self.h2, self.h3, self.h4, self.h5, penaltyVector)
                    cur_loss_IP = cur_loss_IP + loss_IP

                    # Remove the cut file if one is present in the working
                    # directory (presumably written by the solver - confirm).
                    if os.path.exists("cutting.mps"):
                        os.remove("cutting.mps")

                    if batchCnt % 30 == 0:
                        print("IP_loss: ", loss_IP)

                    batchCnt += 1
                    total_loss += loss.item()
                    loss = loss / decision_num
                    loss.backward()
                    self.optimizer.step()

                total_loss = total_loss / train_case_num
                logging.info("EPOCH Ends")
                print("Epoch{} ::loss {} ->".format(e,total_loss), end=" ")
                cur_loss_IP = cur_loss_IP / train_case_num
                print("cur_loss_IP: ", cur_loss_IP, end=" ")
                stop_epoch = e
                grad_list[e] = cur_loss_IP

            epoch_end_time = time.time()
            if e >= warm_start_epoch_criterion:
                print("run this epoch: ", epoch_end_time - epoch_start_time)
            train_loss = total_loss

            # ---- Early-stopping rules ----
            if e > 0 and grad_list[e] == grad_list[e-1]:
                break
            if e > warm_start_epoch_criterion - 1 and grad_list[e] >= grad_list[e-1]:
                break
            # Guarded so that runs with fewer epochs than the warm-start
            # threshold do not index past the end of grad_list.
            probe = warm_start_epoch_criterion - 2
            if probe < self.epochs and grad_list[probe] > warm_start_val:
                break

            if e >= warm_start_epoch_criterion and grad_list[e] < grad_list[e-1]:
                bestTrainCorrReg = grad_list[e]
                # Save this instance's own weights rather than relying on a
                # caller-side global that happens to reference it.
                torch.save(self.model.state_dict(), 'T1S3_T2S1_LCGT_ep' + str(extra_payment) + '_model.pkl')

    def val_loss(self, feature, value):
        """Evaluate the network on held-out data.

        The number of instances is ``int(len(value) / batch_size)``. For each
        batch the first ``day_work_shift_num`` targets and predictions are
        passed to ``LP_relax_file.correction_single_obj`` together with that
        instance's penalty slice.

        Returns
        -------
        tuple
            ``(abs(real_obj - corrected), pred_val)`` where ``real_obj`` comes
            from ``LP_relax_file.actual_obj``, ``corrected`` is the array of
            per-instance correction objectives and ``pred_val`` is a NumPy
            array of all predictions in dataset order.
        """
        n_instance = int(len(value.numpy()) / self.batch_size)
        real_obj = LP_relax_file.actual_obj(self.c, self.A, self.b, self.G, value, self.h2, self.h3, self.h4, self.h5, n_instance=n_instance)

        self.model.eval()
        valid_df = MyCustomDataset(feature, value)
        valid_dl = data_utils.DataLoader(valid_df, batch_size=self.batch_size, shuffle=False)
        corr_obj_list = []
        pred_val = np.zeros(day_work_shift_num*n_instance)

        for num, (batch_feature, batch_value) in enumerate(valid_dl):
            op = self.model(batch_feature).squeeze()

            # correction_single_obj receives index-keyed dicts here.
            real_patient = {}
            pre_patient = {}
            for i in range(day_work_shift_num):
                real_patient[i] = batch_value[i]
                pre_patient[i] = op[i]
                pred_val[num*day_work_shift_num+i] = op[i]

            penaltyVector = self._penalty_for(num)

            corrrlst = LP_relax_file.correction_single_obj(self.c, self.A, self.b, self.G, real_patient, pre_patient, self.h2, self.h3, self.h4, self.h5, penaltyVector)
            corr_obj_list.append(corrrlst)

        self.model.train()
        return abs(real_obj - np.array(corr_obj_list)), pred_val


stopCriterion = 200

print("**** Combination ****")
testTime = 10
# One column per test mark. The width grows with endmark so that
# recordBest[0][testmark] stays in range when more than testTime marks are run.
recordBest = np.zeros((1, max(testTime, endmark)))


print("nurse_num: ", nurse_num, "day_num: ", day_num, "penalty_for_reschedule: ", penaltyTerm, "extra_serve_patient_num: ", extra_serve_patient_num, "extra_payment: ", extra_payment, "stop_epoch_criterion: ",stop_epoch_criterion)


# For every test mark: load its data, train (with retries), reload the saved
# weights, evaluate on the test split and write the predictions to disk.
for testmark in range(startmark, endmark):
    print(testmark)
    stop_epoch = 0
    cost = np.loadtxt(os.path.join(dataset_path, 'day_num=' + str(total_day_num) + '/payment/payment(' + str(testmark) + ').txt'))
    serve_patient_num = np.loadtxt(os.path.join(dataset_path, 'day_num=' + str(total_day_num) + '/serve_patient_num/serve_patient_num(' + str(testmark) + ').txt'))

    # Objective coefficients: each nurse's slots cost cost[i], except slots
    # with j % shift_num == 3, which cost 0. The day_shift_num entries that
    # follow the nurse slots all cost extra_payment.
    c = np.zeros(decision_num)
    for i in range(nurse_num):
        for j in range(day_shift_num):
            if j % shift_num != 3:
                c[i*day_shift_num+j] = cost[i]
            else:
                c[i*day_shift_num+j] = 0
    for i in range(day_shift_num):
        c[nurse_num * day_num * shift_num + i] = extra_payment

    A,b,G,h2,h3,h4,h5 = LP_relax_file.gen_matrix(nurse_num,day_num,shift_num,serve_patient_num,decision_num,day_shift_num)

    # Column 0 is skipped, columns 1..featureNum are the features and column
    # featureNum+1 is the target.
    trainData = np.loadtxt(os.path.join(dataset_path, 'day_num=' + str(total_day_num) + '/train/train(' + str(testmark) + ').txt'))
    penalty_train = np.full(decision_num*train_case_num, penaltyTerm)
    x_train = trainData[:, 1:featureNum+1]
    y_train = trainData[:, featureNum+1]
    feature_train = torch.from_numpy(x_train).float()
    value_train = torch.from_numpy(y_train).float()

    testData = np.loadtxt(os.path.join(dataset_path, 'day_num=' + str(total_day_num) + '/test/test(' + str(testmark) + ').txt'))
    penalty_test = np.full(decision_num*test_case_num, penaltyTerm)
    x_test = testData[:, 1:featureNum+1]
    y_test = testData[:, featureNum+1]
    feature_test = torch.from_numpy(x_test).float()
    value_test = torch.from_numpy(y_test).float()

    damping = 1e-7
    thr = 1e-7
    lr = 1e-4
    bestTrainCorrReg = float("inf")
    max_retrain_time = 10

    # Retrain from scratch until fit() reports (through the global stop_epoch)
    # that it got through the warm-start epochs, or the retry budget runs out.
    start_time = time.time()
    while stop_epoch < warm_start_epoch_criterion - 1 and max_retrain_time > 0:
        max_retrain_time = max_retrain_time - 1
        clf = Intopt(c, G, A, b, h2, h3, h4, h5, penalty_train, serve_patient_num, damping=damping, lr=lr, n_features=featureNum, thr=thr, epochs=stop_epoch_criterion)
        clf.fit(feature_train, value_train)
    # Taken unconditionally once the loop is done, so end_time is always
    # defined and never left over from a previous test mark.
    end_time = time.time()

    clfBest = Intopt(c, G, A, b, h2, h3, h4, h5, penalty_test, serve_patient_num, damping=damping, lr=lr, n_features=featureNum, thr=thr, epochs=10)
    # NOTE(review): fit() only writes this file after an improving stage-2
    # epoch; if none occurred for this mark the file is missing or belongs to
    # an earlier mark.
    clfBest.model.load_state_dict(torch.load('T1S3_T2S1_LCGT_ep' + str(extra_payment) + '_model.pkl'))

    # This forward pass is only used to size the output table.
    value = clfBest.model(feature_test).squeeze()
    value = value.detach().numpy()
    predValue = np.zeros((value.size, 3))

    val_rslt, pred_val = clfBest.val_loss(feature_test, value_test)

    # Output columns: instance index, true target, predicted value.
    for i in range(value.size):
        predValue[i][0] = int(i/day_work_shift_num)
        predValue[i][1] = value_test[i]
        predValue[i][2] = pred_val[i]
    np.savetxt(os.path.join(default_path, 'T1S3_T2S1_LCGT/T1S3_T2S1_LCGT_test(' + str(testmark) + ').txt'), predValue, fmt="%.2f")

    HSD_rslt = str(testmark) + ' avgPReg: ' + str(np.mean(val_rslt)) + ' runtime: ' + str(end_time-start_time)
    print(HSD_rslt)
    recordBest[0][testmark] = np.sum(val_rslt)

print(recordBest)
