import numpy as np
from scipy.optimize import minimize
from scipy.io import arff
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

from utils import *
from WeightedGaussNB import WeightedGaussNB
import re
import warnings

def getXy(filename):
    """Load an ARFF file and return the preprocessed features and labels.

    The file is read with ``scipy.io.arff.loadarff``, wrapped in a DataFrame,
    and its last column is renamed to ``'class'`` before the frame is handed
    to ``preprocess``.

    NOTE(review): ``pd`` and ``preprocess`` are not imported by name in this
    file; they are presumably provided by ``from utils import *`` -- confirm.

    Args:
        filename: Path (or file-like object) accepted by ``arff.loadarff``.

    Returns:
        The ``(X, y)`` pair produced by ``preprocess``.
    """
    data, _meta = arff.loadarff(filename)
    df = pd.DataFrame(data)

    # Rename the last column by position rather than by a name lookup, so the
    # rename stays well-defined even if a column name is repeated.
    column_names = list(df.columns)
    column_names[-1] = 'class'
    df.columns = column_names

    X, y = preprocess(df)
    return X, y

def cal_mse(X, y, w):
    """Return the squared error between predicted probabilities and one-hot labels.

    A ``WeightedGaussNB`` is fitted on ``(X, y)``, given the weights ``w``
    through ``setWeight``, and its ``predict_proba`` output on ``X`` is
    compared with the one-hot encoding of ``y``.

    Args:
        X: Feature matrix with one row per sample.
        y: Class label of each sample (one label per row of ``X``).
        w: Weights passed to ``WeightedGaussNB.setWeight``.

    Returns:
        float: Sum over every sample ``i`` and class ``k`` of
        ``(target[i, k] - prob[i, k]) ** 2``. The value is a total; it is not
        divided by the number of samples.
    """
    clf = WeightedGaussNB()
    clf.fit(X, y)
    clf.setWeight(w)
    prob = np.asarray(clf.predict_proba(X), dtype=float)

    # Map each label to its position among the sorted unique labels.
    # Assumes the probability columns follow that same order (the convention
    # cal_cll also relies on) -- TODO confirm against WeightedGaussNB.
    _, label_idx = np.unique(np.asarray(y).ravel(), return_inverse=True)

    # One-hot targets: 1 in the column of the true class, 0 elsewhere.
    target = np.zeros_like(prob)
    target[np.arange(label_idx.size), label_idx] = 1.0

    return float(np.sum((target - prob) ** 2))

def cal_cll(X, y, w):
    """Return the negative conditional log-likelihood of ``y`` for weights ``w``.

    A ``WeightedGaussNB`` is fitted on ``(X, y)``, given the weights ``w``
    through ``setWeight``, and the log of the probability it assigns to each
    sample's true class is summed over all samples.

    Args:
        X: Feature matrix with one row per sample.
        y: Class label of each sample (one label per row of ``X``).
        w: Weights passed to ``WeightedGaussNB.setWeight``.

    Returns:
        The negated sum of ``log(prob[i, true_class_i])``; smaller is better,
        so the value can be fed directly to a minimiser.
    """
    clf = WeightedGaussNB()
    clf.fit(X, y)
    clf.setWeight(w)

    prob = np.asarray(clf.predict_proba(X))
    # Replace exact zeros so that the logarithm below stays finite.
    prob = np.where(prob == 0, 1e-9, prob)

    # Position of each sample's label among the sorted unique labels; this is
    # used as the column index into ``prob``.
    # Assumes predict_proba orders its columns the same way -- TODO confirm.
    _, label_idx = np.unique(np.asarray(y).ravel(), return_inverse=True)

    true_class_prob = prob[np.arange(label_idx.size), label_idx]
    return -np.sum(np.log(true_class_prob))

def constraint(x):
    """Return how far the entries of ``x`` are from summing to one.

    The result is zero exactly when ``x`` sums to 1, which is the form an
    equality constraint function takes in ``scipy.optimize.minimize``.
    """
    total = np.sum(x)
    return total - 1



def L_BFGS_B(X,y):
    """Search for feature weights that minimise ``cal_cll`` with L-BFGS-B.

    The search starts from a random weight vector normalised to sum to 1,
    with every weight bounded to ``[1e-9, 1 - 1e-9]``.

    Note:
        L-BFGS-B supports box bounds only. SciPy ignores any ``constraints``
        given with this method (and emits a RuntimeWarning), so no sum-to-one
        constraint is enforced here and the returned weights need not sum
        to 1. Use a constraint-aware method such as SLSQP if that property
        is required.

    Args:
        X: Feature matrix; ``X.shape[1]`` determines the number of weights.
        y: Class label of each sample.

    Returns:
        numpy.ndarray: The weight vector at which the optimiser stopped
        (``result.x``).
    """
    n_weights = X.shape[1]

    # Random starting point, rescaled so that the initial weights sum to 1.
    x0 = np.random.rand(n_weights)
    x0 = x0 / x0.sum()

    # Keep every weight strictly inside (0, 1).
    eps = 1e-9
    bounds = [(eps, 1 - eps)] * n_weights

    # No ``constraints`` argument: L-BFGS-B would discard it anyway, and
    # omitting it avoids having to silence SciPy's warning process-wide.
    result = minimize(lambda w: cal_cll(X, y, w), x0, method='L-BFGS-B', bounds=bounds)
    return result.x
