import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from RUG import RUGClassifier
import Datasets as DS

# Benchmark problems: loader callables taken from the Datasets module. The
# experiment loop below calls each one with the data directory.
problems = [
    DS.banknote,
    DS.hearts,
    DS.ILPD,
    DS.ionosphere,
    DS.liver,
    DS.diabetes_pima,
    DS.tictactoe,
    DS.transfusion,
    DS.wdbc,
    DS.adult,
    DS.bank_mkt,
    DS.magic,
    DS.mushroom,
    DS.musk,
]

# Output file; the results of every problem are appended to it.
fname = 'RUG_vs_Others_w_threshold_results.txt'

# Cross-validation setup
numOutCV = 10           # number of splits of the outer StratifiedKFold
numInCV = 3             # number of splits of the inner StratifiedKFold
randomState = 29232516  # seed shared by both splitters and RUGClassifier

# RUGClassifier settings
ruleLengthCost = True   # passed as rule_length_cost
maxRMPcalls = 5         # passed as max_RMP_calls; default is 30
rhsEps = 0.01           # passed as eps
wthreshold = 0.05       # weight threshold, passed as threshold

# NOTE(review): the two flags below are not referenced anywhere in the
# visible part of this script.
falseNegativeCost = False
useAdaWeights = True

# Nested cross-validation of RUG on every benchmark problem.
# For each problem: the outer stratified folds give the reported test scores,
# and within each outer training set an inner stratified CV picks max_depth.
# Per-fold accuracy, rule count and average rule length are appended to fname.
for problem in problems:

    pname = problem.__name__.upper()
    print(pname)

    # The last column is used as the label, all preceding columns as features.
    df = np.array(problem('datasets/'))
    X = df[:, 0:-1]
    y = df[:, -1]

    # Setting up the parameter grid
    RUG_pgrid = {'max_depth': [1, 2, 3]}

    # Per-fold results of the outer CV, keyed by method name.
    scores = {'RUG': []}
    numOfRules = {'RUG': []}
    avgRuleLengths = {'RUG': []}

    skf = StratifiedKFold(n_splits=numOutCV, shuffle=True,
                          random_state=randomState)

    for foldnum, (train_index, test_index) in enumerate(skf.split(X, y),
                                                        start=1):
        print('Fold number: ', foldnum)

        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        inner_cv = StratifiedKFold(n_splits=numInCV, shuffle=True,
                                   random_state=randomState)

        # RUG parameter selection with CV.
        # Start below any attainable accuracy so that the first candidate is
        # always accepted: with a starting score of 0 and a strict '>' test,
        # bestmd stayed unassigned (or stale from the previous fold) whenever
        # every candidate scored 0.0.
        bestscore = float('-inf')
        bestmd = None
        for md in RUG_pgrid['max_depth']:
            RUGestimator = RUGClassifier(max_depth=md, eps=rhsEps,
                                         threshold=wthreshold,
                                         rule_length_cost=ruleLengthCost,
                                         max_RMP_calls=maxRMPcalls,
                                         random_state=randomState)
            # NOTE(review): the same estimator object is refit on every inner
            # fold; this presumes fit() starts from scratch - confirm in RUG.
            avgscore = 0
            for etrain_index, etest_index in inner_cv.split(X_train, y_train):
                eX_train, eX_test = X_train[etrain_index], X_train[etest_index]
                ey_train, ey_test = y_train[etrain_index], y_train[etest_index]
                RUGestimator.fit(eX_train, ey_train)
                RUG_pred = RUGestimator.predict(eX_test)
                avgscore += accuracy_score(ey_test, RUG_pred)

            avgscore /= inner_cv.n_splits
            # Strict comparison: on ties the earliest (smallest) depth wins.
            if avgscore > bestscore:
                bestscore = avgscore
                bestmd = md

        # RUG fit on the whole outer training set with the selected depth
        RUGestimator = RUGClassifier(max_depth=bestmd, eps=rhsEps,
                                     threshold=wthreshold,
                                     rule_length_cost=ruleLengthCost,
                                     max_RMP_calls=maxRMPcalls,
                                     random_state=randomState)
        RUGestimator.fit(X_train, y_train)
        RUG_pred = RUGestimator.predict(X_test)
        # (y_true, y_pred) order, matching the inner-CV call above.
        scores['RUG'].append(accuracy_score(y_test, RUG_pred))
        numOfRules['RUG'].append(RUGestimator.getNumOfRules())
        avgRuleLengths['RUG'].append(RUGestimator.getAvgRuleLength())

    # Append this problem's results; append mode keeps earlier runs intact.
    with open(fname, 'a') as f:
        print('--->', file=f)
        print(pname, file=f)
        print('Accuracy Scores:', file=f)
        print(scores, file=f)
        print('Rule Numbers:', file=f)
        print(numOfRules, file=f)
        print('Average Rule Lengths:', file=f)
        print(avgRuleLengths, file=f)
        print('<---\n', file=f)
        
