import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from RUX import RUXClassifier
from RUG import RUGClassifier
import Datasets as DS

# Benchmark datasets: every entry is a loader taken from the Datasets module.
# The experiment loop calls each one with the data directory and uses the
# loader's __name__ as the label of the corresponding results section.
problems = [
    DS.banknote,
    DS.hearts,
    DS.ILPD,
    DS.ionosphere,
    DS.liver,
    DS.diabetes_pima,
    DS.tictactoe,
    DS.transfusion,
    DS.wdbc,
    DS.adult,
    DS.bank_mkt,
    DS.magic,
    DS.mushroom,
    DS.musk,
    DS.oilspill,
    DS.phoneme,
    DS.mammography,
    DS.seeds,
    DS.wine,
    DS.glass,
    DS.ecoli,
    DS.sensorless,
]

# --- Cross-validation layout ---
numOutCV = 10            # n_splits of the outer (evaluation) StratifiedKFold
numInCV = 3              # n_splits of the inner (model-selection) StratifiedKFold
randomState = 29232516   # seed handed to every splitter and estimator

# --- Options forwarded to the RUX / RUG classifiers ---
ruleLengthCost = True       # passed as rule_length_cost= (RUG and RUX on RF)
falseNegativeCost = False   # passed as false_negative_cost= (RUX on RF)
useAdaWeights = True        # passed as use_ada_weights= (RUX on AdaBoost)
rhsEps = 0.01               # passed as eps= to both RUG and RUX

# --- Output ---
fname = 'RUXG_results.txt'  # results file; opened in append mode per dataset

# Nested cross-validation experiment, run once per dataset loader.
#
# For each dataset:
#   * the outer StratifiedKFold (numOutCV folds) yields train/test splits;
#   * on every outer training set an inner StratifiedKFold (numInCV folds)
#     selects hyper-parameters -- by hand for RUG, via GridSearchCV for
#     RF / AdaBoost / DT;
#   * the tuned RF and AdaBoost models are additionally handed to
#     RUXClassifier, which is fitted on the same outer training set;
#   * test accuracy, rule counts and average rule lengths are collected per
#     fold and appended to `fname` once the dataset is finished.
for problem in problems:

    pname = problem.__name__.upper()
    print(pname)

    # The loader result is turned into an array; the last column is used as
    # the target and all preceding columns as features.
    df = np.array(problem('datasets/'))
    X = df[:, 0:-1]
    y = df[:, -1]

    # Initializing Classifiers
    RFestimator = RandomForestClassifier(random_state=randomState)
    ADAestimator = AdaBoostClassifier(random_state=randomState,
                                      algorithm='SAMME')
    DTestimator = DecisionTreeClassifier(random_state=randomState)

    # Setting up the parameter grids
    RF_pgrid = {'max_depth': [1, 2, 3],
                'n_estimators': [100, 200]}

    # scikit-learn 1.2 renamed AdaBoost's `base_estimator` parameter to
    # `estimator` and 1.4 removed the old name.  Keep using the old key
    # whenever the installed version still accepts it (identical behaviour to
    # before) and fall back to the new key otherwise, so the grid search does
    # not fail with an "invalid parameter" error on recent releases.
    if 'base_estimator' in ADAestimator.get_params(deep=False):
        adaBaseKey = 'base_estimator'
    else:
        adaBaseKey = 'estimator'

    ADA_pgrid = {adaBaseKey: [DecisionTreeClassifier(max_depth=depth)
                              for depth in (1, 2, 3)],
                 'n_estimators': [100, 200]}

    DT_pgrid = {'max_depth': [1, 2, 3]}

    RUG_pgrid = {'max_depth': [1, 2, 3]}

    # Per-fold result containers, keyed by method name.
    scores = {'RF': [], 'ADA': [], 'RUX(RF)': [], 'RUX(ADA)': [], 'DT': [], 'RUG': []}
    numOfRules = {'RF': [], 'ADA': [], 'RUX(RF)': [], 'RUX(ADA)': [], 'DT': [], 'RUG': []}
    avgRuleLengths = {'RUX(RF)': [], 'RUX(ADA)': [], 'DT': [], 'RUG': []}

    skf = StratifiedKFold(n_splits=numOutCV, shuffle=True, random_state=randomState)

    for foldnum, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
        print('Fold number: ', foldnum)

        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        inner_cv = StratifiedKFold(n_splits=numInCV, shuffle=True,
                                   random_state=randomState)

        # RUG parameter selection with CV
        # Start below any attainable accuracy and reset bestmd on every fold:
        # with the previous `bestscore = 0` start, a run in which every depth
        # scored 0.0 left bestmd unassigned (NameError) or carried over from
        # an earlier fold/dataset.  Ties still go to the first depth tried.
        bestscore = -np.inf
        bestmd = None
        for md in RUG_pgrid['max_depth']:
            RUGestimator = RUGClassifier(max_depth=md, eps=rhsEps,
                                         rule_length_cost=ruleLengthCost,
                                         random_state=randomState)
            avgscore = 0
            for etrain_index, etest_index in inner_cv.split(X_train, y_train):
                RUGestimator.fit(X_train[etrain_index], y_train[etrain_index])
                RUG_pred = RUGestimator.predict(X_train[etest_index])
                avgscore += accuracy_score(y_train[etest_index], RUG_pred)

            avgscore /= inner_cv.n_splits
            if avgscore > bestscore:
                bestscore = avgscore
                bestmd = md

        # RUG fit: refit with the selected depth on the full outer training set
        RUGestimator = RUGClassifier(max_depth=bestmd, eps=rhsEps,
                                     rule_length_cost=ruleLengthCost,
                                     random_state=randomState)
        RUGestimator.fit(X_train, y_train)
        RUG_pred = RUGestimator.predict(X_test)
        scores['RUG'].append(accuracy_score(y_test, RUG_pred))
        numOfRules['RUG'].append(RUGestimator.getNumOfRules())
        avgRuleLengths['RUG'].append(RUGestimator.getAvgRuleLength())

        # Others: tune each baseline with GridSearchCV on the inner folds.
        # The follow-up work is chosen by `name` rather than by comparing
        # estimator objects.
        for name, est, pgrid in (('RF', RFestimator, RF_pgrid),
                                 ('ADA', ADAestimator, ADA_pgrid),
                                 ('DT', DTestimator, DT_pgrid)):
            gcv = GridSearchCV(estimator=est,
                               param_grid=pgrid,
                               scoring='accuracy',
                               n_jobs=1,
                               cv=inner_cv,
                               verbose=0,
                               refit=True)
            gcv.fit(X_train, y_train)

            # Evaluate with the best estimator (refit on the whole outer
            # training set because refit=True)
            best_est = gcv.best_estimator_
            gcv_pred = best_est.predict(X_test)
            scores[name].append(accuracy_score(y_test, gcv_pred))

            if name == 'DT':
                numOfRules['DT'].append(best_est.get_n_leaves())
                # NOTE: the tree's depth is recorded as its rule-length figure
                avgRuleLengths['DT'].append(best_est.get_depth())

            elif name == 'RF':
                RUXRF = RUXClassifier(rf=best_est,
                                      eps=rhsEps,
                                      rule_length_cost=ruleLengthCost,
                                      false_negative_cost=falseNegativeCost,
                                      random_state=randomState)
                RUXRF.fit(X_train, y_train)
                RUXRF_pred = RUXRF.predict(X_test)
                scores['RUX(RF)'].append(accuracy_score(y_test, RUXRF_pred))
                numOfRules['RUX(RF)'].append(RUXRF.getNumOfRules())
                # The RF rule count is taken from RUX's initial rule count
                numOfRules['RF'].append(RUXRF.getInitNumOfRules())
                avgRuleLengths['RUX(RF)'].append(RUXRF.getAvgRuleLength())

            elif name == 'ADA':
                RUXADA = RUXClassifier(ada=best_est,
                                       eps=rhsEps,
                                       use_ada_weights=useAdaWeights,
                                       random_state=randomState)
                RUXADA.fit(X_train, y_train)
                RUXADA_pred = RUXADA.predict(X_test)
                scores['RUX(ADA)'].append(accuracy_score(y_test, RUXADA_pred))
                numOfRules['RUX(ADA)'].append(RUXADA.getNumOfRules())
                # The AdaBoost rule count is taken from RUX's initial rule count
                numOfRules['ADA'].append(RUXADA.getInitNumOfRules())
                avgRuleLengths['RUX(ADA)'].append(RUXADA.getAvgRuleLength())

    # Append this dataset's results; the file accumulates across datasets
    # (and across repeated runs, since it is never truncated here).
    with open(fname, 'a') as f:
        print('--->', file=f)
        print(pname, file=f)
        print('Accuracy Scores:', file=f)
        print(scores, file=f)
        print('Rule Numbers:', file=f)
        print(numOfRules, file=f)
        print('Average Rule Lengths:', file=f)
        print(avgRuleLengths, file=f)
        print('<---\n', file=f)