#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 11 12:52:34 2020

@author: sibirbil
"""
import numpy as np
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from rulediscovery import RUXClassifier, RUGClassifier
import Datasets as DS

# ---- Experiment configuration -------------------------------------------

# Dataset loader callables; each one is called with the data directory and
# its __name__ is used as the heading of the printed report.
problems = [DS.covtype]

# Solver name handed to the RUX/RUG classifiers.
solver = 'gurobi' # or 'glpk'

# Value passed as `eps` to the RUX/RUG classifiers.
rhsEps = 0.01

# Depth limit used for the random forest, the AdaBoost base tree and RUG.
maxDepth = 3

# Seed passed as `random_state` to the train/test split and the classifiers.
randomState = 25016

print('Random State, Maximum Depth: %d, %d\n' % (randomState, maxDepth))

# Scalability experiment: for every data set, draw subsamples of increasing
# size and compare a random forest, AdaBoost, their RUX-pruned versions and
# RUG on a 70/30 train/test split of each subsample.
for problem in problems:
    pname = problem.__name__.upper()
    print(pname)

    df = np.array(problem('datasets/'))
    numRows = df.shape[0]
    # The label is the last column. Every subsample has to contain all the
    # classes present in the full data set, so count them here instead of
    # hard-coding the number for one particular data set.
    numClasses = len(np.unique(df[:, -1]))
    rowsets = [50000, 100000, 150000, 200000, 250000, 300000]

    # Seeded generator: the subsamples are reproducible, like every other
    # random step of the experiment that receives randomState.
    rng = np.random.RandomState(randomState)

    for rowlen in rowsets:
        # Distinct rows are drawn, so the request is capped at the number
        # of rows that are actually available.
        sampleSize = min(rowlen, numRows)

        # Redraw until the subsample contains every class.
        leny = 0
        while leny != numClasses:
            # Population first, sample size second; no replacement, so that
            # no row can end up in both the training and the test split.
            rows = rng.choice(numRows, size=sampleSize, replace=False)
            X = df[rows, 0:-1]
            y = df[rows, -1]
            leny = len(np.unique(y))
            print(leny)

        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, random_state=randomState, test_size=0.3)

        print('\n\n#### RESULTS #### \n')
        print('Number of samples: ', sampleSize)

        # --- Random forest and the RUX model built from it ---
        RF = RandomForestClassifier(max_depth=maxDepth, random_state=randomState)
        RF_fit = RF.fit(X_train, y_train)
        RF_pred = RF_fit.predict(X_test)

        # accuracy_score takes (y_true, y_pred); accuracy is symmetric, so
        # the reported value does not depend on the order.
        print('Accuracy of RF: ', accuracy_score(y_test, RF_pred))

        RUXRF = RUXClassifier(rf=RF_fit, eps=rhsEps,
                              rule_length_cost=True,
                              false_negative_cost=False,
                              solver=solver,
                              random_state=randomState)
        RUXRF.fit(X_train, y_train)
        RUXRF_pred = RUXRF.predict(X_test)

        print('Total number of RF rules: ', RUXRF.getInitNumOfRules())
        print('Total number of rules in RUX(RF): ', RUXRF.getNumOfRules())
        print('Accuracy of RUX(RF): ', accuracy_score(y_test, RUXRF_pred))
        print('Training time for RUX(RF)', RUXRF.getFitTime())
        print('Prediction time for RUX(RF)', RUXRF.getPredictTime())

        # --- AdaBoost and the RUX model built from it ---
        # NOTE(review): newer scikit-learn releases rename `base_estimator`
        # to `estimator`; confirm against the installed version before
        # changing it, RUXClassifier may rely on the old attribute names.
        ADA = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=maxDepth),
                                 algorithm='SAMME',
                                 random_state=randomState)
        ADA_fit = ADA.fit(X_train, y_train)
        ADA_pred = ADA_fit.predict(X_test)

        print('Accuracy of ADA: ', accuracy_score(y_test, ADA_pred))

        RUXADA = RUXClassifier(ada=ADA_fit, eps=rhsEps,
                               use_ada_weights=True,
                               solver=solver,
                               random_state=randomState)
        RUXADA.fit(X_train, y_train)
        RUXADA_pred = RUXADA.predict(X_test)

        print('Accuracy of RUX(ADA): ', accuracy_score(y_test, RUXADA_pred))
        print('Total number of ADA rules: ', RUXADA.getInitNumOfRules())
        print('Total number of rules in RUX(ADA): ', RUXADA.getNumOfRules())
        print('Training time for RUX(ADA)', RUXADA.getFitTime())
        print('Prediction time for RUX(ADA)', RUXADA.getPredictTime())

        # --- RUG, trained directly on the data ---
        RUG = RUGClassifier(eps=rhsEps,
                            max_depth=maxDepth,
                            rule_length_cost=True,
                            false_negative_cost=False,
                            solver=solver,
                            random_state=randomState)
        RUG.fit(X_train, y_train)
        RUG_pred = RUG.predict(X_test)

        print('Accuracy of RUG: ', accuracy_score(y_test, RUG_pred))
        print('Total number of rules in RUG: ', RUG.getNumOfRules())
        print('Training time for RUG', RUG.getFitTime())
        print('Prediction time for RUG', RUG.getPredictTime())

        print()
