import numpy as np
import pandas as pd
import sklearn as sk
from sklearn.model_selection import cross_val_score
from method import *

def classifier_evaluation_with_model(classifier, X, Y):
    """Score a classifier while greedily removing its least important feature.

    Prints a 10-fold cross-validated accuracy baseline on the one-hot encoded
    features, then runs two backward-elimination passes: one ranked by
    ``sobol_total_with_classifier`` and one by ``shap_with_classifier``.
    Each pass recomputes the importances after every removal, drops the
    feature with the smallest value (``np.argmin``), refits, and prints the
    new cross-validated accuracy, until a single feature is left.

    Args:
        classifier: Estimator exposing ``fit`` and accepted by
            ``cross_val_score``. It is fitted in place, repeatedly.
        X: Feature table; passed through ``pd.get_dummies`` before use.
        Y: Target labels aligned with the rows of ``X``.

    Returns:
        None. All results are printed.
    """

    # Convert categorical string features to numerical using one-hot encoding
    X = pd.get_dummies(X)

    # baseline evaluation
    model = classifier
    model.fit(X, Y)
    scores = cross_val_score(model, X, Y, cv=10, scoring='accuracy')
    print(f"Baseline score: {scores.mean()} +- {scores.std()}")
    print("-"*20)

    # sobol total evaluation
    X_dropped_lowest = X.copy()
    for _ in range(len(X.columns) - 1):
        total_indices = sobol_total_with_classifier(X_dropped_lowest, Y, model)
        for feature, index in zip(X_dropped_lowest.columns, total_indices):
            print(f"Feature: {feature}, Sobol' Total Index: {index:.4f}")
        min_index = np.argmin(total_indices)
        min_feature = X_dropped_lowest.columns[min_index]
        X_dropped_lowest = X_dropped_lowest.drop(min_feature, axis=1)
        # Refit so the next importance computation receives a model that
        # matches the reduced feature set.
        model.fit(X_dropped_lowest, Y)
        scores = cross_val_score(model, X_dropped_lowest, Y, cv=10, scoring='accuracy')
        print(f"Feature {min_feature} dropped, new score: {scores.mean()} +- {scores.std()}")
        print("-"*10)

    print("Sobol total evaluation done")
    print("-"*20)

    # shapley evaluation
    X_dropped_lowest_shap = X.copy()
    # The Sobol pass leaves the model fitted on its final, reduced feature
    # set. Refit on the full table so the first SHAP computation is handed a
    # model whose features match the data it is asked to explain.
    model.fit(X, Y)
    for _ in range(len(X.columns) - 1):
        shap_values = shap_with_classifier(X_dropped_lowest_shap, Y, model)
        for feature, value in zip(X_dropped_lowest_shap.columns, shap_values):
            print(f"Feature: {feature}, SHAP Value: {value}")
        min_shap_index = np.argmin(shap_values)
        min_shap_feature = X_dropped_lowest_shap.columns[min_shap_index]
        X_dropped_lowest_shap = X_dropped_lowest_shap.drop(min_shap_feature, axis=1)
        model.fit(X_dropped_lowest_shap, Y)
        scores = cross_val_score(model, X_dropped_lowest_shap, Y, cv=10, scoring='accuracy')
        print(f"Feature {min_shap_feature} dropped, new score: {scores.mean()} +- {scores.std()}")
        print("-"*10)

    print("Shapley evaluation done")


def regressor_evaluation_with_model(regressor, X, Y):
    """Score a regressor while greedily removing its least important feature.

    Prints a 10-fold cross-validated R2 baseline, then runs two
    backward-elimination passes: one ranked by ``sobol_total_with_regressor``
    and one by ``shap_with_regressor``. Each pass recomputes the importances
    after every removal, drops the feature with the smallest value
    (``np.argmin``), refits, and prints the new cross-validated R2, until a
    single feature is left.

    Args:
        regressor: Estimator exposing ``fit`` and accepted by
            ``cross_val_score``. It is fitted in place, repeatedly.
        X: Feature table (a DataFrame; ``columns``, ``copy`` and ``drop``
            are used). No encoding is applied here.
        Y: Target values aligned with the rows of ``X``.

    Returns:
        None. All results are printed.
    """

    # baseline evaluation
    model = regressor
    model.fit(X, Y)
    scores = cross_val_score(model, X, Y, cv=10, scoring='r2')
    print(f"Baseline score: {scores.mean()} +- {scores.std()}")
    print("-"*20)

    # sobol total evaluation
    X_dropped_lowest = X.copy()
    for _ in range(len(X.columns) - 1):
        total_indices = sobol_total_with_regressor(X_dropped_lowest, Y, model)
        for feature, index in zip(X_dropped_lowest.columns, total_indices):
            print(f"Feature: {feature}, Sobol' Total Index: {index:.4f}")
        min_index = np.argmin(total_indices)
        min_feature = X_dropped_lowest.columns[min_index]
        X_dropped_lowest = X_dropped_lowest.drop(min_feature, axis=1)
        # Refit so the next importance computation receives a model that
        # matches the reduced feature set.
        model.fit(X_dropped_lowest, Y)
        scores = cross_val_score(model, X_dropped_lowest, Y, cv=10, scoring='r2')
        print(f"Feature {min_feature} dropped, new score: {scores.mean()} +- {scores.std()}")
        print("-"*10)

    print("Sobol total evaluation done")
    print("-"*20)

    # shapley evaluation
    X_dropped_lowest_shap = X.copy()
    # The Sobol pass leaves the model fitted on its final, reduced feature
    # set. Refit on the full table so the first SHAP computation is handed a
    # model whose features match the data it is asked to explain.
    model.fit(X, Y)
    for _ in range(len(X.columns) - 1):
        shap_values = shap_with_regressor(X_dropped_lowest_shap, Y, model)
        for feature, value in zip(X_dropped_lowest_shap.columns, shap_values):
            print(f"Feature: {feature}, SHAP Value: {value}")
        min_shap_index = np.argmin(shap_values)
        min_shap_feature = X_dropped_lowest_shap.columns[min_shap_index]
        X_dropped_lowest_shap = X_dropped_lowest_shap.drop(min_shap_feature, axis=1)
        model.fit(X_dropped_lowest_shap, Y)
        scores = cross_val_score(model, X_dropped_lowest_shap, Y, cv=10, scoring='r2')
        print(f"Feature {min_shap_feature} dropped, new score: {scores.mean()} +- {scores.std()}")
        print("-"*10)

    print("Shapley evaluation done")


def classifier_evaluation(classifier, X, Y):
    """Score a classifier while greedily removing its least important feature.

    Prints a 10-fold cross-validated baseline (no explicit ``scoring`` is
    passed to ``cross_val_score``), then runs two backward-elimination
    passes: one ranked by ``sobol_total_indices`` and one by
    ``calculate_shapley_values``. Both helpers are called with the data only,
    not with the model. Each pass recomputes the importances after every
    removal, drops the feature with the smallest value (``np.argmin``) and
    prints the new cross-validated score, until a single feature is left.

    Args:
        classifier: Estimator exposing ``fit`` and accepted by
            ``cross_val_score``. It is fitted in place, repeatedly.
        X: Feature table (a DataFrame; ``columns``, ``copy`` and ``drop``
            are used).
        Y: Target labels aligned with the rows of ``X``.

    Returns:
        None. All results are printed.
    """

    # baseline evaluation
    model = classifier
    model.fit(X, Y)
    scores = cross_val_score(model, X, Y, cv=10)
    print(f"Baseline score: {scores.mean()} +- {scores.std()}")
    print("-"*20)

    # sobol total evaluation
    X_dropped_lowest = X.copy()
    for _ in range(len(X.columns) - 1):
        total_indices = sobol_total_indices(X_dropped_lowest, Y)
        for feature, index in zip(X_dropped_lowest.columns, total_indices):
            print(f"Feature: {feature}, Sobol' Total Index: {index:.4f}")
        min_index = np.argmin(total_indices)
        min_feature = X_dropped_lowest.columns[min_index]
        X_dropped_lowest = X_dropped_lowest.drop(min_feature, axis=1)
        model.fit(X_dropped_lowest, Y)
        scores = cross_val_score(model, X_dropped_lowest, Y, cv=10)
        print(f"Feature {min_feature} dropped, new score: {scores.mean()} +- {scores.std()}")
        print("-"*10)

    print("Sobol total evaluation done")
    print("-"*20)

    # shapley evaluation
    X_dropped_lowest_shap = X.copy()
    for _ in range(len(X.columns) - 1):
        shap_values = calculate_shapley_values(X_dropped_lowest_shap, Y)
        for feature, value in zip(X_dropped_lowest_shap.columns, shap_values):
            print(f"Feature: {feature}, SHAP Value: {value}")
        min_shap_index = np.argmin(shap_values)
        min_shap_feature = X_dropped_lowest_shap.columns[min_shap_index]
        X_dropped_lowest_shap = X_dropped_lowest_shap.drop(min_shap_feature, axis=1)
        model.fit(X_dropped_lowest_shap, Y)
        scores = cross_val_score(model, X_dropped_lowest_shap, Y, cv=10)
        # Report the feature removed in *this* pass, not the last one the
        # Sobol pass removed.
        print(f"Feature {min_shap_feature} dropped, new score: {scores.mean()} +- {scores.std()}")
        print("-"*10)

    print("Shapley evaluation done")

def regressor_evaluation(regressor, X, Y):
    """Score a regressor while greedily removing its least important feature.

    Prints a 10-fold cross-validated R2 baseline, then runs two
    backward-elimination passes: one ranked by ``sobol_total_indices`` and
    one by ``calculate_shapley_values``. Both helpers are called with the
    data only, not with the model. Each pass recomputes the importances after
    every removal, drops the feature with the smallest value (``np.argmin``)
    and prints the new cross-validated R2, until a single feature is left.

    Args:
        regressor: Estimator exposing ``fit`` and accepted by
            ``cross_val_score``. It is fitted in place, repeatedly.
        X: Feature table (a DataFrame; ``columns``, ``copy`` and ``drop``
            are used).
        Y: Target values aligned with the rows of ``X``.

    Returns:
        None. All results are printed.
    """

    # baseline evaluation
    model = regressor
    model.fit(X, Y)
    scores = cross_val_score(model, X, Y, cv=10, scoring='r2')
    print(f"Baseline R2 score: {scores.mean()} +- {scores.std()}")
    print("-"*20)

    # sobol total evaluation
    X_dropped_lowest = X.copy()
    for _ in range(len(X.columns) - 1):
        total_indices = sobol_total_indices(X_dropped_lowest, Y)
        for feature, index in zip(X_dropped_lowest.columns, total_indices):
            print(f"Feature: {feature}, Sobol' Total Index: {index:.4f}")
        min_index = np.argmin(total_indices)
        min_feature = X_dropped_lowest.columns[min_index]
        X_dropped_lowest = X_dropped_lowest.drop(min_feature, axis=1)
        model.fit(X_dropped_lowest, Y)
        scores = cross_val_score(model, X_dropped_lowest, Y, cv=10, scoring='r2')
        print(f"Feature {min_feature} dropped, new R2 score: {scores.mean()} +- {scores.std()}")
        print("-"*10)

    print("Sobol total evaluation done")
    print("-"*20)

    # shapley evaluation
    X_dropped_lowest_shap = X.copy()
    for _ in range(len(X.columns) - 1):
        shap_values = calculate_shapley_values(X_dropped_lowest_shap, Y)
        for feature, value in zip(X_dropped_lowest_shap.columns, shap_values):
            print(f"Feature: {feature}, SHAP Value: {value}")
        min_shap_index = np.argmin(shap_values)
        min_shap_feature = X_dropped_lowest_shap.columns[min_shap_index]
        X_dropped_lowest_shap = X_dropped_lowest_shap.drop(min_shap_feature, axis=1)
        model.fit(X_dropped_lowest_shap, Y)
        scores = cross_val_score(model, X_dropped_lowest_shap, Y, cv=10, scoring='r2')
        # Report the feature removed in *this* pass, not the last one the
        # Sobol pass removed.
        print(f"Feature {min_shap_feature} dropped, new R2 score: {scores.mean()} +- {scores.std()}")
        print("-"*10)

    print("Shapley evaluation done")

def classifier_eval_old(X_input, Y):
    """Print cross-validated ablation curves for three classifiers.

    Two passes are run on a fresh copy of ``X_input``: the first ranks the
    features once with ``sobol_total_indices``, the second once with
    ``calculate_shapley_values``. In each pass the features are removed in
    ascending order of that ranking (stopping when one column is left), and
    after the baseline and after every removal a random forest, a decision
    tree and a logistic regression are fitted and scored with 10-fold
    ``cross_val_score``. The per-step means and standard deviations are
    printed as lists, one pair per classifier.

    Args:
        X_input: Feature table (a DataFrame); it is copied, not modified.
        Y: Target labels aligned with the rows of ``X_input``.

    Returns:
        None. All results are printed.
    """

    def ablation_pass(header, rank_features, close_with_separator):
        print(header)

        frame = X_input.copy()
        ascending = np.argsort(rank_features(frame, Y))
        removal_queue = frame.columns[ascending]

        learners = (
            ("Random Forest Classifier", sk.ensemble.RandomForestClassifier),
            ("Decision Tree Classifier", sk.tree.DecisionTreeClassifier),
            ("Logistic Regression Classifier", sk.linear_model.LogisticRegression),
        )
        averages = {title: [] for title, _ in learners}
        spreads = {title: [] for title, _ in learners}

        def cross_validate_all(data):
            # Same order as always: forest, tree, logistic regression; each
            # one is fitted on the full data and then cross-validated.
            for title, build in learners:
                estimator = build()
                estimator.fit(data, Y)
                fold_scores = cross_val_score(estimator, data, Y, cv=10)
                averages[title].append(fold_scores.mean())
                spreads[title].append(fold_scores.std())

        # baseline evaluation
        cross_validate_all(frame)

        for column in removal_queue:
            if len(frame.columns) == 1:
                break
            frame = frame.drop(column, axis=1)
            cross_validate_all(frame)

        final_title = learners[-1][0]
        for title, _ in learners:
            print(title)
            print(averages[title])
            print(spreads[title])
            if close_with_separator or title != final_title:
                print("-"*20)

    ablation_pass("Sobol Total Indices", sobol_total_indices, True)

    print("#"*30)

    ablation_pass("Shapley Values", calculate_shapley_values, False)


def _holdout_ablation_classifiers(X_train, X_test, Y_train, Y_test,
                                  ranked_features, repeats=10):
    """Track hold-out scores of three classifiers as features are removed.

    A random forest, a decision tree and a logistic regression are each
    fitted ``repeats`` times on the training split and scored with their
    ``score`` method on the test split. This is done once with all columns
    and again after each feature in ``ranked_features`` is dropped, stopping
    when a single column is left.

    Returns:
        A list of ``(label, means, stds)`` tuples, one per classifier, where
        ``means`` and ``stds`` hold one entry per evaluation step.
    """
    model_factories = (
        ("Random Forest Classifier", sk.ensemble.RandomForestClassifier),
        ("Decision Tree Classifier", sk.tree.DecisionTreeClassifier),
        ("Logistic Regression Classifier", sk.linear_model.LogisticRegression),
    )
    means = {label: [] for label, _ in model_factories}
    stds = {label: [] for label, _ in model_factories}

    def record(train, test):
        runs = {label: [] for label, _ in model_factories}
        for _ in range(repeats):
            for label, make_model in model_factories:
                model = make_model()
                model.fit(train, Y_train)
                runs[label].append(model.score(test, Y_test))
        for label, values in runs.items():
            means[label].append(np.mean(values))
            stds[label].append(np.std(values))

    # baseline evaluation
    record(X_train, X_test)

    for feature in ranked_features:
        if len(X_train.columns) == 1:
            break
        X_train = X_train.drop(feature, axis=1)
        X_test = X_test.drop(feature, axis=1)
        record(X_train, X_test)

    return [(label, means[label], stds[label]) for label, _ in model_factories]


def classifier_eval(X_input, Y):
    """Print hold-out ablation curves for three classifiers.

    Runs one section ranked by ``sobol_total_indices`` and one ranked by
    ``calculate_shapley_values``. Each section one-hot encodes a copy of
    ``X_input``, makes an 80/20 split with ``random_state=42``, ranks the
    features once on the training split, and then removes them in ascending
    order of importance while recording the mean and standard deviation of
    the test-split score over 10 refits per classifier.

    Args:
        X_input: Feature table (a DataFrame); it is copied, not modified.
        Y: Target labels aligned with the rows of ``X_input``.

    Returns:
        None. All results are printed.
    """
    sections = (
        ("Sobol Total Indices", sobol_total_indices),
        ("Shapley Values", calculate_shapley_values),
    )

    for position, (title, importance_fn) in enumerate(sections):
        is_last_section = position == len(sections) - 1
        print(title)

        # Convert categorical string features to numerical using one-hot
        # encoding. This has to happen in every section so that both rankings
        # are evaluated on the same feature set.
        X = pd.get_dummies(X_input.copy())

        X_train, X_test, Y_train, Y_test = sk.model_selection.train_test_split(
            X, Y, test_size=0.2, random_state=42
        )

        importances = importance_fn(X_train, Y_train)
        ranked_features = X.columns[np.argsort(importances)]

        report = _holdout_ablation_classifiers(
            X_train, X_test, Y_train, Y_test, ranked_features
        )

        for index, (label, means, stds) in enumerate(report):
            print(label)
            print(means)
            print(stds)
            # No separator after the very last block of output.
            if not (is_last_section and index == len(report) - 1):
                print("-"*20)

        if not is_last_section:
            print("#"*30)


def _cv_ablation_regressors(X, Y, ranked_features, folds=10):
    """Track cross-validated R2 of three regressors as features are removed.

    A random forest, a decision tree and a linear regression are each fitted
    and scored with ``cross_val_score`` (``scoring='r2'``), once with all
    columns of ``X`` and again after each feature in ``ranked_features`` is
    dropped, stopping when a single column is left.

    Returns:
        A list of ``(label, means, stds)`` tuples, one per regressor, where
        ``means`` and ``stds`` hold one entry per evaluation step.
    """
    model_factories = (
        ("Random Forest Regressor", sk.ensemble.RandomForestRegressor),
        ("Decision Tree Regressor", sk.tree.DecisionTreeRegressor),
        ("Linear Regression Regressor", sk.linear_model.LinearRegression),
    )
    means = {label: [] for label, _ in model_factories}
    stds = {label: [] for label, _ in model_factories}

    def record(frame):
        for label, make_model in model_factories:
            model = make_model()
            # NOTE(review): cross_val_score fits clones of the estimator, so
            # this fit looks redundant - kept to leave the flow unchanged.
            model.fit(frame, Y)
            scores = cross_val_score(model, frame, Y, cv=folds, scoring='r2')
            means[label].append(scores.mean())
            stds[label].append(scores.std())

    # baseline evaluation
    record(X)

    for feature in ranked_features:
        if len(X.columns) == 1:
            break
        X = X.drop(feature, axis=1)
        record(X)

    return [(label, means[label], stds[label]) for label, _ in model_factories]


def regressor_eval_old(X_input, Y):
    """Print cross-validated ablation curves for three regressors.

    Runs one section ranked by ``sobol_total_indices`` and one ranked by
    ``calculate_shapley_values``. Each section works on a fresh copy of
    ``X_input``, ranks the features once, and removes them in ascending order
    of importance while recording the mean and standard deviation of the
    10-fold R2 per regressor. Each section is introduced by its own header
    line so the two groups of results can be told apart.

    Args:
        X_input: Feature table (a DataFrame); it is copied, not modified.
        Y: Target values aligned with the rows of ``X_input``.

    Returns:
        None. All results are printed.
    """
    sections = (
        ("Sobol Total Indices", sobol_total_indices),
        ("Shapley Values", calculate_shapley_values),
    )

    for position, (title, importance_fn) in enumerate(sections):
        is_last_section = position == len(sections) - 1
        print(title)

        X = X_input.copy()

        importances = importance_fn(X, Y)
        ranked_features = X.columns[np.argsort(importances)]

        report = _cv_ablation_regressors(X, Y, ranked_features)

        for index, (label, means, stds) in enumerate(report):
            print(label)
            print(means)
            print(stds)
            # No separator after the very last block of output.
            if not (is_last_section and index == len(report) - 1):
                print("-"*20)

        if not is_last_section:
            print("#"*30)

def _holdout_ablation_regressors(X_train, X_test, Y_train, Y_test,
                                 ranked_features, repeats=10):
    """Track hold-out scores of three regressors as features are removed.

    A random forest, a decision tree and a linear regression are each fitted
    ``repeats`` times on the training split and scored with their ``score``
    method on the test split. This is done once with all columns (the
    baseline) and again after each feature in ``ranked_features`` is dropped,
    stopping when a single column is left.

    Returns:
        A list of ``(label, means, stds)`` tuples, one per regressor, where
        ``means`` and ``stds`` hold one entry per evaluation step, baseline
        first.
    """
    model_factories = (
        ("Random Forest Regressor", sk.ensemble.RandomForestRegressor),
        ("Decision Tree Regressor", sk.tree.DecisionTreeRegressor),
        ("Linear Regression Regressor", sk.linear_model.LinearRegression),
    )
    means = {label: [] for label, _ in model_factories}
    stds = {label: [] for label, _ in model_factories}

    def record(train, test):
        runs = {label: [] for label, _ in model_factories}
        for _ in range(repeats):
            for label, make_model in model_factories:
                model = make_model()
                model.fit(train, Y_train)
                runs[label].append(model.score(test, Y_test))
        for label, values in runs.items():
            means[label].append(np.mean(values))
            stds[label].append(np.std(values))

    # baseline evaluation - recorded for every ranking so that all curves
    # start from the all-features score and have the same length.
    record(X_train, X_test)

    for feature in ranked_features:
        if len(X_train.columns) == 1:
            break
        X_train = X_train.drop(feature, axis=1)
        X_test = X_test.drop(feature, axis=1)
        record(X_train, X_test)

    return [(label, means[label], stds[label]) for label, _ in model_factories]


def regressor_eval(X_input, Y):
    """Print hold-out ablation curves for three regressors.

    Runs one section ranked by ``sobol_total_indices`` and one ranked by
    ``calculate_shapley_values``. Each section copies ``X_input``, makes an
    80/20 split with ``random_state=42``, ranks the features once on the
    training split, and then removes them in ascending order of importance
    while recording the mean and standard deviation of the test-split score
    over 10 refits per regressor. Each section is introduced by its own
    header line and its curves begin with the all-features baseline.

    Args:
        X_input: Feature table (a DataFrame); it is copied, not modified.
        Y: Target values aligned with the rows of ``X_input``.

    Returns:
        None. All results are printed.
    """
    sections = (
        ("Sobol Total Indices", sobol_total_indices),
        ("Shapley Values", calculate_shapley_values),
    )

    for position, (title, importance_fn) in enumerate(sections):
        is_last_section = position == len(sections) - 1
        print(title)

        X = X_input.copy()

        X_train, X_test, Y_train, Y_test = sk.model_selection.train_test_split(
            X, Y, test_size=0.2, random_state=42
        )

        importances = importance_fn(X_train, Y_train)
        ranked_features = X.columns[np.argsort(importances)]

        report = _holdout_ablation_regressors(
            X_train, X_test, Y_train, Y_test, ranked_features
        )

        for index, (label, means, stds) in enumerate(report):
            print(label)
            print(means)
            print(stds)
            # No separator after the very last block of output.
            if not (is_last_section and index == len(report) - 1):
                print("-"*20)

        if not is_last_section:
            print("#"*30)