import numpy as np
import pandas as pd
import random
from scipy.stats import norm
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor

from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn import svm
import sklearn.model_selection as sk_ms
from sklearn.model_selection import train_test_split

def get_model_params(name):
    """Return an unfitted model and its hyper-parameter grid for grid search.

    Args:
        name: Model identifier. Supported values are
            'Decision Tree Regression', 'Random Forrest Regression'
            (the spelling 'Random Forest Regression' is accepted as well),
            'XGBoost', 'Gradient Boosting Regressor', 'Ridge',
            'knn_classifier', 'knn_regression' and 'svm kernel'.

    Returns:
        A ``(model, params)`` tuple: a freshly constructed estimator and the
        dict of candidate values to search over. ``params`` is ``None`` for
        'knn_classifier', for which no tuning grid is defined here.

    Raises:
        ValueError: If ``name`` is not one of the supported identifiers.
    """
    # All tree-based models are tuned over the same depth candidates; the
    # dict is rebuilt on every call so callers never share a mutable grid.
    depth_grid = {'max_depth': [1, 2, 3, 5, 8, 10, 15, 20]}

    if name == 'Decision Tree Regression':
        return DecisionTreeRegressor(), depth_grid

    # 'Random Forrest Regression' is the historical (misspelled) key and must
    # keep working; the correct spelling is accepted as an alias.
    if name in ('Random Forrest Regression', 'Random Forest Regression'):
        return RandomForestRegressor(), depth_grid

    if name == 'XGBoost':
        return xgb.XGBRegressor(objective='reg:squarederror'), depth_grid

    if name == 'Gradient Boosting Regressor':
        return GradientBoostingRegressor(), depth_grid

    if name == 'Ridge':
        return Ridge(), {'alpha': np.arange(0, 1, 0.5)}

    if name == 'knn_classifier':
        return KNeighborsClassifier(), None

    if name == 'knn_regression':
        knn_grid = {'n_neighbors': [1, 2, 3, 5, 8, 10, 15, 20],
                    'weights': ['uniform', 'distance']}
        return KNeighborsRegressor(), knn_grid

    if name == 'svm kernel':
        svr_grid = {'C': [0.1, 1, 10, 100],
                    'gamma': [1, 0.1, 0.01, 0.001],
                    'kernel': ['rbf', 'poly', 'sigmoid']}
        return svm.SVR(), svr_grid

    # Previously an unknown name fell through and failed with an opaque
    # UnboundLocalError on the return statement.
    raise ValueError('Unknown model name: {!r}'.format(name))

def gridsearch(X, Y, name, seed=7):
    """Fit the model identified by ``name`` on ``(X, Y)``, tuning it by grid search.

    Args:
        X: Feature data passed straight to ``fit``.
        Y: Target data passed straight to ``fit``.
        name: 'Regression' for an untuned ``LinearRegression``; any other
            value is forwarded to ``get_model_params`` to obtain the model
            and its parameter grid.
        seed: Accepted but not used by this function.

    Returns:
        The fitted ``LinearRegression`` when ``name == 'Regression'``,
        otherwise the fitted ``GridSearchCV`` object (scored with R^2,
        ``refit=True``).
    """
    if name == 'Regression':
        # Plain regression without regularization has nothing to fine tune.
        return LinearRegression().fit(X, Y)

    base_model, grid = get_model_params(name)
    search = sk_ms.GridSearchCV(
        estimator=base_model,
        param_grid=grid,
        scoring='r2',
        refit=True,
    )
    return search.fit(X, Y)


def cvar_step(X, Y, control_name, alpha_list, loss_name, random_state=1):
    """Estimate the CVaR of the predicted loss with two-fold cross-fitting.

    The rows of ``X`` are split in half. On each half an XGBoost model is
    tuned (via ``gridsearch``) to predict the ``loss_name`` column from the
    ``control_name`` column(s); its predictions on the *other* half are then
    used to compute the upper-tail average at every level in ``alpha_list``.
    The two per-fold estimates are averaged.

    Args:
        X: Table indexable by column label (``X[control_name]`` and
            ``X[loss_name]`` are both read from it).
        Y: Split alongside ``X`` but not otherwise used.
        control_name: Column label(s) of the model inputs.
        alpha_list: Sequence of CVaR levels, each in ``[0, 1)``.
        loss_name: Column label of the regression target inside ``X``.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        NumPy array with one CVaR estimate per entry of ``alpha_list``.

    Raises:
        ValueError: If any level lies outside ``[0, 1)``.
    """
    levels = np.asarray(alpha_list, dtype=float)
    # Reject bad levels before paying for two model fits; alpha == 1 would
    # otherwise divide by a zero tail mass.
    if not np.all((levels >= 0) & (levels < 1)):
        raise ValueError('every alpha in alpha_list must satisfy 0 <= alpha < 1')

    # Honour the caller's seed (it used to be silently replaced by 1).
    X_first, X_second, _, _ = train_test_split(
        X, Y, test_size=0.5, random_state=random_state)

    fold_estimates = []
    for fit_fold, eval_fold in ((X_first, X_second), (X_second, X_first)):
        h = gridsearch(fit_fold[control_name], fit_fold[loss_name], 'XGBoost')
        h_loss_sorted = np.sort(h.predict(eval_fold[control_name]))
        h_len = len(h_loss_sorted)

        estimates = np.zeros(len(levels))
        for k, alpha in enumerate(levels):
            # The tail holds (1 - alpha) * h_len observations' worth of mass:
            # n_full whole observations plus a fraction of the next one down.
            tail_mass = (1 - alpha) * h_len
            n_full = int(np.floor(tail_mass))
            cutoff_idx = h_len - n_full

            # With 0-based indexing the whole observations are
            # h_loss_sorted[cutoff_idx:], and the partially weighted one sits
            # just below them at cutoff_idx - 1.
            s = np.sum(h_loss_sorted[cutoff_idx:])
            if cutoff_idx > 0:
                s += h_loss_sorted[cutoff_idx - 1] * (tail_mass - n_full)
            estimates[k] = s / tail_mass
        fold_estimates.append(estimates)

    return (fold_estimates[0] + fold_estimates[1]) / 2

def cvar(h_list, alpha_list):
    """Return the upper-tail mean of ``h_list`` for each level in ``alpha_list``.

    The values are sorted in ascending order and, for every ``alpha``, the
    entries from index ``floor(alpha * len(h_list))`` onwards are averaged.

    Args:
        h_list: Sequence of numeric values.
        alpha_list: Iterable of levels.

    Returns:
        A list with one tail mean per level, in the order of ``alpha_list``.
    """
    ordered = np.sort(h_list)
    count = len(ordered)
    return [
        np.mean(ordered[int(np.floor(level * count)):])
        for level in alpha_list
    ]

def estimator(X, Y, control_name, loss_name, random_state=1):
    """Tune an XGBoost model on one half of the data and return it.

    Args:
        X: Table indexable by column label (``X[control_name]`` and
            ``X[loss_name]`` are both read from it).
        Y: Split alongside ``X`` but not otherwise used.
        control_name: Column label(s) of the model inputs.
        loss_name: Column label of the regression target inside ``X``.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        The fitted search object returned by ``gridsearch`` for 'XGBoost'.
    """
    # Honour the caller's seed (it used to be silently replaced by 1); only
    # the first half of X is needed for fitting.
    X_S1, _, _, _ = train_test_split(
        X, Y, test_size=0.5, random_state=random_state)
    return gridsearch(X_S1[control_name], X_S1[loss_name], 'XGBoost')