import random 
import numpy as np
from scipy.special import expit
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
import copy
from scipy import stats


def mean_confidence_interval(data, confidence=0.95):
	"""Return the sample mean and the half-width of its t-based confidence interval.

	Args:
		data: Sequence of numeric observations; converted with ``np.array``.
		confidence: Two-sided confidence level (default 0.95).

	Returns:
		tuple: ``(mean, margin_of_error)``. The interval itself is
		``mean - margin_of_error`` to ``mean + margin_of_error``.
	"""
	samples = np.array(data)
	# Student-t critical value with len(samples) - 1 degrees of freedom.
	dof = len(samples) - 1
	upper_quantile = (1 + confidence) / 2.
	t_critical = stats.t.ppf(upper_quantile, dof)
	half_width = stats.sem(samples) * t_critical
	return np.mean(samples), half_width

def extract_error_bars(data, errors=None):
	"""Pair a series of means with its error-bar magnitudes.

	Args:
		data: The values to use as means; returned unchanged.
		errors: Optional error-bar values. When omitted, the module-level
			name ``ci_acc`` is used, which keeps the original one-argument
			call working. NOTE(review): ``ci_acc`` is not defined in the
			visible part of this file; it must exist as a global at call
			time or a ``NameError`` is raised.

	Returns:
		tuple: ``(means, errors)``.
	"""
	if errors is None:
		# Legacy path: read the error bars from the global.
		errors = ci_acc
	return data, errors

def add_noise(vec, add_noise_TF):
	"""Optionally perturb every element of ``vec`` with Gaussian noise.

	The noise is drawn from ``N(n**(-1/4), (n**(-1/4))**2)`` where
	``n = len(vec)``, one independent draw per element.

	Args:
		vec: Sequence of numbers (list, NumPy array, pandas Series, ...).
		add_noise_TF: When falsy, ``vec`` is returned untouched.

	Returns:
		``vec`` itself when no noise is requested or ``vec`` is empty;
		otherwise a new object holding ``vec + noise``. The input is never
		modified.
	"""
	if not add_noise_TF:
		return vec
	n = len(vec)
	if n == 0:
		# n**(-1/4) is undefined for n == 0 and there is nothing to perturb.
		return vec
	level = n ** (-1 / 4)
	noise = np.random.normal(loc=level, scale=level, size=n)
	# Out-of-place add: `vec += noise` would overwrite the caller's data,
	# extend (rather than add to) a Python list, and fail on integer arrays.
	return vec + noise

def add_noise_val(val, n, add_noise_TF):
	"""Optionally perturb a single value with Gaussian noise scaled by ``n``.

	Args:
		val: The value to perturb.
		n: Sample size that sets the noise level; mean and standard
			deviation of the noise are both ``n**(-1/4)``.
		add_noise_TF: When falsy, ``val`` is returned as-is.

	Returns:
		``val`` unchanged, or ``val`` plus one noise draw. The draw is a
		length-1 NumPy array, so a plain scalar ``val`` comes back as a
		length-1 array.
	"""
	if not add_noise_TF:
		return val
	level = n ** (-1 / 4)
	val += np.random.normal(loc=level, scale=level, size=1)
	return val

def learn_mu(obs, col_feature, col_label, params=None):
	"""Fit an XGBoost regression booster (outcome model: Y on X and Z).

	A ``DMatrix`` is built from ``obs[col_feature]`` with ``obs[col_label]``
	as the label and passed to ``xgb.train`` with its default number of
	boosting rounds.

	Args:
		obs: Table indexable by column selector (presumably a pandas
			DataFrame; confirm against callers).
		col_feature: Selector for the feature columns.
		col_label: Selector for the regression target column(s).
		params: Optional native XGBoost parameter dict. When ``None``, the
			defaults defined below are used.

	Returns:
		The trained booster returned by ``xgb.train``.
	"""
	dtrain = xgb.DMatrix(obs[col_feature], label=obs[col_label])
	# Identity check: `== None` would invoke a custom __eq__ if one exists.
	if params is None:
		params = {
			'booster': 'gbtree',
			'eta': 0.1,
			'gamma': 0,
			'max_depth': 10,
			'min_child_weight': 1,
			'subsample': 1.0,
			'colsample_bytree': 1,
			'lambda': 0.0,
			'alpha': 0.0,
			'objective': 'reg:squarederror',
			'eval_metric': 'rmse',
			'n_jobs': 4  # Assuming you have 4 cores
		}
	return xgb.train(params, dtrain)

def learn_pi(obs, col_feature, col_label, params=None):
	"""Fit an XGBoost binary classifier (propensity-style model: X on Z).

	A ``DMatrix`` is built from ``obs[col_feature]`` with ``obs[col_label]``
	as the label and passed to ``xgb.train`` with its default number of
	boosting rounds.

	Args:
		obs: Table indexable by column selector (presumably a pandas
			DataFrame; confirm against callers).
		col_feature: Selector for the feature columns.
		col_label: Selector for the binary label column(s).
		params: Optional native XGBoost parameter dict. When ``None``, the
			defaults defined below are used.

	Returns:
		The trained booster returned by ``xgb.train``.
	"""
	dtrain = xgb.DMatrix(obs[col_feature], label=obs[col_label])
	if params is None:
		params = {
			'booster': 'gbtree',
			'eta': 0.5,
			'gamma': 0,
			'max_depth': 20,
			'min_child_weight': 1,
			# XGBoost documents the valid range as (0, 1]. A value of 0.0
			# samples no rows per tree, so nothing is learned from the data.
			'subsample': 1.0,
			'colsample_bytree': 1,
			'objective': 'binary:logistic',  # Change as per your objective
			'eval_metric': 'logloss',  # Change as per your needs
			'reg_lambda': 0.0,
			'reg_alpha': 0.0,
			'nthread': 4
		}

	return xgb.train(params, dtrain)

def learn_multi_pi(obs, col_feature, col_label, params=None):
	"""Fit an XGBoost multi-class classifier (X on Z) with soft-probability output.

	A ``DMatrix`` is built from ``obs[col_feature]`` with ``obs[col_label]``
	as the label and passed to ``xgb.train`` with its default number of
	boosting rounds.

	Args:
		obs: Table indexable by column selector (presumably a pandas
			DataFrame; confirm against callers).
		col_feature: Selector for the feature columns.
		col_label: Selector for the class-label column(s).
		params: Optional native XGBoost parameter dict. When ``None``, the
			defaults defined below are used, with ``num_class`` set to the
			number of distinct label values in ``obs``.

	Returns:
		The trained booster returned by ``xgb.train``.
	"""
	dtrain = xgb.DMatrix(obs[col_feature], label=obs[col_label])
	if params is None:
		params = {
			'booster': 'gbtree',
			'eta': 0.5,
			'gamma': 0,
			'max_depth': 20,
			'min_child_weight': 1,
			# XGBoost documents the valid range as (0, 1]. A value of 0.0
			# samples no rows per tree, so nothing is learned from the data.
			'subsample': 1.0,
			'colsample_bytree': 1,
			'objective': 'multi:softprob',  # Change as per your objective
			# NOTE(review): assumes labels are coded 0..K-1 with every class
			# present in `obs`; confirm against callers.
			'num_class': len(np.unique(obs[col_label])),
			# 'softprob' is an objective suffix, not a metric; multi-class
			# log loss is the metric that matches this objective.
			'eval_metric': 'mlogloss',  # Change as per your needs
			'reg_lambda': 0.0,
			'reg_alpha': 0.0,
			'nthread': 4
		}

	return xgb.train(params, dtrain)

def find_mu_param(obs):
	"""Grid-search ``eta`` and ``max_depth`` for the outcome regressor.

	Every column of ``obs`` except ``'Y'`` is used as a feature and ``'Y'``
	is the target. Candidates are scored by negative RMSE under 2-fold
	cross-validation.

	Args:
		obs: Table with a ``'Y'`` column and a ``columns`` attribute
			(presumably a pandas DataFrame; confirm against callers).

	Returns:
		dict: ``best_params_`` of the fitted grid search, i.e. the winning
		values for the ``'eta'`` and ``'max_depth'`` keys.
	"""
	features = [col for col in obs.columns if col != 'Y']

	# Fixed settings shared by every candidate in the grid.
	xgb_model = xgb.XGBRegressor(
		objective='reg:squarederror',
		eval_metric='rmse',
		n_jobs=4,
		booster='gbtree',
		gamma=0,
		min_child_weight=1,
		subsample=0.8,
		alpha=0,
	)

	# Define the parameter grid
	param_grid = {
		'eta': [0.1, 0.3, 0.5, 1],
		'max_depth': [6, 10, 15]
	}

	# Initialize GridSearchCV
	grid_search = GridSearchCV(
		estimator=xgb_model,
		param_grid=param_grid,
		scoring='neg_root_mean_squared_error',
		cv=2,
		verbose=1,
		n_jobs=-1,
	)

	# Fit the model
	grid_search.fit(obs[features], obs['Y'])

	return grid_search.best_params_

def estimate_odds_ratio(data_0, data_1, col_feature, n_sample, params = None):
	"""Train a classifier that separates rows of ``data_0`` from rows of ``data_1``.

	``n_sample`` rows are drawn from each dataset with a fixed seed (42),
	tagged with a label column ``'L'`` (0 for ``data_0``, 1 for ``data_1``),
	stacked, and handed to ``learn_pi``.

	Args:
		data_0: DataFrame whose sampled rows get label 0.
		data_1: DataFrame whose sampled rows get label 1.
		col_feature: List of feature column names present in both frames.
		n_sample: Number of rows to draw from each frame.
		params: Optional parameter dict forwarded to ``learn_pi``.

	Returns:
		The model returned by ``learn_pi``. No ratio is computed here;
		presumably callers derive odds from its predictions (confirm).
	"""
	label_cols = ['L']
	keep_cols = col_feature + label_cols

	# Same seed for both draws so the sampling is reproducible.
	drawn_0 = data_0.sample(n=n_sample, random_state=42).assign(L=0)
	drawn_1 = data_1.sample(n=n_sample, random_state=42).assign(L=1)

	# Stack the two labelled samples row-wise into one training table.
	stacked = pd.concat([drawn_0[keep_cols], drawn_1[keep_cols]], axis=0)

	return learn_pi(stacked, col_feature, label_cols, params)
