import os
# NOTE(review): this shells out to pip and upgrades scikit-learn every time
# the module is executed or imported; the exit status is ignored, so a failed
# upgrade goes unnoticed. Consider pinning the dependency in the environment
# instead of installing at run time.
os.system("pip install --upgrade scikit-learn")

import sys

# Add the parent directory (relative to the current working directory) to the
# module search path -- presumably so the local `gaminet` package can be
# imported without being installed; confirm against the repository layout.
PACKAGE_PARENT = '..'
sys.path.append(PACKAGE_PARENT)

import torch
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from gaminet import GAMINetRegressor, GAMINetClassifier
from gaminet.utils import local_visualize
from gaminet.utils import global_visualize_density
from gaminet.utils import feature_importance_visualize
from gaminet.utils import plot_trajectory
from gaminet.utils import plot_regularization
import time
import sklearn
import argparse
from sklearn.datasets import load_diabetes
import pandas as pd
import h5py

import argparse

def _rmse(pred, truth):
    """Return the root-mean-square error between two equally sized arrays.

    Both inputs are flattened before subtraction so that an ``(n,)``
    prediction vector and an ``(n, 1)`` target column are compared
    element-wise instead of being broadcast into an ``(n, n)`` matrix.

    Raises:
        ValueError: if the two inputs do not hold the same number of values.
    """
    pred = np.asarray(pred).reshape(-1)
    truth = np.asarray(truth).reshape(-1)
    if pred.shape != truth.shape:
        raise ValueError(
            f"prediction/target size mismatch: {pred.shape[0]} vs {truth.shape[0]}"
        )
    return np.sqrt(np.mean(np.square(pred - truth)))


def _fit_and_report(model, X_train, y_train, X_test, y_test, label):
    """Fit ``model`` on the training split and print the test RMSE for ``label``."""
    model.fit(np.asarray(X_train), np.asarray(y_train).reshape(-1))
    pred_test = model.predict(np.asarray(X_test))
    print(f"Case: {label} | RMSE: {_rmse(pred_test, y_test):.4f}")


def _load_image(path):
    """Load the 'Image' data set and return (X_train, y_train, X_test, y_test).

    Responses come from ``EstimatedResponses.mat`` and features from the
    ``complex_cell_*.npy`` files under ``path``. Only the first 300 samples
    and 1800 features are kept, and the target is response column 810 among
    the columns where ``roiS1 == 1``.
    """
    with h5py.File(path + 'EstimatedResponses.mat', 'r') as f:
        resp_train = f['dataTrnS1'][:]
        resp_val = f['dataValS1'][:]
        roi = f['roiS1'][:]

    X_train = np.load(path + 'complex_cell_train.npy')
    X_test = np.load(path + 'complex_cell_valid.npy')

    sub_sample, sub_feature, v1_idx = 300, 1800, 810

    # Index of the selected response column inside the full response matrix
    # (equivalent to filtering by ROI first and then taking column v1_idx).
    target_col = np.where(roi == 1)[1][v1_idx]

    return (
        X_train[:sub_sample, :sub_feature].astype(np.float32),
        resp_train[:sub_sample, target_col].astype(np.float32),
        X_test[:sub_sample, :sub_feature].astype(np.float32),
        resp_val[:sub_sample, target_col].astype(np.float32),
    )


def _load_diabetes():
    """Load the scikit-learn diabetes data padded with 40 uniform noise features.

    Returns (X_train, y_train, X_test, y_test): 80% of the rows are used for
    training and half of the remaining 20% for testing.
    """
    diabetes = load_diabetes()

    # NOTE(review): the noise columns use the unseeded global NumPy RNG, so
    # results differ from run to run.
    noise = np.random.uniform(low=-1, high=1, size=(diabetes.data.shape[0], 40))
    X = np.concatenate([diabetes.data, noise], axis=1).astype(np.float32)
    y = diabetes.target.astype(np.float32)

    X_train, X_hold, y_train, y_hold = train_test_split(
        X, y, test_size=0.2, random_state=42)
    # The other half of the hold-out set (the validation part) is unused here.
    _, X_test, _, y_test = train_test_split(
        X_hold, y_hold, test_size=0.5, random_state=42)
    return X_train, y_train, X_test, y_test


def _load_chip(path):
    """Load ``Chip_dict.pt`` and return (X_train, y_train, X_test, y_test).

    The training data is cut to its first 150 rows, 21 uniform noise features
    are appended to both splits, and one third of the remaining training rows
    is split off (and not used by this script).
    """
    chip = torch.load(path + 'Chip_dict.pt', weights_only=True)
    X_train = chip['Xtrain'].numpy()
    y_train = chip['ytrain'].numpy().reshape(-1)
    X_test = chip['Xtest'].numpy()
    y_test = chip['ytest'].numpy().reshape(-1)

    print("Down sampling on training data ... ")
    subsize = 150
    X_train, y_train = X_train[:subsize, :], y_train[:subsize]

    # NOTE(review): unseeded global RNG, see the note in _load_diabetes.
    noise_train = np.random.uniform(low=-1, high=1, size=(X_train.shape[0], 21))
    X_train = np.concatenate([X_train, noise_train], axis=1).astype(np.float32)
    y_train = y_train.astype(np.float32)

    X_train, _, y_train, _ = train_test_split(
        X_train, y_train, test_size=0.333, random_state=42)

    noise_test = np.random.uniform(low=-1, high=1, size=(X_test.shape[0], 21))
    X_test = np.concatenate([X_test, noise_test], axis=1)
    return X_train, y_train, X_test, y_test


def _load_csv_case(path, case):
    """Load ``<case>_Train.csv`` / ``<case>_Test.csv``; column ``y`` is the target.

    The ``<case>_Valid.csv`` split is not used by this script, so it is not
    loaded.
    """
    train = pd.read_csv(path + case + '_Train.csv')
    test = pd.read_csv(path + case + '_Test.csv')
    return (
        train.drop(columns=['y']),
        train['y'],
        test.drop(columns=['y']),
        test['y'],
    )


def main():
    """Train a GAMI-Net regressor on the selected data set and print test RMSE.

    Command line:
        --data_name  one of 'Image', 'Diabete' or 'Chip'. Any other value
                     (including the default "None") runs the four CSV cases
                     'wine', 'bike', 'ca' and 'fico' one after another.
    """
    parser = argparse.ArgumentParser(description="Train a model")
    parser.add_argument('--data_name', default="None", type=str, help='type of data')
    args = parser.parse_args()

    path = '../../Dataset/Real-Data-Application/'

    ###################################################
    #                  Model Setting                  #
    ###################################################
    model = GAMINetRegressor(interact_num=10,
                             subnet_size_main_effect=(20, ),
                             subnet_size_interaction=(20, 20),
                             max_epochs=(2000, 2000, 2000),
                             learning_rates=(0.001, 0.001, 0.0001),
                             early_stop_thres=("auto", "auto", "auto"),
                             batch_size=1000,
                             reg_clarity=1,
                             loss_threshold=0.01,
                             warm_start=True,
                             verbose=False,
                             random_state=0)

    ###################################################
    #                Experiment Setting               #
    ###################################################
    if args.data_name == 'Image':
        _fit_and_report(model, *_load_image(path), label=args.data_name)
    elif args.data_name == 'Diabete':
        _fit_and_report(model, *_load_diabetes(), label=args.data_name)
    elif args.data_name == 'Chip':
        _fit_and_report(model, *_load_chip(path), label=args.data_name)
    else:
        ##################################################
        #              Wine/ Bike/ CA/ FICO              #
        ##################################################
        # NOTE(review): the same model instance is re-fitted for every case;
        # this assumes fit() starts from scratch each time -- confirm.
        for case in ['wine', 'bike', 'ca', 'fico']:
            _fit_and_report(model, *_load_csv_case(path, case), label=case)

# Run the experiment only when this file is executed as a script.
if __name__ == "__main__":
    main()


