import numpy as np
from sklearn import preprocessing
from sklearn.datasets import load_svmlight_file
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
import pandas as pd
from sklearn.metrics import accuracy_score

from svsvr import *
from gridsearch_svsvl import *

# Name of the LIBSVM-format dataset to benchmark. The training file is
# ./datasets/<name> and the test file is ./datasets/<name>.t.
# Other names previously used here: "splice_scale", "liver-disorders_scale".
dataset_name = "gisette_scale"
train_path = f'./datasets//{dataset_name}'
test_path = f'./datasets//{dataset_name}.t'

## Loading training data
X_train, y_train = load_svmlight_file(train_path)
# Densify: the estimators below are all given plain ndarrays.
X_train = X_train.toarray()
y_train = np.asarray(y_train)

## Loading test data
# load_svmlight_file infers the number of columns from the highest feature
# index present in the file it is reading. Pin it to the training width so
# the test matrix always lines up with the training matrix, even when the
# test split happens not to contain the last feature(s).
X_test, y_test = load_svmlight_file(test_path, n_features=X_train.shape[1])
X_test = X_test.toarray()
y_test = np.asarray(y_test)

### Train SVSVL
# Candidate values handed to the project-local grid search
# (precomputed_kernel_GridSearchCV comes from the star imports above).
svsvl_grid = {
    'C': [0.1, 1, 10],
    'q_additivity': [1, 2, 3, 5],
}
# Only best_C and best_q are used below; best_K and best_score are unpacked
# but not read again in this script.
best_C, best_q, best_K, best_score = precomputed_kernel_GridSearchCV(
    X_train,
    y_train,
    Cs=svsvl_grid['C'],
    qds=svsvl_grid['q_additivity'],
)

svsvc = SVSVC(q_additivity=best_q, C=best_C)
svsvc.fit(X_train, y_train)

# predict() is fed the output of Shapley_kernel(X_test, X_train, ...) rather
# than raw features -- presumably the test-vs-train kernel matrix; confirm
# against the svsvr module.
K_test = Shapley_kernel(X_test, X_train, q_additivity=best_q)
ytest_svsvc = svsvc.predict(K_test)
acc_svsvc = accuracy_score(ytest_svsvc, y_test)

######## Train RBF Kernel SVM
# Search over C and gamma for an RBF-kernel SVC.
rbf_param_grid = {
    'C': [0.1, 1, 10],
    'gamma': [1, 0.1, 0.01],
    'kernel': ['rbf'],
}

# refit=False: the search only selects hyper-parameters; the final model is
# trained explicitly below.
SVC_grid = GridSearchCV(SVC(), rbf_param_grid, refit=False, verbose=3)
SVC_grid.fit(X_train, y_train)

# best_params_ holds exactly the keys of the grid (C, gamma, kernel), so it
# can be unpacked straight into the constructor.
svc_rbf = SVC(**SVC_grid.best_params_)
svc_rbf.fit(X_train, y_train)

ytest_svc_rbf = svc_rbf.predict(X_test)
acc_svc_rbf = accuracy_score(ytest_svc_rbf, y_test)

######## Train Linear SVM
# 5-fold search over the regularisation strength C.
param_grid = {'C': [0.1, 1, 10]}
linearSVC_grid = GridSearchCV(LinearSVC(), param_grid, cv=5, return_train_score=True)
linearSVC_grid.fit(X_train, y_train)

# GridSearchCV is left at its default refit=True, so the winning
# configuration has already been retrained on the full training set.
# Reuse that model instead of fitting an identical one a second time.
svc_linear = linearSVC_grid.best_estimator_
ytest_svc_linear = svc_linear.predict(X_test)
acc_svc_linear = accuracy_score(ytest_svc_linear, y_test)

####### Train Logistic Regression
# 10-fold search over C on a log scale (1e-3 .. 1e3). Only the L2 (ridge)
# penalty is searched; L1 (lasso) is not included in the grid.
param_grid = {"C": np.logspace(-3, 3, 7), "penalty": ["l2"]}
logreg_grid = GridSearchCV(LogisticRegression(), param_grid, cv=10)

# Standardise features using statistics of the training set only, and
# transform the training matrix once instead of once per use.
# NOTE(review): the scaler is fit on the whole training set before the
# cross-validation split, so each validation fold is scaled with statistics
# that include it. Wrap scaler + model in a Pipeline if strictly
# leakage-free CV scores are required.
preprop = preprocessing.StandardScaler().fit(X_train)
X_train_scaled = preprop.transform(X_train)
logreg_grid.fit(X_train_scaled, y_train)

# Default refit=True: best_estimator_ is already trained on all of the
# scaled training data with the selected C, so no second fit is needed.
logreg = logreg_grid.best_estimator_
ytest_logreg = logreg.predict(preprop.transform(X_test))
acc_logreg = accuracy_score(ytest_logreg, y_test)

####### Train Random Forest
# 5-fold search over forest size and tree depth.
param_grid = {
    'n_estimators': [200, 500, 700],
    'max_depth': [4, 7, 8],
}

rfc_grid = GridSearchCV(estimator=RandomForestClassifier(random_state=42), param_grid=param_grid, cv=5)
rfc_grid.fit(X_train, y_train)

# Use the estimator GridSearchCV refit on the full training set (default
# refit=True). It carries random_state=42 from the search, so the reported
# forest is reproducible and is the same configuration that was selected;
# rebuilding it by hand previously dropped the seed and trained it twice.
rfc = rfc_grid.best_estimator_
ytest_rfc = rfc.predict(X_test)
acc_rfc = accuracy_score(ytest_rfc, y_test)

####### Train Naive Bayes
# Gaussian Naive Bayes with default settings; no hyper-parameter search.
# fit() returns the estimator itself, so construction and training chain.
gnb = GaussianNB().fit(X_train, y_train)
ytest_gnb = gnb.predict(X_test)
acc_gnb = accuracy_score(ytest_gnb, y_test)


## Save to CSV
# Column label -> (test-set predictions, test-set accuracy), one entry per
# model, in the order the columns appear in the output file.
results = {
    'SVSVL': (ytest_svsvc, acc_svsvc),
    "SVC_RBF": (ytest_svc_rbf, acc_svc_rbf),
    'SVC_Linear': (ytest_svc_linear, acc_svc_linear),
    'LR': (ytest_logreg, acc_logreg),
    'RF': (ytest_rfc, acc_rfc),
    'GNB': (ytest_gnb, acc_gnb),
}

# One row per test sample, one column per model.
predictions = np.array([pred for pred, _ in results.values()]).T
df = pd.DataFrame(predictions, columns=list(results))

# The frame has the default 0..n-1 index, so label len(df) is a new row:
# the accuracies are appended as the final line of the table.
df.loc[len(df)] = [acc for _, acc in results.values()]
df.to_csv(dataset_name + ".csv")

print('done')


