from utils import DataLoader, iso_scale, normalize, compute_quadratic_features
import numpy as np
from sklearn.preprocessing import scale
from sklearn.model_selection import GridSearchCV, train_test_split
from quad_jax_copy import QuadraticClassifier
from sklearn.svm import LinearSVC
import pandas as pd
import argparse

# --- Experiment-wide settings ---------------------------------------------
RANDOM_STATE = 0  # seed constant; not referenced in the visible part of this script
TEST_SIZE = 0.2  # value passed as test_size to train_test_split

# Hyper-parameter grids handed to GridSearchCV.
C_SVM_PARAM = dict(C=[0.001, 0.01, 0.1, 1, 10])
NUC_QUAD_RADIUS_PARAM = dict(lmbda=[0.001, 0.01, 0.1, 1, 10])

MAX_ITER = 10000  # passed as n_epoch to the QuadraticClassifier fits
CORES = 8  # n_jobs for each grid search
DATA_COPIES = CORES * 2  # pre_dispatch for each grid search

# Command-line interface.
#   --dataset    name handed to DataLoader; also embedded in plot/log file names
#   --id         index of this run: seeds the train/test split, tags the
#                output files, and enables plotting when equal to 1
#   --n_cvfolds  number of cross-validation folds used by the grid searches
parser = argparse.ArgumentParser()
# Required: the dataset name is concatenated into output file names further
# down, so a missing value would otherwise surface only as a TypeError after
# the grid searches have already been run.
parser.add_argument("--dataset", required=True, help="dataset name")
parser.add_argument(
    "--id",
    type=int,
    default=0,
    help="index of this run (seeds the train/test split, tags output files)",
)
parser.add_argument("--n_cvfolds", type=int, default=4, help="cross val folds")
args = parser.parse_args()

# Number of cross-validation folds requested on the command line.
n_cvfolds = args.n_cvfolds

# Load the requested dataset and pull out the two arrays used below
# (X is split/fitted on, y is used as the target).
dataset = DataLoader(args.dataset)
X = dataset.X
y = dataset.y

# Result table for this run.  Every entry starts at 0; the logging section at
# the end of the script overwrites the ones it has measurements for.  The key
# order is the row order of the CSV written there.
_METRIC_NAMES = (
    'svm_train_acc',
    'svm_test_acc',
    'svm_scaled_train_acc',
    'svm_scaled_test_acc',
    'nuc_train_acc',
    'nuc_test_acc',
    'nuc_compo_scaled_train_acc',
    'nuc_compo_scaled_test_acc',
    'nuc_scaled_train_acc',
    'nuc_scaled_test_acc',
)
experimental_data = dict.fromkeys(_METRIC_NAMES, 0)

# Hold out TEST_SIZE of the data for testing; the run id is the split seed,
# so different --id values give different splits.
split_parts = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=args.id,
)
X_train, X_test, y_train, y_test = split_parts

# SVM baseline: LinearSVC without intercept, trained on the features returned
# by compute_quadratic_features.  C is chosen by cross-validated grid search
# and the best setting is refit on the whole training split (refit=True), so
# `svm.score` below evaluates that refit model.
X_quad_train, X_quad_test = compute_quadratic_features(
    X_train, X_test, homogeneous=False
)
svm = GridSearchCV(
    LinearSVC(max_iter=10000, fit_intercept=False),
    C_SVM_PARAM,
    cv=n_cvfolds,
    refit=True,
    n_jobs=CORES,
    pre_dispatch=DATA_COPIES,
)
svm.fit(X_quad_train, y_train)
svm_train_acc, svm_test_acc = (
    svm.score(X_quad_train, y_train),
    svm.score(X_quad_test, y_test),
)

# QuadraticClassifier ("nuc") on the raw features: choose lmbda by
# cross-validated grid search, then train one final model on the full
# training split and score it on both splits.
nuc = QuadraticClassifier(dim=X.shape[1])
# refit=False: only best_params_ is read from the search.  The final model is
# trained explicitly on `nuc` right below, so letting GridSearchCV refit its
# own clone would repeat that full-length training run and throw the result
# away.  best_params_ is still populated for a single-metric search.
nuc_grid_search = GridSearchCV(
    nuc,
    NUC_QUAD_RADIUS_PARAM,
    cv=n_cvfolds,
    refit=False,
    n_jobs=CORES,
    pre_dispatch=DATA_COPIES,
)
# batch_size and n_epoch are forwarded to QuadraticClassifier.fit for every
# candidate/fold.
nuc_grid_search.fit(X_train, y_train, batch_size=X_train.shape[0], n_epoch=MAX_ITER)
nuc.lmbda = nuc_grid_search.best_params_['lmbda']
nuc.fit(
    X_train,
    y_train,
    n_epoch=MAX_ITER,
    batch_size=X_train.shape[0],
    plot=(args.id == 1),  # plotting is requested for run id 1 only
    fname=args.dataset + "Para_plot_run_" + str(args.id),
)
nuc_train_acc, nuc_test_acc = nuc.score(X_train, y_train), nuc.score(X_test, y_test)

# ISOTROPIC NORMALIZATION
# Statistics are taken from the training split only: the training data is
# mean-centered and passed to iso_scale, and the test data is shifted by the
# training mean and passed to normalize together with the second value
# returned by iso_scale.
training_mean = np.mean(X_train, axis=0)
X_train_centered = scale(X_train, with_std=False)
X_train_scaled, training_covariance = iso_scale(X_train_centered)
X_test_centered = X_test - training_mean
X_test_scaled = normalize(X_test_centered, training_covariance)

# Repeat the SVM baseline on the normalized data.  `svm` is the same
# GridSearchCV object as before; calling fit() again re-runs the search.
X_quad_train, X_quad_test = compute_quadratic_features(
    X_train_scaled, X_test_scaled, homogeneous=False
)
svm.fit(X_quad_train, y_train)
svm_scaled_train_acc = svm.score(X_quad_train, y_train)
svm_scaled_test_acc = svm.score(X_quad_test, y_test)


# Repeat the QuadraticClassifier experiment on the normalized data: search
# lmbda again with the existing grid-search object, then retrain `nuc` with
# the winning value and score it on both normalized splits.
n_train = X_train.shape[0]  # batch size = number of training rows
nuc_grid_search.fit(X_train_scaled, y_train, batch_size=n_train, n_epoch=MAX_ITER)
nuc.lmbda = nuc_grid_search.best_params_['lmbda']
nuc.fit(
    X_train_scaled,
    y_train,
    n_epoch=MAX_ITER,
    batch_size=n_train,
    plot=(args.id == 1),  # plotting is requested for run id 1 only
    fname=args.dataset + "Para_scaled_plot_run" + str(args.id),
)
nuc_scaled_train_acc = nuc.score(X_train_scaled, y_train)
nuc_scaled_test_acc = nuc.score(X_test_scaled, y_test)


#LOGGING
# Copy the measured accuracies into the result table.  The two
# 'nuc_compo_scaled_*' entries are not assigned anywhere in the visible
# script, so they are written out with their initial value.
experimental_data.update({
    "svm_train_acc": svm_train_acc,
    "svm_test_acc": svm_test_acc,
    "svm_scaled_train_acc": svm_scaled_train_acc,
    "svm_scaled_test_acc": svm_scaled_test_acc,
    "nuc_train_acc": nuc_train_acc,
    "nuc_test_acc": nuc_test_acc,
    "nuc_scaled_train_acc": nuc_scaled_train_acc,
    "nuc_scaled_test_acc": nuc_scaled_test_acc,
})


# One CSV per (dataset, run id).  to_csv does not create directories, so
# ../log must already exist relative to the working directory.
df = pd.Series(experimental_data)
log_path = "../log/" + "para_squared_hinge_" + args.dataset + "_run_" + str(args.id) + ".csv"
df.to_csv(log_path)
