from utils import DataLoader, iso_scale, normalize, compute_quadratic_features
import numpy as np
from sklearn.preprocessing import scale
from sklearn.model_selection import ShuffleSplit, GridSearchCV, train_test_split
from quad_jax import QuadraticClassifier, batch_loss, batch_classifier
from sklearn.svm import LinearSVC
import pandas as pd
import matplotlib.pyplot as plt


# --- Reproducibility -------------------------------------------------------
RANDOM_STATE = 0
np.random.seed(RANDOM_STATE)  # seed NumPy's global RNG before anything else runs

# --- Experiment configuration ----------------------------------------------
TEST_SIZE = 0.2   # held-out fraction for the train/test split
d_0 = 8           # base feature count -- assumed to equal X_real.shape[1]; TODO confirm
multiples = [3, 5, 10, 15]  # replication factors (larger ones: 100, 200, 500 are disabled)
lmbda = 0.0001    # `lmbda` argument forwarded to QuadraticClassifier
n_runs = 1        # number of repetitions per replication factor
epochs = 1000     # `n_epoch` argument forwarded to QuadraticClassifier.fit

# --- Data ------------------------------------------------------------------
dataset = DataLoader("diabetes")
# Standardise every feature column (zero mean, unit variance) up front.
X_real = scale(dataset.X)
y = dataset.y

# --- Result containers -----------------------------------------------------
# One list per entry of `multiples`; each list collects one value per run.
nuc_results = [[] for _ in multiples]
fro_results = [[] for _ in multiples]

# For every replication factor m, train a nuclear-norm and a Frobenius-norm
# regularised quadratic classifier on data whose columns have been repeated
# m times, and record the generalisation gap |test loss - train loss|.
for i, m in enumerate(multiples):

    # Each column of X_real is duplicated m times (consecutively), so the
    # feature dimension becomes m * d_0. This does not depend on the run
    # index, so it is computed once per m rather than once per run.
    X = np.repeat(X_real, m, axis=1)
    dim = m * d_0

    for _ in range(n_runs):
        # Build fresh models for every run so that repeated runs are
        # independent; re-fitting one instance could otherwise continue from
        # the previous run's parameters (depends on QuadraticClassifier.fit,
        # which is not visible here). Identical to the old behaviour when
        # n_runs == 1.
        nuc = QuadraticClassifier(dim=dim, lmbda=lmbda, norm='nuc')
        fro = QuadraticClassifier(dim=dim, lmbda=lmbda, norm='fro')

        # No random_state is passed, so the split draws from NumPy's global
        # RNG: reproducible through the seed set at the top of the script,
        # yet different from one run to the next.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=TEST_SIZE
        )

        # batch_size == number of training samples, i.e. full-batch training.
        n_train = len(X_train)
        nuc.fit(X_train, y_train, n_epoch=epochs, batch_size=n_train)
        fro.fit(X_train, y_train, n_epoch=epochs, batch_size=n_train)

        # Generalisation gap of the nuclear-norm model.
        nuc_train_loss = batch_loss(nuc.A, nuc.b, nuc.c, X_train, y_train)
        nuc_test_loss = batch_loss(nuc.A, nuc.b, nuc.c, X_test, y_test)
        nuc_gap = np.abs(nuc_test_loss - nuc_train_loss)
        nuc_results[i].append(nuc_gap)

        # Generalisation gap of the Frobenius-norm model.
        fro_train_loss = batch_loss(fro.A, fro.b, fro.c, X_train, y_train)
        fro_test_loss = batch_loss(fro.A, fro.b, fro.c, X_test, y_test)
        fro_gap = np.abs(fro_test_loss - fro_train_loss)
        fro_results[i].append(fro_gap)
        
# Plot the mean generalisation gap (error bars: std over runs) against the
# replicated feature dimension m * d_0 for both regularisers.
dims = d_0 * np.array(multiples)

plt.errorbar(
    dims,
    np.mean(nuc_results, axis=1),
    yerr=np.std(nuc_results, axis=1),
    label="nuclear norm",
)
# This curve was previously mislabelled "nuclear norm" (copy-paste slip).
plt.errorbar(
    dims,
    np.mean(fro_results, axis=1),
    yerr=np.std(fro_results, axis=1),
    label="Frobenius norm",
)

plt.xlabel("feature dimension")
plt.ylabel("|test loss - train loss|")
# Without an explicit legend() call the labels above are never drawn.
plt.legend()

plt.savefig("REPEATING FEATURES.png")











