from utils import DataLoader, iso_scale, normalize, compute_quadratic_features
import numpy as np
from sklearn.preprocessing import scale
from sklearn.model_selection import ShuffleSplit
import pandas as pd
import argparse

# Seed passed to ShuffleSplit so the train/test partitions are reproducible.
RANDOM_STATE = 0
# Fraction of the samples held out as the test set in every split.
TEST_SIZE = 0.2
# NOTE(review): CORES and DATA_COPIES are not read anywhere in the visible
# part of this script; kept in case other code relies on them.
CORES = 24
DATA_COPIES = CORES * 2

# Command-line interface: which dataset to analyse and how many random
# train/test splits to draw.
parser = argparse.ArgumentParser()
# --dataset is mandatory: its value is handed to DataLoader and later
# concatenated into the output CSV path, which raises TypeError when the
# option is omitted (argparse would leave it as None). Requiring it makes
# the script fail immediately with a usage message instead.
parser.add_argument("--dataset", required=True, help="dataset name")
parser.add_argument("--n_runs", type=int, default=5, help="number of independent runs")
parser.add_argument("--n_cvfolds", type=int, default=4, help="cross val folds")
args = parser.parse_args()

dataset = DataLoader(args.dataset)
n_runs = args.n_runs
# NOTE(review): n_cvfolds is parsed but not used in the visible part of the
# script.
n_cvfolds = args.n_cvfolds

# One random train/test partition per run; the fixed seed keeps the
# partitions identical across invocations.
data_splits = ShuffleSplit(n_splits=n_runs, test_size=TEST_SIZE, random_state=RANDOM_STATE)
X, y = dataset.X, dataset.y

# Per-run results: one independent list per metric, to which one value is
# appended for every train/test split. The key order is preserved because it
# becomes the column order of the resulting DataFrame.
experimental_data = {
    metric: []
    for metric in (
        "train_dim",
        "test_dim",
        "compo_train_dim",
        "compo_test_dim",
        "norm_train_dim",
        "norm_test_dim",
    )
}
def _intrinsic_dimension(samples):
    """Return trace(S) / ||S||_2 for the second-moment matrix S of *samples*.

    S is the average of the outer products x x^T over the rows x of
    *samples*; no mean is subtracted inside this function. ||S||_2 is the
    spectral norm (largest singular value) of S.
    """
    samples = np.asarray(samples, dtype=float)
    # Flatten every sample to a vector, mirroring what np.outer does with
    # its arguments.
    samples = samples.reshape(samples.shape[0], -1)
    # X^T X / n is the mean of the per-row outer products (up to
    # floating-point rounding) without materialising n separate (d, d)
    # matrices.
    second_moment = samples.T @ samples / samples.shape[0]
    return np.trace(second_moment) / np.linalg.norm(second_moment, ord=2)


for train_idx, test_idx in data_splits.split(X):

    X_train, X_test = X[train_idx], X[test_idx]

    # RAW DATA
    train_dim = _intrinsic_dimension(X_train)
    test_dim = _intrinsic_dimension(X_test)

    # Training-set mean, reused to centre the test set in both scalings
    # below so that no test-set statistics are used.
    training_mean = np.mean(X_train, axis=0)

    # COMPONENT WISE SCALING
    training_component_stds = np.std(X_train, axis=0)
    # A feature that is constant on the training split has std 0; dividing
    # the test set by it would yield inf/nan. sklearn's scale() uses a
    # scaling factor of 1 for zero-variance features, so do the same here to
    # keep the train and test transforms consistent.
    safe_component_stds = np.where(
        training_component_stds == 0, 1.0, training_component_stds
    )
    X_train_scaled = scale(X_train)
    X_test_scaled = (X_test - training_mean) / safe_component_stds

    compo_train_dim = _intrinsic_dimension(X_train_scaled)
    compo_test_dim = _intrinsic_dimension(X_test_scaled)

    # ISOTROPIC NORMALIZATION
    # NOTE(review): iso_scale / normalize come from utils; presumably
    # iso_scale returns the rescaled training data plus the covariance it
    # used, and normalize applies that covariance to new data -- confirm.
    X_train_scaled, training_covariance = iso_scale(scale(X_train, with_std=False))
    X_test_scaled = normalize(X_test - training_mean, training_covariance)

    norm_train_dim = _intrinsic_dimension(X_train_scaled)
    norm_test_dim = _intrinsic_dimension(X_test_scaled)

    # LOGGING
    experimental_data["train_dim"].append(train_dim)
    experimental_data["test_dim"].append(test_dim)
    experimental_data["compo_train_dim"].append(compo_train_dim)
    experimental_data["compo_test_dim"].append(compo_test_dim)
    experimental_data["norm_train_dim"].append(norm_train_dim)
    experimental_data["norm_test_dim"].append(norm_test_dim)

# Collect the per-run metrics into a table (one row per split, one column
# per metric) and write it to the log directory, named after the dataset.
df = pd.DataFrame(experimental_data)
output_path = "../log/" + "intrinsics_" + args.dataset + ".csv"
df.to_csv(output_path)
