import argparse
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.linear_model import LogisticRegression

# Arguments
#   dataset_name : positional; inserted into the data, model and result paths
#   --version    : optional string tag (default "1") inserted into the
#                  n-gram and result file names
parser = argparse.ArgumentParser()
parser.add_argument("--version", type=str, default="1")
parser.add_argument("dataset_name", type=str)

args = parser.parse_args()

# Paths (all relative to the current working directory)
# NOTE: the two data paths end with '/', the two results paths do not.
save_data_path_train = f"../data/{args.dataset_name}_TRAIN/"
save_data_path_test = f"../data/{args.dataset_name}_TEST/"
save_model_path = "../results/trained_models"
save_results_path = "../results/results_csv"

# Load y (labels are cast to float32 right after loading)
y_train = torch.load(f"{save_data_path_train}y_tensor.pt").to(torch.float32)
y_test = torch.load(f"{save_data_path_test}y_tensor.pt").to(torch.float32)

# Load ngrams: the "bot" and "mid" tensors are concatenated along dim 1
ngram_file_suffix = f"_{args.dataset_name}_{args.version}.pt"
ngrams_low = torch.load(f"{save_model_path}/ngrams_bot{ngram_file_suffix}")
ngrams_mid = torch.load(f"{save_model_path}/ngrams_mid{ngram_file_suffix}")
ngrams = torch.cat((ngrams_low, ngrams_mid), dim=1)

# The first len(y_train) rows are used as training features, every
# remaining row as test features.
# NOTE(review): presumably ngrams holds exactly len(y_train) + len(y_test)
# rows with the training rows first -- confirm against the script that
# produced these files.
n_train = len(y_train)
ngrams_train = ngrams[:n_train].numpy()
ngrams_test = ngrams[n_train:].numpy()

# Make permutations (no seed is set in this script, so the shuffle and
# therefore the train/eval split differ from run to run)
permutation_train = np.random.permutation(n_train)
permutation_test = np.random.permutation(len(y_test))

# Apply the same permutation to features and labels so rows stay aligned
ngrams_shuffle_train, y_train_shuffle = ngrams_train[permutation_train], y_train[permutation_train]
ngrams_shuffle_test, y_test_shuffle = ngrams_test[permutation_test], y_test[permutation_test]

# Split train / eval / test: first 70% of the shuffled training rows are
# used for fitting, the rest for evaluation; the test set is kept whole
train_index = int(0.70 * n_train)

ngrams_real_train, ngrams_real_eval = (
    ngrams_shuffle_train[:train_index],
    ngrams_shuffle_train[train_index:],
)
y_real_train, y_real_eval = (
    y_train_shuffle[:train_index],
    y_train_shuffle[train_index:],
)
y_real_test = y_test_shuffle

# Assign X_train, X_eval and X_test
X_train, X_eval, X_test = ngrams_real_train, ngrams_real_eval, ngrams_shuffle_test

#Classif
# Grid search over an elastic-net logistic regression:
#   alpha -> l1_ratio (mix between the L1 and L2 penalties)
#   beta  -> C        (scikit-learn's inverse regularisation strength)
# For every (alpha, beta) pair the model is fitted on X_train and its
# accuracy on the train, eval and test splits is recorded. No model is
# selected here; all 5 x 100 rows are written to one CSV file.

# Create the output directory up front so that a missing folder cannot
# discard the whole search when the CSV is written at the very end.
os.makedirs(save_results_path, exist_ok=True)

# Convert the label tensors once instead of on every iteration. ravel()
# is a no-op for 1-D labels and keeps an (N, 1) label tensor from
# broadcasting against the 1-D predictions into an (N, N) comparison.
y_train_np = y_real_train.numpy().ravel()
y_eval_np = y_real_eval.numpy().ravel()
y_test_np = y_real_test.numpy().ravel()

list_beta = []
list_alpha = []
list_Accuracy_Test = []
list_Accuracy_Eval = []
list_Accuracy_Train = []

for alpha in [0., 0.25, 0.50, 0.75, 0.98]:
    for beta in np.geomspace(1e-4, 1e6, num=100, endpoint=False):

        # NOTE(review): `multi_class` is deprecated in recent scikit-learn
        # releases (1.5+); kept as-is so behaviour on the installed
        # version does not change -- revisit when upgrading.
        clf = LogisticRegression(
            max_iter=100000,
            penalty='elasticnet',
            multi_class='ovr',
            solver='saga',
            l1_ratio=alpha,
            C=beta,
        ).fit(X_train, y_train_np)

        # Accuracy = fraction of predictions equal to the true label
        Accuracy_Train = np.mean(clf.predict(X_train) == y_train_np)
        Accuracy_Eval = np.mean(clf.predict(X_eval) == y_eval_np)
        Accuracy_Test = np.mean(clf.predict(X_test) == y_test_np)

        list_beta.append(beta)
        list_alpha.append(alpha)
        list_Accuracy_Train.append(Accuracy_Train)
        list_Accuracy_Eval.append(Accuracy_Eval)
        list_Accuracy_Test.append(Accuracy_Test)

# One row per (alpha, beta) pair, in the order the grid was visited
df_results = pd.DataFrame({
    'Alpha': list_alpha,
    'Beta': list_beta,
    'Accuracy_Train': list_Accuracy_Train,
    'Accuracy_Eval': list_Accuracy_Eval,
    'Accuracy_Test': list_Accuracy_Test,
})

df_results.to_csv(f"{save_results_path}/results_{args.dataset_name}_{args.version}.csv")




