from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from E01_ICE import E01_ICE
from auxfuncs_array import pred_linear
import torch
from auxfuncs_array import reorder_svm
from ICE_coreset import coreset_while

# --- Experiment configuration -------------------------------------------
# split: when True, each run holds out part of the data as a test set.
# N:     number of runs; also the number of rows in every result table.
# n:     initialised to 0; not referenced by the code visible in this file.
split = True
N = 5
n = 0

# One (N, 2) table per method: column 0 holds the train accuracy and
# column 1 the test accuracy of the corresponding run.
_RESULT_SHAPE = (N, 2)
svm_res, lr_res, lda_res, ice_res = (
    torch.zeros(_RESULT_SHAPE) for _ in range(4)
)
def _fit_and_score(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and score it on both splits.

    Returns a tuple ``(train_acc, test_acc, train_errors, test_errors)``:
    the two accuracies come from ``accuracy_score`` and the two error
    counts are the numbers of misclassified samples as Python ints.
    """
    model.fit(X_train, y_train)
    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)
    return (
        accuracy_score(y_train, pred_train),
        accuracy_score(y_test, pred_test),
        int(np.sum(pred_train != y_train)),
        int(np.sum(pred_test != y_test)),
    )


# ---- Load and preprocess the data once ----------------------------------
# Everything in this section is deterministic, so it is done a single time
# instead of being repeated inside every run.
data = np.loadtxt("datasets/rice_cammeo_3810_7D.csv", delimiter=",")

# Drop exact duplicate rows (features + label).
data = np.unique(data, axis=0)

# The last column is the label, every other column is a feature.
X = data[:, :-1]
y = data[:, -1]

# Drop duplicate feature rows while keeping X and y aligned: the label of
# the first occurrence of each distinct feature row is kept. Deduplicating
# X alone would leave y with a different length whenever the same features
# appear with two different labels.
X, first_idx = np.unique(X, axis=0, return_index=True)
y = y[first_idx]

# Recode labels from {0, 1} to {-1, +1}.
y[y == 0] = -1
y = y.astype(int)

# Scale every feature column to (approximately) [-1, 1]; epsilon guards
# against division by zero for constant columns.
min_val = X.min(axis=0)   # Minimum value of each column
max_val = X.max(axis=0)   # Maximum value of each column
epsilon = 1e-8
X = 2 * (X - min_val) / (max_val - min_val + epsilon) - 1

# Add a tiny, reproducible jitter to the features.
# NOTE(review): presumably this avoids degenerate point configurations in
# the ICE solver -- confirm.
np.random.seed(2024)
noise_std_dev = 1e-8
noise = np.random.normal(0, noise_std_dev, size=X.shape)
X = X + noise


# ---- Repeated runs ------------------------------------------------------
for i in range(N):
    print('\n')
    print(f'this is dataset {i}')

    if split:
        # A per-run random_state gives every run its own (reproducible)
        # split. Reseeding the global RNG with one fixed value before each
        # split would make all runs identical.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=2024 + i
        )
        N_train = X_train.shape[0]
        N_test = X_test.shape[0]
        print(f'  The number of training samples is {N_train}, the number of test samples is {N_test}')
    else:
        # No held-out data: the "test" metrics below are then computed on
        # the training data so the rest of the run stays well defined.
        X_train, y_train = X, y
        X_test, y_test = X, y
        N_train = X_train.shape[0]
        N_test = N_train
        print(f'  The number of training samples is {N_train}')

    #### SVM
    svm = SVC(kernel='linear', C=1.0)  # C is the regularization parameter
    train_acc_svm, test_acc_svm, svm_train_nmc, svm_test_nmc = _fit_and_score(
        svm, X_train, y_train, X_test, y_test
    )
    print(f"  SVM: Train nmc: {svm_train_nmc:.4f}, Test nmc: {svm_test_nmc:.4f}")
    print(f"  SVM: Train Accuracy: {train_acc_svm:.4f}, Test Accuracy: {test_acc_svm:.4f}")

    svm_res[i, 0] = train_acc_svm
    svm_res[i, 1] = test_acc_svm

    #### Logistic regression
    # NOTE(review): `multi_class` is deprecated in recent scikit-learn
    # releases -- confirm against the installed version.
    log_reg = LogisticRegression(multi_class='ovr', random_state=42)
    train_acc_log_reg, test_acc_log_reg, _, _ = _fit_and_score(
        log_reg, X_train, y_train, X_test, y_test
    )
    print(f"  Logistic-regression: Train Accuracy: {train_acc_log_reg:.4f}, Test Accuracy: {test_acc_log_reg:.4f}")
    lr_res[i, 0] = train_acc_log_reg
    lr_res[i, 1] = test_acc_log_reg

    #### LDA
    lda = LinearDiscriminantAnalysis()
    train_acc_lda, test_acc_lda, _, _ = _fit_and_score(
        lda, X_train, y_train, X_test, y_test
    )
    print(f"  LDA: Train Accuracy: {train_acc_lda:.4f}, Test Accuracy: {test_acc_lda:.4f}")

    lda_res[i, 0] = train_acc_lda
    lda_res[i, 1] = test_acc_lda

    ###### ICE-coreset
    inds = list(range(N_train))

    C, blocksize, max_unchanged, Nremain, device, timelimit, verbose = 500, 34, 20, 38, 'cuda', 2000, True
    paras_coresets = (C, inds, blocksize, max_unchanged, Nremain, device, timelimit, verbose)
    paras_ice = (X_train, y_train, int(3e3), device)

    res = coreset_while(paras_coresets, paras_ice)

    # Keep only candidates that carry a solution in their first field.
    res = [c for c in res if c[0] is not None]

    # Convert the test labels once; doing it per candidate would re-wrap an
    # existing tensor from the second candidate on.
    y_test_t = torch.tensor(y_test)

    # Among the candidates whose training error does not exceed the SVM's,
    # keep the one with the fewest test errors (ties: the later candidate).
    # NOTE(review): this selects a model using the test split, so the
    # reported ICE test accuracy is optimistic.
    current_best_test = N_test
    selected = False
    for c in res:
        w = c[0]

        train_loss = c[1]
        train_acc_ice = 1 - train_loss / N_train

        y_pred_test = pred_linear(X_test, w)
        loss_test = torch.sum(y_pred_test != y_test_t)
        if loss_test > y_test_t.shape[0] / 2:
            # More than half wrong: score the sign-flipped predictions.
            loss_test = torch.sum((-y_pred_test) != y_test_t)
        test_acc_ice = 1 - loss_test / N_test

        # Compare error counts with error counts (training loss vs. the
        # SVM's number of training misclassifications).
        if loss_test <= current_best_test and train_loss <= svm_train_nmc:
            current_best_test = loss_test
            selected = True
            print(f'the trainning error is {train_loss}')
            ice_res[i, 0] = train_acc_ice
            ice_res[i, 1] = test_acc_ice

            print(f"  ICE: Train Accuracy: {train_acc_ice:.4f}, Test Accuracy: {test_acc_ice:.4f}")

    if not selected:
        # Make it visible that this run's ICE row was never filled in.
        print('  ICE: no candidate reached the SVM training error; result row left at 0')




# Aggregate every result table over the runs (rows): per-column mean and
# standard deviation, i.e. one value for train accuracy and one for test.
svm_mean, svm_std = svm_res.mean(dim=0), svm_res.std(dim=0)
lr_mean, lr_std = lr_res.mean(dim=0), lr_res.std(dim=0)
lda_mean, lda_std = lda_res.mean(dim=0), lda_res.std(dim=0)
ice_mean, ice_std = ice_res.mean(dim=0), ice_res.std(dim=0)

# Each line prints the (mean, std) pair of one method as a tuple.
print(f'ICE result: {ice_mean, ice_std}')
print(f'SVM result: {svm_mean, svm_std}')
print(f'LR result: {lr_mean, lr_std}')
print(f'LDA result: {lda_mean, lda_std}')




# coreset, best_candidates= Deep_ICE_coreset(X_train, t_train, K=3, L = 50, M = 11, max_unchanged = 1, Nremain= 30, threshold=5, num_candidates=1000)
# best_candidates = [(vec, val.cpu().item(),block) for vec, val,block in best_candidates]

# with open('voicepath_704_2D_K=3_2022.csv', 'w', newline='') as f:
#   writer = csv.writer(f) 
#   writer.writerows(best_candidates) 

# print(coreset)
# coreset= [i for i in range(50)]
# res = Deep_ICE(coreset, X, t, 2, 500, P=True)


