import numpy as np
import pandas as pd
from sklearn import svm
# Location of the diabetes CSV. A raw string keeps the Windows backslashes
# literal; sequences such as "\D" or "\d" are invalid escapes in a normal
# string literal and trigger a SyntaxWarning on recent Python versions.
Path = r"C:\D_Disk\Label_DP_Final\Diabetes_Application\diabetes.csv"
Data_Frame = pd.read_csv(Path)

# Labels: 2 * Outcome - 1 (presumably Outcome is 0/1 in the CSV, giving -1/+1
# -- confirm against the data file).
Y = 2 * np.array(Data_Frame['Outcome']) - 1

# Features: every column except the last one, standardised column by column
# (subtract the column mean, divide by the column standard deviation) and
# rounded to two decimals.
X = np.array(
    Data_Frame.iloc[:, 0:-1].apply(
        lambda column: ((column - column.mean()) / column.std()).round(2)
    )
)

N_size = X.shape[0]                # number of samples (rows)
Train_size = int(N_size * 0.7)     # 70% of the rows, rounded down
Index_all = list(range(N_size))    # row indices 0 .. N_size - 1

def SVM(portion, epsilon=0.6, n_repeats=1000):
    """Estimate the test error of a linear SVM trained on randomly flipped labels.

    For every seed in ``range(n_repeats)`` the sample indices are shuffled.
    The first ``int(Train_size * portion)`` shuffled samples form the
    training set and all shuffled samples from position ``Train_size``
    onwards form the test set. Each training label is kept with probability
    ``theta = exp(epsilon) / (1 + exp(epsilon))`` and negated otherwise, a
    ``LinearSVC`` is fitted on the perturbed labels, and its error is
    measured against the unperturbed test labels.

    Args:
        portion: Fraction of ``Train_size`` used for training. It must yield
            at least one and at most ``Train_size`` training samples.
        epsilon: Controls the probability ``theta`` of keeping a label
            (presumably a label-privacy budget -- confirm with the author).
        n_repeats: Number of seeded repetitions; seeds are
            ``0 .. n_repeats - 1``. Defaults to 1000.

    Returns:
        A list with one ``[portion, test_error]`` entry per repetition,
        where ``test_error`` is ``1 - accuracy`` on the test set.

    Raises:
        ValueError: If ``portion`` gives an empty training set or one that
            would extend into the test rows.
    """
    # The training size does not depend on the seed, so compute it once.
    n_train = int(Train_size * portion)
    if not 0 < n_train <= Train_size:
        raise ValueError(
            "portion must select between 1 and Train_size training samples, "
            "got portion=%r (n_train=%d)" % (portion, n_train)
        )

    theta = np.exp(epsilon) / (1 + np.exp(epsilon))
    Error_set = []
    for seed in range(n_repeats):
        # Reseed NumPy's global generator so repetition `seed` is reproducible.
        np.random.seed(seed)
        shuffled = np.random.choice(Index_all, N_size, replace=False)
        train_idx = shuffled[:n_train]      # n_train <= Train_size
        test_idx = shuffled[Train_size:]    # disjoint from the training rows

        # +1 keeps a label (probability theta), -1 flips it. np.where is used
        # instead of np.sign so that an exact tie cannot produce a 0 label.
        draws = np.random.uniform(0, 1, len(train_idx))
        keep_or_flip = np.where(theta - draws > 0, 1.0, -1.0)
        noisy_labels = Y[train_idx] * keep_or_flip

        model = svm.LinearSVC(tol=0.001, max_iter=5000)
        model.fit(X[train_idx], noisy_labels)

        # score() returns accuracy; the stored value is the error rate.
        accuracy = model.score(X[test_idx], Y[test_idx])
        Error_set.append([portion, 1 - accuracy])
    return Error_set

import pandas as pd
from seaborn import lineplot
# Sweep the training fraction for three epsilon schedules and collect the
# per-repetition [fraction, test error] rows returned by SVM for each one.


def _sweep_fractions(epsilon_at, report=None):
    """Run SVM at training fractions 0.1, 0.2, ..., 1.0 and gather all rows.

    ``epsilon_at`` maps the real-valued training size ``Train_size * fraction``
    to the epsilon passed to SVM. ``report``, when given, is called with the
    fraction once that fraction's runs have finished.
    """
    collected = []
    for fraction in (0.1 * step for step in np.arange(1, 11)):
        collected.extend(SVM(fraction, epsilon_at(Train_size * fraction)))
        if report is not None:
            report(fraction)
    return collected


# Scaling constants derived from the smallest training size (10% of
# Train_size); with them every schedule below evaluates to about 1 at the
# 10% fraction.
_smallest_n = Train_size * 0.1
C_1 = np.sqrt(_smallest_n) / np.log(_smallest_n)
C_2 = np.sqrt(_smallest_n)
C_3 = np.log(_smallest_n) * np.sqrt(_smallest_n)

# Schedule 1: epsilon proportional to log(n) / sqrt(n).
Error_ALL_nlogn = _sweep_fractions(
    lambda n: C_1 * np.log(n) / np.sqrt(n),
    report=lambda fraction: print(C_1, fraction),
)
DF = pd.DataFrame(Error_ALL_nlogn)

# Schedule 2: epsilon proportional to 1 / sqrt(n).
Error_ALL_n = _sweep_fractions(lambda n: C_2 / np.sqrt(n), report=print)
DF1 = pd.DataFrame(Error_ALL_n)

# Schedule 3: epsilon proportional to 1 / (sqrt(n) * log(n)).
Error_ALL_dnlogn = _sweep_fractions(lambda n: C_3 / np.sqrt(n) / np.log(n))
DF2 = pd.DataFrame(Error_ALL_dnlogn)

import matplotlib.pyplot as plt
# Draw one curve per epsilon schedule: column 0 of each frame (training
# fraction) on the x-axis, column 1 (test error) on the y-axis.
# The legend labels are raw strings so the LaTeX backslashes stay literal;
# "\e" and "\l" are invalid escapes in a normal string literal and trigger a
# SyntaxWarning on recent Python versions. The label text itself is unchanged.
# NOTE(review): there is no plt.show()/savefig here -- presumably the script is
# run in an interactive session; confirm.
_curves = (
    (DF, 'blue', r'$\epsilon \asymp \log(n) n^{-1/2}$'),
    (DF1, 'red', r'$\epsilon \asymp  n^{-1/2}$'),
    (DF2, 'yellow', r'$\epsilon \asymp \log^{-1}(n)n^{-1/2}$'),
)
for _frame, _colour, _label in _curves:
    lineplot(data=_frame, x=0, y=1, color=_colour, label=_label,
             marker='o', markersize=5)
plt.xlabel('Percentage of Training Dataset')
plt.ylabel('Testing Error')
plt.legend()
plt.grid()
