import numpy as np
import sklearn.metrics as skm
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import average_precision_score
# This is for the progress bar.
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
from pyod.models.kpca import KPCA
import csv
import torch
import math
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support
import pandas as pd

from diffusion.non_param_dte import DTENonParametric as DTENP
from diffusion.dte import DTECategorical,DTEGaussian
import os
import pkg_resources

def read_after_gauss_data(file, normalization='z-score', train_level=1, test_level=2, seed=42):
    """Load a labelled dataset, split it, normalise it and add Gaussian noise.

    The rows are shuffled with ``seed``.  The first half of the rows labelled
    0 forms the training set; the remaining rows labelled 0 followed by all
    rows labelled 1 form the test set.  Rows with any other label are
    dropped.  Normalisation statistics are computed on the training rows
    only.  Zero-mean Gaussian noise is then added to the normalised data.

    Args:
        file: Path to a ``.npz`` archive with ``X`` and ``y`` entries, or to a
            ``pkl`` / ``csv`` table whose last column is the label.
        normalization: ``'min-max'``, ``'z-score'``, ``'scale'`` (divide by
            255) or ``'ours'`` (z-score with ``1e-4`` added to the standard
            deviation).  Any other value leaves the data unnormalised.
        train_level: Noise variance for every label-0 row (train and test).
        test_level: Noise variance for the label-1 rows of the test set.
        seed: Seed for the shuffle and for the noise.

    Returns:
        ``(x_train, y_train, x_test, y_test)``.

    Raises:
        NotImplementedError: If the file extension is not supported.
    """
    # NOTE: np.load(allow_pickle=True) and pd.read_pickle can execute
    # arbitrary code while loading; only use them on trusted files.
    if file.endswith('.npz'):
        data = np.load(file, allow_pickle=True)
        x, y = data['X'], data['y']
        y = np.array(y, dtype=int)
    else:
        if file.endswith('pkl'):
            loader = pd.read_pickle
        elif file.endswith('csv'):
            loader = pd.read_csv
        else:
            raise NotImplementedError(
                f'unsupported file type: {file!r} (expected .npz, pkl or csv)')

        df = loader(file)
        # Treat infinities as missing values, then forward-fill the gaps.
        df.replace([np.inf, -np.inf], np.nan, inplace=True)
        df.ffill(inplace=True)
        x = df.values[:, :-1]
        y = np.array(df.values[:, -1], dtype=int)

    # Train-test splitting on a seeded shuffle.
    rng = np.random.RandomState(seed)
    idx = rng.permutation(np.arange(len(x)))
    x, y = x[idx], y[idx]

    norm_idx = np.where(y == 0)[0]
    anom_idx = np.where(y == 1)[0]
    split = int(0.5 * len(norm_idx))
    train_norm_idx, test_norm_idx = norm_idx[:split], norm_idx[split:]
    test_idx = np.hstack([test_norm_idx, anom_idx])

    x_train, y_train = x[train_norm_idx], y[train_norm_idx]
    x_test, y_test = x[test_idx], y[test_idx]

    # Normalisation (statistics come from the training rows only).
    if normalization == 'min-max':
        minmax_scaler = MinMaxScaler()
        minmax_scaler.fit(x_train)
        x_train = minmax_scaler.transform(x_train)
        x_test = minmax_scaler.transform(x_test)
    elif normalization == 'z-score':
        mus = np.mean(x_train, axis=0)
        sds = np.std(x_train, axis=0)
        sds[sds == 0] = 1  # constant features: avoid division by zero
        x_train = (x_train - mus) / sds
        x_test = (x_test - mus) / sds
    elif normalization == 'scale':
        x_train = x_train / 255
        x_test = x_test / 255
    elif normalization == 'ours':
        mean = np.mean(x_train, 0)
        std = np.std(x_train, 0)
        x_train = (x_train - mean) / (std + 1e-4)
        x_test = (x_test - mean) / (std + 1e-4)

    # Gaussian noise.  It is added to the arrays that are actually returned;
    # adding it to `x` here would have no effect because x_train / x_test are
    # copies.  The seeded generator keeps the noise reproducible.
    # NOTE(review): noise is applied in normalised space (after the scaling
    # above) -- confirm this is the intended space.
    n_features = x_train.shape[1]
    noise_train = rng.normal(
        loc=0.0, scale=np.sqrt(train_level),
        size=(len(train_norm_idx), n_features))
    noise_test_norm = rng.normal(
        loc=0.0, scale=np.sqrt(train_level),
        size=(len(test_norm_idx), n_features))
    noise_test_abnorm = rng.normal(
        loc=0.0, scale=np.sqrt(test_level),
        size=(len(anom_idx), n_features))

    x_train = x_train + noise_train
    # Row order of x_test is: held-out label-0 rows first, then label-1 rows.
    x_test = x_test + np.vstack([noise_test_norm, noise_test_abnorm])

    print(f'Original size: [{x.shape}], Normal/Anomaly: [{len(norm_idx)}/{len(anom_idx)}] \n'
          f'After splitting: training/testing [{len(x_train)}/{len(x_test)}]')

    return x_train, y_train, x_test, y_test

def read__data(file, normalization='z-score', seed=42):
    """Load a labelled dataset, split it into train/test and normalise it.

    The rows are shuffled with ``seed``.  The first half of the rows labelled
    0 forms the training set; the remaining rows labelled 0 followed by all
    rows labelled 1 form the test set.  Rows with any other label are
    dropped.  Normalisation statistics are computed on the training rows
    only.

    Args:
        file: Path to a ``.npz`` archive with ``X`` and ``y`` entries, or to a
            ``pkl`` / ``csv`` table whose last column is the label.
        normalization: ``'z-score'``, ``'scale'`` (divide by 255) or
            ``'ours'`` (z-score with ``1e-4`` added to the standard
            deviation).  Any other value leaves the data unnormalised.
        seed: Seed for the shuffle that precedes the split.

    Returns:
        ``(x_train, y_train, x_test, y_test)``.

    Raises:
        NotImplementedError: If the file extension is not supported.
    """
    # NOTE: np.load(allow_pickle=True) and pd.read_pickle can execute
    # arbitrary code while loading; only use them on trusted files.
    if file.endswith('.npz'):
        data = np.load(file, allow_pickle=True)
        x, y = data['X'], data['y']
        y = np.array(y, dtype=int)
    else:
        if file.endswith('pkl'):
            loader = pd.read_pickle
        elif file.endswith('csv'):
            loader = pd.read_csv
        else:
            raise NotImplementedError(
                f'unsupported file type: {file!r} (expected .npz, pkl or csv)')

        df = loader(file)
        # Treat infinities as missing values, then forward-fill the gaps.
        df.replace([np.inf, -np.inf], np.nan, inplace=True)
        df.ffill(inplace=True)
        x = df.values[:, :-1]
        y = np.array(df.values[:, -1], dtype=int)

    # Train-test splitting on a seeded shuffle.
    rng = np.random.RandomState(seed)
    idx = rng.permutation(np.arange(len(x)))
    x, y = x[idx], y[idx]

    norm_idx = np.where(y == 0)[0]
    anom_idx = np.where(y == 1)[0]
    split = int(0.5 * len(norm_idx))
    train_norm_idx, test_norm_idx = norm_idx[:split], norm_idx[split:]
    # Test rows: held-out label-0 rows first, then every label-1 row.
    test_idx = np.hstack([test_norm_idx, anom_idx])

    x_train, y_train = x[train_norm_idx], y[train_norm_idx]
    x_test, y_test = x[test_idx], y[test_idx]

    print(f'Original size: [{x.shape}], Normal/Anomaly: [{len(norm_idx)}/{len(anom_idx)}] \n'
          f'After splitting: training/testing [{len(x_train)}/{len(x_test)}]')

    # Normalisation (statistics come from the training rows only).
    if normalization == 'z-score':
        mus = np.mean(x_train, axis=0)
        sds = np.std(x_train, axis=0)
        sds[sds == 0] = 1  # constant features: avoid division by zero
        x_train = (x_train - mus) / sds
        x_test = (x_test - mus) / sds
    elif normalization == 'scale':
        x_train = x_train / 255
        x_test = x_test / 255
    elif normalization == 'ours':
        mean = np.mean(x_train, 0)
        std = np.std(x_train, 0)
        x_train = (x_train - mean) / (std + 1e-4)
        x_test = (x_test - mean) / (std + 1e-4)

    return x_train, y_train, x_test, y_test
# Experiment driver: for every benchmark dataset, fit DTECategorical once per
# (lr, T) grid point and append the best AUROC / AUPRC seen to a text file.

# Create the output directory up front so that a missing folder cannot
# discard the results after all training for a dataset has finished.
os.makedirs('./tuned DTEC RESULTS/', exist_ok=True)

for dataset_name in [
    "2_annthyroid",
    "4_breastw",
    "14_glass",
    "23_mammography",
    "29_Pima",
    "39_vertebral",
    "7_Cardiotocography",
    "13_fraud",
    "30_satellite",
    "31_satimage-2",
    "32_shuttle",
    "3_backdoor",
    "9_census",
    "17_InternetAds",
    "24_mnist",
    "25_musk",
    "36_speech",
]:
    file_path = "datasets/" + dataset_name + '.npz'
    # `data` is the bare dataset name and is used to name the results file.
    data = dataset_name

    best_auc, best_prc = 0, 0
    for lr in [0.001, 0.005, 0.01]:
        for T in [100, 400, 1000]:
            x_train, y_train, x_test, y_test = read__data(file_path, "z-score")

            # NOTE(review): `lr` and `T` are iterated over but never passed
            # to the model, so every grid point builds DTECategorical with
            # its default arguments.  Presumably they should be forwarded to
            # the constructor -- confirm its signature in diffusion.dte.
            model = DTECategorical()
            model.fit(x_train)
            score = model.predict_score(x_test)

            AUROC = roc_auc_score(y_test, score)
            ap_score = average_precision_score(y_test, score)
            best_auc = max(best_auc, AUROC)
            best_prc = max(best_prc, ap_score)

    # One line is appended per dataset per run of the script.
    # NOTE(review): the file name contains a space before ".txt"; kept as-is
    # so that existing result files keep being appended to.
    with open('./tuned DTEC RESULTS/' + data + ' .txt', 'a+') as f:
        f.write("best_auroc: " + str(best_auc) + " best_auprc: " + str(best_prc) + '\n')