################################################################################################
################################################################################################
# adapted by first author
################################################################################################
################################################################################################

#Orig:
# Three-way neighborhood characteristic region-based outlier detection (3WNCROD) algorithm
# Please refer to the following paper:
# Zhang Xianyong, Yuan Zhong, and Miao Duoqian. Outlier Detection Using Three-Way
# Neighborhood Characteristic Regions and Corresponding Fusion Measurement[J]. TKDE, 2023.
# Uploaded by Yuan Zhong on August 29, 2023. E-mail:yuanzhong2799@foxmail.com.
import numpy as np
from scipy.io import loadmat
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial.distance import cdist

import pickle
import sklearn.metrics as skm
import os
import pandas as pd
from adbench.myutils import Utils
# from options import get_data_paths, BaseOptions


# def low_density_anomalies(test_log_probs, num_anomalies):
#     """ Helper function for the F1-score, selects the num_anomalies lowest values of test_log_prob
#     """
#     anomaly_indices = np.argpartition(test_log_probs, num_anomalies-1)[:num_anomalies]
#     preds = np.zeros(len(test_log_probs))
#     preds[anomaly_indices] = 1
#     return preds


def WNCROD_calc(data, X_tem, lammda):
    """Compute the multiple neighborhood outlier factor (MNOF) of selected samples.

    For each attribute a neighborhood relation is built in which two samples
    are related when their absolute difference on that attribute does not
    exceed the attribute's radius.  From these relations the function derives
    per-attribute region indicators of the selected set, counts region
    members at three dissimilarity levels, and fuses the counts with a
    per-attribute weight into one score per selected sample.

    Args:
        data: 2-D data matrix without decisions; rows are samples and columns
            are attributes.  Numerical attributes should be normalized into
            [0, 1]; nominal attributes should be replaced by different
            integer values.
        X_tem: Row indices of the selected condition subdata.  The indices
            are expected to be unique.
        lammda: Parameter for the radius adjustment; the radius of an
            attribute is its standard deviation divided by ``lammda``.

    Returns:
        1-D array holding the MNOF of each selected sample, in the order
        given by ``X_tem``.
    """
    n, m = data.shape

    # Indicator vector of the selected sample set.
    X = np.zeros(n)
    X[X_tem] = 1
    n_X = int(X.sum())
    # One output row per distinct selected sample (equals X_tem when the
    # indices are unique).
    rows = X_tem[:n_X]

    # Thresholds on the number of attributes on which two samples are NOT
    # neighbors.
    D1 = m / 3
    D2 = m / 2
    D3 = 0.9 * m

    # Columns whose values are all <= 1 are treated as numerical and get a
    # radius of std / lammda; every other column keeps radius 0, so only
    # identical values are neighbors there.
    delta = np.zeros(m)
    ID = np.all(data <= 1, axis=0)
    delta[ID] = np.std(data[:, ID], axis=0) / lammda

    Lower = np.zeros((m, n))
    weight = np.zeros((n_X, m))
    D_tem = np.zeros((n, n))

    # Single pass over the attributes: the O(n^2) neighborhood relation of a
    # column is built once and reused for everything that depends on it.
    for col in range(m):
        column = data[:, [col]]
        RM_tem = cdist(column, column, metric='cityblock') <= delta[col]

        # 1 where every neighbor of the sample lies inside the selected set.
        Lower[col] = np.min(np.maximum(1 - RM_tem, X), axis=1)

        # The more selected neighbors a sample has on this attribute, the
        # smaller the weight of the attribute for that sample.
        selected_neighbors = np.minimum(RM_tem[rows], X).sum(axis=1)
        weight[:, col] = 1 - np.sqrt(selected_neighbors / n_X)

        # Number of attributes on which each pair of samples is related.
        D_tem += RM_tem

    IB = X - Lower
    NEB = np.min(IB, axis=0)
    NPB = IB - NEB

    # Pairwise count of attributes on which two selected samples differ.
    NOM = m - D_tem
    X_OM = NOM[np.ix_(X_tem, X_tem)][:n_X]

    within_D1 = X_OM <= D1
    beyond_D2 = X_OM >= D2
    beyond_D3 = X_OM >= D3

    # This count does not depend on the attribute, so compute it once and
    # broadcast it over the columns below.
    NEB_num = np.minimum(NEB[X_tem], within_D1).sum(axis=1)

    NPB_num = np.zeros((n_X, m))
    Lower_num = np.zeros((n_X, m))
    for col in range(m):
        NPB_num[:, col] = np.minimum(NPB[col, X_tem], beyond_D2).sum(axis=1)
        Lower_num[:, col] = np.minimum(Lower[col, X_tem], beyond_D3).sum(axis=1)

    fused = (NEB_num[:, None] + Lower_num + NPB_num) / n_X
    MNOF = np.mean(fused * weight, axis=1)
    return MNOF

def WNCROD(training_data_pkl, trandata):
    """Score every row of ``trandata`` with ``WNCROD_calc``.

    All rows are used as the selected sample set and the radius adjustment
    parameter is fixed to 1.

    Args:
        training_data_pkl: Not used; the scores are computed from
            ``trandata`` alone.  The parameter is kept so existing callers
            keep working.
        trandata: 2-D array with one sample per row.  Column 0 is excluded
            before scoring.

    Returns:
        The 1-D score array produced by ``WNCROD_calc``, one entry per row
        of ``trandata``.
    """
    # NOTE(review): column 0 is dropped before scoring -- presumably it is a
    # non-feature (ID/label) column; confirm against the caller, because a
    # pure feature matrix would silently lose its first attribute here.
    features = trandata[:, 1:]

    X_tem = np.arange(trandata.shape[0])
    lammda = 1
    score = WNCROD_calc(features, X_tem, lammda)

    # Diagnostic output of the computed scores.
    print(type(score))
    print(score.shape)
    print(score)

    return score
