import cmath
import os
import time

import numpy as np
import openml
import pandas as pd
from scipy.stats import pearsonr
from sklearn import preprocessing
from sklearn.cluster import KMeans

np.random.seed(0)

# OpenML dataset ids to process, and the number of groups the ranked
# features are split into.
DATASET_IDS = [40670]
NUM_INTERVALS = 10


def to_numeric_matrix(frame, categorical_indicator):
    """Convert a feature table into a dense float matrix.

    Columns flagged in ``categorical_indicator`` are replaced by integer
    codes assigned in order of first appearance, so the encoding is the
    same on every run (iterating a ``set`` of labels is not).  Missing
    entries (``None``/NaN/``pd.NA``) in any column become ``0``.

    Args:
        frame: DataFrame (or 2-D array-like) holding one feature per column.
        categorical_indicator: iterable of booleans; entry ``k`` says whether
            column ``k`` is categorical.

    Returns:
        ``numpy.ndarray`` of dtype float with the same shape as ``frame``.

    Raises:
        ValueError: if a column that is not flagged categorical holds values
            that cannot be converted to float.
    """
    matrix = np.array(frame, dtype=object)
    for position, is_categorical in enumerate(categorical_indicator):
        if not is_categorical:
            continue
        codes, _ = pd.factorize(matrix[:, position])
        # factorize marks missing values with -1; keep them missing here so
        # they are zero-filled together with every other missing entry below.
        matrix[:, position] = np.where(codes < 0, np.nan, codes)
    matrix[pd.isna(matrix)] = 0
    return matrix.astype(float)


def abs_correlation_sums(features):
    """Sum, per feature, the absolute Pearson correlation with all features.

    Pairs whose correlation is undefined (NaN, e.g. a constant column)
    contribute nothing to the sum.  The correlation of a feature with itself
    is included.

    Args:
        features: 2-D array-like of shape (n_samples, n_features).

    Returns:
        List with one accumulated value per feature, in column order.
    """
    # One contiguous row per feature, so every pearsonr call gets a plain
    # 1-D float vector.
    columns = np.ascontiguousarray(np.asarray(features, dtype=float).T)
    totals = []
    for left in columns:
        total = 0
        for right in columns:
            coefficient = pearsonr(left, right)[0]
            if np.isnan(coefficient):
                continue
            # Plain left-to-right accumulation: keeps the floating-point
            # result (and therefore the ranking) independent of any
            # compensated-summation behaviour of sum().
            total = total + abs(coefficient)
        totals.append(total)
    return totals


def interval_lengths(total, parts):
    """Split ``total`` items into ``parts`` group sizes differing by at most 1.

    The first ``total % parts`` groups receive the extra item.
    """
    base, extra = divmod(total, parts)
    return [base + 1 if k < extra else base for k in range(parts)]


def split_by_lengths(order, lengths):
    """Cut ``order`` into consecutive lists with the given lengths."""
    order = np.asarray(order)
    groups = []
    start = 0
    for length in lengths:
        groups.append(order[start:start + length].tolist())
        start += length
    return groups


def pack_groups(groups, dtype=np.intp):
    """Turn a list of index lists into an array that ``np.save`` accepts.

    Equally sized groups become a regular 2-D array of ``dtype``.  Groups of
    different sizes cannot form a rectangular array (recent NumPy raises
    ``ValueError`` when asked to build one implicitly), so they are stored as
    a 1-D object array holding one Python list per group; such a file must be
    read back with ``np.load(..., allow_pickle=True)``.
    """
    if len({len(group) for group in groups}) <= 1:
        return np.array(groups, dtype=dtype)
    packed = np.empty(len(groups), dtype=object)
    for slot, group in enumerate(groups):
        packed[slot] = group
    return packed


def main():
    """Rank the features of each dataset and save the ranking in groups.

    For every id in ``DATASET_IDS`` the features are downloaded from OpenML,
    encoded, min-max scaled and ordered by ascending summed absolute Pearson
    correlation.  The ordering is split into ``NUM_INTERVALS`` consecutive
    groups and written to ``./<id>/pearson.npy``.
    """
    for dataset_id in DATASET_IDS:
        dataset = openml.datasets.get_dataset(dataset_id)
        # The target is still requested so that it is excluded from the
        # feature table; its values are not needed for the ranking.
        frame, _, categorical_indicator, _ = dataset.get_data(
            dataset_format="dataframe", target=dataset.default_target_attribute
        )

        features = to_numeric_matrix(frame, categorical_indicator)
        scaled = preprocessing.MinMaxScaler().fit_transform(features)

        ranking = np.argsort(abs_correlation_sums(scaled))
        print(ranking)

        lengths = interval_lengths(len(ranking), NUM_INTERVALS)
        print(lengths)
        groups = split_by_lengths(ranking, lengths)

        target = './{}/pearson.npy'.format(dataset_id)
        # The per-dataset directory may not exist yet on a fresh checkout.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        np.save(target, pack_groups(groups, dtype=ranking.dtype))


if __name__ == "__main__":
    main()
