import numpy as np
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
from utils import read_data, check_data, adjust_dataset_size, split_labels, ColInfo, contruct_col_info
import category_encoders


def data_preprocess_global(dst, selected_labels, y_name):
    """Filter rows by label, drop incomplete rows and ordinal-encode all columns.

    Args:
        dst: DataFrame holding the feature columns and the label column.
            The filtering and NaN removal done here produce new frames
            rather than editing the caller's object in place.
        selected_labels: Sequence of label values to keep, or ``None`` to
            keep every label present in the data. The label at position
            ``i`` is encoded as the integer ``i`` (so a two-element list
            yields a 0/1 target). With ``None``, labels are numbered in
            order of first appearance after rows with missing values have
            been removed.
        y_name: Name of the label column in ``dst``.

    Returns:
        The encoded DataFrame, after it has been passed to ``check_data``.
    """
    if selected_labels is not None:
        labels = list(selected_labels)
        dst = dst[dst[y_name].isin(labels)]

    # Non in-place dropna: avoids mutating the caller's frame (when no
    # filtering happened) and avoids pandas' SettingWithCopyWarning (when
    # it did).
    dst = dst.dropna()

    if selected_labels is None:
        # No explicit selection: derive the label set from the cleaned data
        # instead of indexing into None.
        labels = dst[y_name].unique().tolist()

    # Ordered list (not a set) so the encoder is configured deterministically.
    feature_cols = [col for col in dst.columns if col != y_name]
    label_mapping = {label: code for code, label in enumerate(labels)}

    feature_enc = category_encoders.OrdinalEncoder(cols=feature_cols)
    label_enc = category_encoders.OrdinalEncoder(
        cols=[y_name],
        mapping=[{'col': y_name, 'mapping': label_mapping}],
    )
    dst = feature_enc.fit_transform(dst)
    dst = label_enc.fit_transform(dst)

    check_data(dst)

    return dst


def get_mushroom_data(file_path):
    """Load and prepare the mushroom dataset stored at ``file_path``.

    The raw data is read with ``read_data``, restricted to the ``'e'`` and
    ``'p'`` values of the ``'class'`` column and encoded by
    ``data_preprocess_global``, resized by ``adjust_dataset_size`` and then
    split into features and labels.

    Args:
        file_path: Location of the dataset, forwarded to ``read_data``.

    Returns:
        A tuple ``(dst_x, dst_y, col_info)``: the two results of
        ``split_labels`` and the object built by ``contruct_col_info``.
    """
    label_column = 'class'
    class_values = ['e', 'p']

    frame = data_preprocess_global(read_data(file_path), class_values, y_name=label_column)

    # NOTE(review): the meaning of action_type=1 / sample_rate=1 is defined
    # by the project helper -- presumably "keep the full dataset"; confirm.
    frame = adjust_dataset_size(frame, action_type=1, y_name=label_column, sample_rate=1)

    features, targets = split_labels(frame, y_name=label_column)

    # All feature columns go in the first list; the second list is left empty.
    feature_names = list(features.columns)
    col_info = contruct_col_info(feature_names, [], label_column, features)

    return features, targets, col_info