import sys
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn import decomposition

# Opt in to pandas' future behaviour in which results of operations such as
# Series.replace / fillna on object columns are not silently downcast.
pd.set_option("future.no_silent_downcasting", True)
# ## Data generation functions
def make_bank(n_samples=12):
    """Load the bank dataset and return a class-balanced, shuffled sample.

    Reads ``data/bank.csv`` (``;``-separated), replaces every non-numeric
    entry by its position among the column's unique values, min-max scales
    all columns and uses the last column as the binary label. The same
    number of rows is then drawn from each class (with replacement) and the
    result is returned in random order.

    Randomness comes from NumPy's global ``np.random`` state, so seed it
    with ``np.random.seed`` for reproducible output.

    Args:
        n_samples: Requested total number of rows. Each class contributes
            ``n_samples // 2`` rows, so an odd value yields one row fewer.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a 2-D float array holding
        ``2 * (n_samples // 2)`` rows of scaled features and ``y`` is the
        matching 1-D array of ``0.0`` / ``1.0`` labels.

    Raises:
        SystemExit: With exit code 1 if ``data/bank.csv`` does not exist.
    """
    n_classes = 2
    n_per_class = int(n_samples) // n_classes

    # Load data
    try:
        data = pd.read_csv("data/bank.csv", sep=";")
    except FileNotFoundError:
        print("To use use the bank dataset, please retrieve it from https://doi.org/10.24432/C5K306 and place bank.csv in data/.")
        sys.exit(1)

    # Clean data: encode each non-numeric entry as the index at which it
    # first appears among the column's unique values.
    for column in data.columns:
        codes = {
            entry: float(position)
            for position, entry in enumerate(data[column].unique())
            if not str(entry).lstrip("-").isnumeric()
        }
        if codes:
            data[column] = data[column].replace(codes)

    data = data.astype(float)

    # Normalize data; the last column is the label, everything else a feature.
    scaled = np.asarray(MinMaxScaler().fit_transform(data))
    features, labels = scaled[:, :-1], scaled[:, -1]

    # Balance data: draw the same number of rows (with replacement, so a
    # small class can be oversampled) from each class.
    class0 = features[labels == 0]
    class1 = features[labels != 0]
    X = np.vstack([
        class0[np.random.choice(len(class0), n_per_class)],
        class1[np.random.choice(len(class1), n_per_class)],
    ])
    y = np.hstack((np.zeros(n_per_class), np.ones(n_per_class)))

    # Shuffle with a true permutation: every drawn row appears exactly once,
    # so the class balance is kept and no index can fall outside X when
    # n_samples is odd.
    order = np.random.permutation(len(X))
    return X[order], y[order]
    
