import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler

import argparse

import os

# NOTE(review): `file` is not referenced by any code visible in this module —
# confirm whether it is still needed.
file = 'wine_pca4'

def process_csv(csv_path, output_dir, target_name):
    """Convert a CSV dataset into normalised NumPy arrays saved on disk.

    The column ``target_name`` is treated as a binary label and every other
    column as a feature. Features are scaled with a default-configured
    ``MinMaxScaler``; the two label values are encoded as 0 (smaller value)
    and 1 (larger value) with dtype ``int32``. The arrays are written with
    ``np.save`` to ``<output_dir>/X_data`` and ``<output_dir>/y_data``
    (``np.save`` appends the ``.npy`` suffix); ``output_dir`` is created if
    it does not exist.

    Args:
        csv_path: Path of the CSV file to read.
        output_dir: Directory that receives the two saved arrays.
        target_name: Name of the label column in the CSV.

    Raises:
        KeyError: If ``target_name`` is not a column of the CSV.
        AssertionError: If the CSV contains null values, or if the label
            column does not hold exactly two distinct values.
    """
    df = pd.read_csv(csv_path)

    y = df[target_name]
    X = df.drop(columns=[target_name])

    # Raised explicitly instead of via `assert` so the validation is not
    # stripped under `python -O`; the exception type stays AssertionError.
    if df.isnull().values.any():
        raise AssertionError('Null values encountered')

    X = MinMaxScaler().fit_transform(X.to_numpy())

    # Encode the labels from their *raw* values. Scaling to [0, 1] and then
    # truncating to int collapses every non-maximum class to 0, so a check
    # performed after that step can never detect a target with more than two
    # classes (and truncation could turn a 0.999... maximum into 0).
    # `np.unique` returns the sorted classes, and the inverse indices are
    # exactly the 0/1 encoding (smaller label -> 0, larger label -> 1).
    classes, y = np.unique(y.to_numpy(), return_inverse=True)
    if classes.shape[0] != 2:
        raise AssertionError(
            f'Expected exactly 2 distinct values for y, found {classes.shape[0]}'
        )
    y = y.astype(np.int32)

    os.makedirs(output_dir, exist_ok=True)

    np.save(os.path.join(output_dir, 'X_data'), X)
    np.save(os.path.join(output_dir, 'y_data'), y)

if __name__ == '__main__':
    # Command-line entry point: collect the three options and forward them
    # to process_csv as keyword arguments.
    cli = argparse.ArgumentParser(
        description='Converts a csv into numpy and normalizes it',
    )

    # (short flag, long flag, default value) for every supported option.
    option_table = (
        ('-c', '--csv_path', './csv/wine_pca.csv'),
        ('-o', '--output_dir', './datasets/wine_pca'),
        ('-t', '--target_name', 'class'),
    )
    for short_flag, long_flag, fallback in option_table:
        cli.add_argument(short_flag, long_flag, default=fallback)

    # The parsed namespace holds exactly csv_path, output_dir and
    # target_name, which match process_csv's parameter names.
    process_csv(**vars(cli.parse_args()))