import pandas as pd
import numpy as np
import pdb 

# Load the raw table from CSV.
compas_data = pd.read_csv('path/propublica_data_for_fairml.csv')

# Data Prep
# Column 0 is used as the label, columns 1..11 as model inputs, and
# column 5 as the sensitive attribute.
# NOTE(review): column 5 therefore also remains among the model inputs --
# confirm this is intended.
target_feature = compas_data.iloc[:, 0].values
input_feature = compas_data.iloc[:, list(range(1, 12))].values
sensitive_feature = compas_data.iloc[:, 5].values

# Draw a reproducible random permutation of the row indices (fixed seed).
num_samples = len(input_feature)
np.random.seed(0)
idx = np.random.choice(num_samples, num_samples, replace=False)

# The first 80% of the shuffled indices form the training split,
# the remainder the test split.
split_point = int(num_samples * 0.8)
train_ids, test_ids = idx[:split_point], idx[split_point:]

# Training split.
train_input_feature = input_feature[train_ids]
train_target_feature = target_feature[train_ids]
train_sensitive_feature = sensitive_feature[train_ids].squeeze()

# Test split.
test_input_feature = input_feature[test_ids]
test_target_feature = target_feature[test_ids]
test_sensitive_feature = sensitive_feature[test_ids].squeeze()

from sklearn.preprocessing import StandardScaler

# Standardize the input features. The scaler is fitted on the training
# split only, so no statistics from the test split leak into the
# transformation; the same fitted transformation is then applied to both
# splits.
scaler = StandardScaler()
scaler.fit(train_input_feature)
train_data = scaler.transform(train_input_feature)
test_data = scaler.transform(test_input_feature)

# Persist the *standardized* inputs (train_data / test_data). Saving the
# raw train_input_feature / test_input_feature here would silently discard
# the scaling step above. Labels and the sensitive attribute are saved
# unscaled.
np.save('save_path/train_data.npy', train_data)
np.save('save_path/train_label.npy', train_target_feature)
np.save('save_path/train_sensitive.npy', train_sensitive_feature)

np.save('save_path/test_data.npy', test_data)
np.save('save_path/test_label.npy', test_target_feature)
np.save('save_path/test_sensitive.npy', test_sensitive_feature)
