import numpy as np
from ucimlrepo import fetch_ucirepo 

def get_communities_and_crimes():
    """Fetch UCI repository dataset 183 and package it as an instance dict.

    Cleaning steps, in order:

    1. ``'?'`` entries in the feature table are treated as missing (NaN).
    2. Every feature column containing at least one missing value is removed.
    3. A binary ``'sensitive'`` column is added: 1 where ``racepctblack > 0.2``,
       else 0.
    4. Any row that still has a missing value is removed, and the same rows
       are removed from the targets so that ``X``, ``y`` and ``group`` stay
       aligned row by row.

    The cleaned feature table is written back to ``dataset.data.features``,
    so ``instance['data']`` exposes it as well.

    Returns:
        dict with keys:
            ``'data'``     -- the fetched ``dataset.data`` object (features
                              replaced by the cleaned table).
            ``'y'``        -- targets as a NumPy array, one row per kept sample.
            ``'group'``    -- values of the ``'sensitive'`` column.
            ``'X'``        -- cleaned features without ``'state'``,
                              ``'communityname'`` and ``'fold'``; the
                              ``'sensitive'`` column is kept.
            ``'n'``        -- number of rows in ``X``.
            ``'colnames'`` -- column index of ``X``.
    """
    dataset = fetch_ucirepo(id=183)

    # '?' is the missing-value marker; convert it so pandas can detect it.
    features = dataset.data.features.replace('?', np.nan)

    # Keep only fully observed columns: a single missing entry is enough for
    # a column to be removed.
    features = features.dropna(axis=1)

    # Binary group indicator derived from the share recorded in racepctblack.
    # ``assign`` returns a new frame, avoiding chained in-place assignment.
    features = features.assign(
        sensitive=(features['racepctblack'] > .2).astype(int).to_numpy()
    )

    # Row filter. With all incomplete columns already removed this is normally
    # a no-op, but the mask is applied to the targets too so the two can never
    # drift apart.
    complete_rows = features.notna().all(axis=1).to_numpy()
    features = features.loc[complete_rows]
    targets = np.asarray(dataset.data.targets)[complete_rows]

    # Publish the cleaned table on the dataset object returned under 'data'.
    dataset.data.features = features

    cols_to_drop = ['state', 'communityname', 'fold']
    X = features.drop(columns=cols_to_drop)

    instance = {}
    instance['data'] = dataset.data
    instance['y'] = targets
    instance['group'] = features['sensitive'].to_numpy()
    instance['X'] = X.to_numpy()
    instance['n'] = X.shape[0]
    instance['colnames'] = X.columns
    return instance


