from .base import XLSData


class LawSchool(XLSData):
    """Dataset definition for the law school data stored in ``lawschool.xls``.

    The class only supplies configuration: a handful of class-level constants
    and a ``setup`` hook that forwards the column layout of the spreadsheet to
    ``XLSData.read``. All actual loading and preprocessing happens in the base
    class.
    """

    # Identifier of this dataset.
    name = "lawschool"
    # Presumably the number of feature columns and sensitive-attribute columns
    # after preprocessing -- TODO confirm against XLSData.
    feat_dim = 22
    sens_dim = 6

    # Some values are NaN and are deleted later on. If Gender were treated as a categorical column, those NaNs would
    # be very tricky to avoid, which is why Gender is meant to stay a plain column, i.e. just column [0].
    # NOTE(review): this does not match the code as written -- the list below is [0, 1], and setup() passes "Gender"
    # in `categorical_values`. Confirm which of the two is intended.
    simple_sens_cols = [0, 1]

    def __init__(self, **kwargs):
        """Forward all keyword arguments unchanged to ``XLSData.__init__``."""
        super().__init__(**kwargs)

    def setup(self, stage: str):
        """Read ``lawschool.xls`` through ``XLSData.read`` with this dataset's column layout.

        Args:
            stage: Accepted as part of the hook signature; not used by this
                implementation.
        """
        # Column layout of the spreadsheet, handed to the base-class reader as keyword arguments.
        read_options = {
            "sens_columns": ["Gender", "Race"],
            "label_column": ["admit"],
            "drop_columns": [
                "enroll",
                "Year",
                "MissingRace",
                "Black",
                "Hispanic",
                "Asian",
                "White",
                "URM",
            ],
            "categorical_values": ["Gender", "Race", "college"],
            # No value remapping is configured for features or labels.
            "mapping": {},
            "mapping_label": {},
            "normalise_columns": ["LSAT", "GPA"],
            # Row filter keyed on the "MissingRace" column with value 1.
            "drop_rows": {"MissingRace": [1]},
        }
        super().read("lawschool.xls", **read_options)
