class DatasetConfig:
    """Baseline dataset configuration with trivial defaults.

    Attributes assigned in ``__init__``:
        all_splits: ``{family name: {subset name: callable}}``; the only
            entry here is ``"all" -> "all"``, whose callable returns its
            argument unchanged.
        individual_identifiers, individual_metadata_identifiers,
        role_identifiers, role_metadata_identifiers: empty lists.
        application_identifier: callable returning ``1`` for any row.
        outcome_identifier: callable returning ``-1`` for any row.
    """

    def __init__(self):
        def whole_frame(df):
            # Trivial split: hand the input back untouched (same object).
            return df

        def constant_application(row):
            return 1

        def constant_outcome(row):
            return -1

        self.all_splits = {"all": {"all": whole_frame}}
        # No identifier columns are configured by default.
        self.individual_identifiers = list()
        self.individual_metadata_identifiers = list()
        self.role_identifiers = list()
        self.role_metadata_identifiers = list()
        self.application_identifier = constant_application
        self.outcome_identifier = constant_outcome


class JLEConfig:
    """Split definitions and identifier columns for the "JLE" dataset.

    Each split is a callable that receives the data as ``df`` and returns the
    filtered subset. The filters use attribute-style column access plus
    boolean-mask indexing (presumably a pandas DataFrame -- confirm against
    the caller).

    Attributes assigned in ``__init__``:
        all_splits: ``{family name: {subset name: filter callable}}`` for the
            families ``all``, ``age``, ``employment``, ``skill``, ``gender``.
        individual_identifiers / individual_metadata_identifiers: column
            name lists for individuals.
        role_identifiers / role_metadata_identifiers: column name lists for
            roles.
        application_identifier: callable returning ``1`` for any row.
        outcome_identifier: callable returning ``1`` when
            ``row["callback"] == "yes"`` and ``-1`` otherwise.
    """

    def __init__(self):
        def rows_matching(column, wanted):
            # Factory instead of one inline lambda per entry, so every filter
            # captures its own column/value pair.
            def pick(df):
                return df[getattr(df, column) == wanted]

            return pick

        def everything(df):
            return df

        def younger_than_fifty(df):
            return df[df.age < 50]

        def older_than_fifty(df):
            return df[df.age > 50]

        def callback_to_label(row):
            if row["callback"] == "yes":
                return 1
            return -1

        self.all_splits = {
            "all": {"all": everything},
            # Both bounds are strict: rows with age exactly 50 belong to
            # neither age subset.
            "age": {
                "young": younger_than_fifty,
                "old": older_than_fifty,
            },
            "employment": {
                "employed": rows_matching("employmentstatus", "Employed"),
                "unemployed": rows_matching("employmentstatus", "Unemployed"),
            },
            "skill": {
                "low": rows_matching("skill", "Low"),
                "high": rows_matching("skill", "High"),
            },
            "gender": {
                "female": rows_matching("gender", "Female"),
                "male": rows_matching("gender", "Male"),
            },
        }

        self.individual_identifiers = (
            "firstname lastname state tripletnumber employmentstatus skill age"
        ).split()
        self.individual_metadata_identifiers = (
            "age employmentstatus skill gender"
        ).split()

        # NOTE: grouping roles by "clid" is precise, but yields exactly 4
        # applications per role. Widening the key to ("company",
        # "companydescription") is worth considering -- done carefully, so the
        # roughly 1/3 of rows where "company" is missing are not thrown away.
        self.role_identifiers = ["clid"]
        self.role_metadata_identifiers = ["companydescription", "state"]

        self.application_identifier = lambda row: 1
        self.outcome_identifier = callback_to_label


class JPEConfig:
    """Split definitions and identifier columns for the "JPE" dataset.

    Each split is a callable that receives the data as ``df`` and returns the
    filtered subset. The filters use attribute-style column access plus
    boolean-mask indexing (presumably a pandas DataFrame -- confirm against
    the caller).

    Attributes assigned in ``__init__``:
        all_splits: ``{family name: {subset name: filter callable}}`` for the
            families ``all``, ``age``, ``skill``, ``gender``, ``occupation``.
        individual_identifiers: column names identifying an individual.
        individual_metadata_identifiers: empty list.
        role_identifiers: column names identifying a role (``["jobid"]``).
        role_metadata_identifiers: empty list.
        application_identifier: callable returning ``1`` for any row.
        outcome_identifier: callable returning ``1`` when
            ``row["callback"] == 1.0`` and ``-1`` otherwise.
    """

    def __init__(self):
        def rows_matching(column, wanted):
            # Factory so each filter captures its own column/value pair.
            return lambda df: df[getattr(df, column) == wanted]

        all_split = {"all": lambda df: df}
        # NOTE(review): thresholds are kept exactly as before, but they do
        # not partition the data: ages strictly between 50 and 55 fall into
        # both "middle" and "old", and age exactly 35 falls into neither
        # "young" nor "middle". Confirm the intended boundaries.
        age_split = {
            "young": lambda df: df[df.age < 35],
            # One combined mask instead of chained df[mask1][mask2]: the
            # chained form applied a full-length mask to an already filtered
            # frame, forcing an index realignment (and a pandas warning).
            "middle": lambda df: df[(df.age > 35) & (df.age < 55)],
            "old": lambda df: df[df.age > 50],
        }
        # An employment-status split (as defined in JLEConfig) is currently
        # not enabled for this dataset.
        gender_split = {
            "female": rows_matching("gender", "Female"),
            "male": rows_matching("gender", "Male"),
        }
        occupation_split = {
            "admin": rows_matching("occupation", "admin"),
            "janitor": rows_matching("occupation", "janitor"),
            "sales": rows_matching("occupation", "sales"),
            "security": rows_matching("occupation", "security"),
        }
        skill_split = {
            "low": rows_matching("skill", "Low"),
            "high": rows_matching("skill", "High"),
        }
        self.all_splits = {
            "all": all_split,
            "age": age_split,
            "skill": skill_split,
            "gender": gender_split,
            "occupation": occupation_split,
        }
        self.individual_identifiers = [
            "firstname",
            "lastname",
            "city",
            "occupation",
            "tripletfolder",
        ]
        self.individual_metadata_identifiers = []
        self.role_identifiers = ["jobid"]
        self.role_metadata_identifiers = []
        self.application_identifier = lambda row: 1
        self.outcome_identifier = lambda row: 1 if row["callback"] == 1.0 else -1


class LabourEconomicsConfig:
    """Identifier columns and the single trivial split for this dataset.

    Attributes assigned in ``__init__``:
        all_splits: only ``"all" -> "all"``, whose callable returns its
            argument unchanged.
        individual_identifiers: column names identifying an individual.
        individual_metadata_identifiers: empty list.
        role_identifiers: column names identifying a role (``["clid"]``).
        role_metadata_identifiers: empty list.
        application_identifier: callable returning ``1`` for any row.
        outcome_identifier: callable returning ``1`` when
            ``row["callback"] == 1.0`` and ``-1`` otherwise.
    """

    def __init__(self):
        def unfiltered(df):
            return df

        def always_one(row):
            return 1

        def callback_to_label(row):
            if row["callback"] == 1.0:
                return 1
            return -1

        self.all_splits = {"all": {"all": unfiltered}}
        # An individual is keyed by
        #   resumename + missingskill + indigenoustypeinset + tribalgroup
        #   + state + tripletnumber.
        # An earlier candidate key (occupation, tripletnumber, firstname,
        # lastname, majoresume, indigenoustypeinset, tribalgroup) is not used.
        self.individual_identifiers = (
            "resumename missingskill indigenoustypeinset tribalgroup state tripletnumber"
        ).split()
        self.individual_metadata_identifiers = list()
        self.role_identifiers = ["clid"]
        self.role_metadata_identifiers = list()
        self.application_identifier = always_one
        self.outcome_identifier = callback_to_label
