{
    "id": 0,
    "domain": "sociology",
    "workflow_tags": "regression, feature engineering, data cleaning",
    "domain_knowledge": "This is a longitudinal survey data that does not have a perfect response rate. Addressing & accounting for missing information in longitudinal survey data is critical to ensuring the validity and reliability of the analysis outcomes. Spending and saving behavior can be used as proxies for Time preference of an individual. Incarceration status can also be gauged by studying the place of residence. If it is ‘Jail’, or other facilities of detention, incarceration can be assumed. Test scores and class standings are some of the ways to gauge the academic performance of an individual.",
    "datasets": [
        {
            "name": "nls_raw.csv",
            "description": "The dataset contains information from National Longitudinal Survey of Youth (NLSY79). It includes information about the Demographics, Family Background, Education, Health, Residential, Financial & Criminal Records of the participants.",
            "max_depth": 0,
            "columns": {
                "raw": [
                    {
                        "name": "ID# (range 1-12686) 1979",
                        "description": "Unique Identifier of the respondent",
                        "depth": 0
                    },
                    {
                        "name": "Sample ID, 1979 (interview)",
                        "description": "Sample Identification Code",
                        "depth": 0
                    },
                    {
                        "name": "Age of respondent, 1979",
                        "description": "Age of respondent in 1979",
                        "depth": 0
                    },
                    {
                        "name": "Age of respondent at interview date, 1981",
                        "description": "Age of respondent in 1981",
                        "depth": 0
                    },
                    {
                        "name": "Age of respondent at interview date, 1989",
                        "description": "Age of respondent in 1989",
                        "depth": 0
                    },
                    {
                        "name": "Occupation of adult male in household at age 14, 1979",
                        "description": "Occupation of the adult male present in the household of the respondent at age 14 in 1979. Variable records the occupation of the father figure of the repondent, values include FARMER AND FARM MANAGERS, PROFESSIONAL,TECHNICAL AND KINDRED etc",
                        "depth": 0
                    },
                    {
                        "name": "Highest grade completed by respondent's mother, 1979",
                        "description": "Highest grade or year of regular school that respondent's mother ever completed till 1979",
                        "depth": 0
                    },
                    {
                        "name": "Highest grade completed by respondent's father, 1979",
                        "description": "Highest grade or year of regular school that respondent's father ever completed till 1979",
                        "depth": 0
                    },
                    {
                        "name": "Highest grade completed, 1979",
                        "description": "Highest grade or year of regular school that respondent have completed and got credit for till 1979",
                        "depth": 0
                    },
                    {
                        "name": "Racial/ethnic cohort, 1979",
                        "description": "Respondent's racial/ethnic cohort, contains one of three values 1:BLACK, 2:HISPANIC, 3:NON-BLACK NON-HISPANIC",
                        "depth": 0
                    },
                    {
                        "name": "Sex of respondent, 1979",
                        "description": "Sex of the respondent, 1:MALE or 2:FEMALE",
                        "depth": 0
                    },
                    {
                        "name": "Family size, 1979",
                        "description": "Family size of the respondent in 1979",
                        "depth": 0
                    },
                    {
                        "name": "Ever convicted of an illegal act in adult court before 1980",
                        "description": "Boolean variable that indicates if the respondent was convicted of an illegal act in adult court other than minor traffic violations before 1980",
                        "depth": 0
                    },
                    {
                        "name": "Ever been sentenced in any correctional institution before 1980",
                        "description": "Boolean variable that indicated if the respondent was sentenced to spend time in a corrections institute, like a jail, prison, or a youth institution like a training school or reform school or not before 1980",
                        "depth": 0
                    },
                    {
                        "name": "Height of respondent, 1981",
                        "description": "Height of the respondent in inches in 1981",
                        "depth": 0
                    },
                    {
                        "name": "Height of respondent, 1985",
                        "description": "Height of the respondent in inches in 1985",
                        "depth": 0
                    },
                    {
                        "name": "Weight of respondent, 1981",
                        "description": "Weight of the respondent in kilograms in 1981",
                        "depth": 0
                    },
                    {
                        "name": "Weight of respondent, 1989",
                        "description": "Weight of the respondent in kilograms in 1989",
                        "depth": 0
                    },
                    {
                        "name": "Weight of respondent, 1992",
                        "description": "Weight of the respondent in kilograms in 1992",
                        "depth": 0
                    },
                    {
                        "name": "Rank in class last year attended at this school, 1981",
                        "description": "Respondent's rank in the class that he attended in school last year (in 1980) (variable recorded in 1981)",
                        "depth": 0
                    },
                    {
                        "name": "Number of students in class last year attended at this school, 1981",
                        "description": "Number of students in the respondent's class for the last year attended this school",
                        "depth": 0
                    },
                    {
                        "name": "ASVAB - Arithmetic Reasoning Z Score (rounded), 1981",
                        "description": "This variable represents the standardized scores of respondents on the Arithmetic Reasoning section of the ASVAB test. It provides a way to compare individuals' performance on this specific aspect of the test within a standardized framework.",
                        "depth": 0
                    },
                    {
                        "name": "ASVAB - Word Knowledge Z Score (rounded), 1981",
                        "description": "This variable represents the standardized scores of respondents on the Word Knowledge section of the ASVAB test, allowing for comparison of individuals' performance on this specific aspect of the test within a standardized framework.",
                        "depth": 0
                    },
                    {
                        "name": "ASVAB - Paragraph Comprehension Z Score (rounded), 1981",
                        "description": "This variable represents the standardized scores of respondents on the Paragraph Comprehension section of the ASVAB test, allowing for comparison of individuals' performance on this specific aspect of the test within a standardized framework.",
                        "depth": 0
                    },
                    {
                        "name": "ASVAB - Mathematics Knowledge Z Score (rounded), 1981",
                        "description": "This variable represents the standardized scores of respondents on the Mathematics Knowledge section of the ASVAB test, facilitating comparison of individuals' performance on this specific aspect of the test within a standardized framework.",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1981",
                        "description": "Type of residence respondent is living in the 1981, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1982",
                        "description": "Type of residence respondent is living in the 1982, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1983",
                        "description": "Type of residence respondent is living in the 1983, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1984",
                        "description": "Type of residence respondent is living in the 1984, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1985",
                        "description": "Type of residence respondent is living in the 1985, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1986",
                        "description": "Type of residence respondent is living in the 1986, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1987",
                        "description": "Type of residence respondent is living in the 1987, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1988",
                        "description": "Type of residence respondent is living in the 1988, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1989",
                        "description": "Type of residence respondent is living in the 1989, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1990",
                        "description": "Type of residence respondent is living in the 1990, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1991",
                        "description": "Type of residence respondent is living in the 1991, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1992",
                        "description": "Type of residence respondent is living in the 1992, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1993",
                        "description": "Type of residence respondent is living in the 1993, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1994",
                        "description": "Type of residence respondent is living in the 1994, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Type of residence respondent is living in, 1996",
                        "description": "Type of residence respondent is living in the 1996, contains one of these values 1:ABOARD SHIP, BARRACKS,    2:BACHELOR, OFFICER QUARTERS,    3:DORM, FRATERNITY, SORORITY,    4:HOSPITAL,    5:JAIL,    6:OTHER TEMPORARY QUARTERS,    11:OWN DWELLING UNIT,    12:ON-BASE MIL FAM HOUSING,    13:OFF-BASE MIL FAM HOUSING,    14:ORPHANAGE,    15:RELIGIOUS INSTITUTION,    16:OTHER INDIVIDUAL QUARTERS,    17:PARENTAL,    18:HHI CONDUCTED WITH PARENT,    19:R IN PARENTAL HOUSEHOLD",
                        "depth": 0
                    },
                    {
                        "name": "Family net wealth, 1985",
                        "description": "Total Net Wealth for Family. Created by summing all asset values and subtracting all debts for the year 1985",
                        "depth": 0
                    },
                    {
                        "name": "Family net wealth, 1990",
                        "description": "Total Net Wealth for Family. Created by summing all asset values and subtracting all debts for the year 1990",
                        "depth": 0
                    },
                    {
                        "name": "Family net wealth, 1996 (key data point)",
                        "description": "Total Net Wealth for Family. Created by summing all asset values and subtracting all debts for the year 1996",
                        "depth": 0
                    },
                    {
                        "name": "Market value of residential property respondent/spouse own, 1985",
                        "description": "Market value of residential property that respondent/spouse owned in 1985",
                        "depth": 0
                    },
                    {
                        "name": "Market value of residential property respondent/spouse own, 1990",
                        "description": "Market value of residential property that respondent/spouse owned in 1990",
                        "depth": 0
                    },
                    {
                        "name": "Market value of residential property respondent/spouse own, 1996",
                        "description": "Market value of residential property that respondent/spouse owned in 1996",
                        "depth": 0
                    },
                    {
                        "name": "Total market value of farm, business, and other property, 1985",
                        "description": "Total market value of all of the real estate, assets in the business(es), farm operation(s) in 1985",
                        "depth": 0
                    },
                    {
                        "name": "Total market value of farm, business, and other property, 1990",
                        "description": "Total market value of all of the real estate, assets in the business(es), farm operation(s) in 1990",
                        "depth": 0
                    },
                    {
                        "name": "Total market value of farm, business, and other property, 1996",
                        "description": "Total market value of all of the real estate, assets in the business(es), farm operation(s) in 1996",
                        "depth": 0
                    },
                    {
                        "name": "Market Value of vehicles respondent/spouse own, 1985",
                        "description": "Total market value of all vehicles including automobiles that respondent/spouse owned in 1985",
                        "depth": 0
                    },
                    {
                        "name": "Market Value of vehicles respondent/spouse own, 1990",
                        "description": "Total market value of all vehicles including automobiles that respondent/spouse owned in 1990",
                        "depth": 0
                    },
                    {
                        "name": "Market Value of vehicles respondent/spouse own, 96",
                        "description": "Total market value of all vehicles including automobiles that respondent/spouse owned in 1996",
                        "depth": 0
                    },
                    {
                        "name": "Total market value of items over $500, 1985",
                        "description": "Total market value of all the other assets of the respondent that were worth more than $500 in 1985",
                        "depth": 0
                    },
                    {
                        "name": "Total market value of items over $500, 1990",
                        "description": "Total market value of all the other assets of the respondent that were worth more than $500 in 1990",
                        "depth": 0
                    },
                    {
                        "name": "Total market value of items over $500, 1996",
                        "description": "Total market value of all the other assets of the respondent that were worth more than $500 in 1996",
                        "depth": 0
                    },
                    {
                        "name": "Total net family income, previous calendar year, 1979",
                        "description": "Total net family income for the previous calendar year (1978) (recorded in 1979)",
                        "depth": 0
                    },
                    {
                        "name": "Total net family income, previous calendar year, 1985",
                        "description": "Total net family income for the previous calendar year (1984) (recorded in 1985)",
                        "depth": 0
                    },
                    {
                        "name": "Total net family income, previous calendar year, 1989",
                        "description": "Total net family income for the previous calendar year (1989) (recorded in 1989)",
                        "depth": 0
                    },
                    {
                        "name": "Was more money put into or taken out of R/spouse savings since last interview, 1989",
                        "description": "Categorical variable indicating if was more money was put into or taken out of respondent/spouse savings since last interview in 1989.\nIt contains four values 1:PUT MORE MONEY IN, 2:TOOK MORE MONEY OUT, 3:NO CHANGE, 4:NO SAVINGS",
                        "depth": 0
                    },
                    {
                        "name": "Net amount respondent/spouse put into savings since last interview, 1989",
                        "description": "Net amount of money that respondent/spouse put into their savings since last interview in 1989",
                        "depth": 0
                    },
                    {
                        "name": "Net amount respondent/spouse took out of savings since last interview, 1989",
                        "description": "Net amount of money that respondent/spouse took out of savings since last interview in 1989",
                        "depth": 0
                    }
                ]
            }
        }
    ],
    "intermediate": [],
    "hypotheses": {
        "main": [
            {
                "depth": 0,
                "target": "",
                "expr": "",
                "expr_ind": "",
                "text": "Higher time preference associated with higher BMI for 1989 data."
            }
        ],
        "intermediate": []
    },
    "workflow": "1. Selected appropriate variables from the raw data\ne.g.: Height: Height of respondent on 1985 instead of 1981.\n      Income: Total net family income, 1989 (There are many other income variables in the raw data)\n      Age:    Age of respondent at 1989 (Derived from Age at 1979).\nData Transformation:\n2. Replaced -1 to -5 values (unavailable data) with NaN\n3. Imputed the missing values in the AGE and INCOME variable with mean.\n4. AGE_1989 had missing values, hence derived the variable as [AGE_1979 + 10]\n5. Created a BMI variable using: bmi = (weight) * 0.453592 / (height) * 0.0254\n6. Divided the Family income variable by 1000$ (Mentioned in the paper)\n7. Created an AGE^2 variable (From the paper)\n8. One-hot encoded RACE variable into BLACK and HISPANIC\n9. One-hot encooded GENDER variable into MALE and FEMALE\n10. Selected 'Was more money put into or taken out of R/spouse savings since last interview, 1989' as the Time Preference variable.\n  DISSAVED = 1 if 'TOOK MORE MONEY OUT' else 0\n  SAMESAVE = 1 if 'NO SAVINGS' or 'NO CHANGE' else 0\n11. Dropped the unimportant columns for replication\n12. DISSAVED and SAMESAVE as independent variables and BMI as dependent variable\n13. Fit an OLS Regression Model",
    "queries": [
        [
            {
                "qid": 0,
                "true_hypothesis": "Higher time preference associated with higher BMI for 1989 data.",
                "question_type": "relationship",
                "question": "Does increased time preference leads to higher BMI?"
            }
        ]
    ]
}