# BNG Annealing: CSV location, one-paragraph summary, and per-column notes.
annealing_bng:
  # Quoted so the parentheses and commas in the filename are unambiguously literal.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0044_BNG(anneal.ORIG,nominal,1000000).csv'
  dataset_description: 'BNG Annealing: Dataset about steel annealing processes with 39 attributes and 1,000,000 rows. Predicts the type of steel based on process parameters.'
  # Keys are column names; values are free-text explanations.
  feature_descriptions:
    family: 'Type of annealing process family (e.g., B, G, etc.)'
    product-type: 'Type of product being annealed'
    steel: 'Steel type code'
    carbon: 'Carbon content'
    hardness: 'Hardness rating'
    temper_rolling: 'Whether temper rolling was applied'
    condition: 'Initial condition of the steel'
    formability: 'Formability rating (categorical)'
    strength: 'Strength category'
    non-ageing: 'Whether steel is non-ageing'
    surface-finish: 'Surface finish type'
    surface-quality: 'Surface quality grade'
    enamelability: 'Enamelability rating'
    bc: 'Presence of BC property'
    bf: 'Presence of BF property'
    bt: 'Presence of BT property'
    bw: 'Presence of BW property'
    bl: 'Presence of BL property'
    m: 'Material indicator'
    chrom: 'Chromium treatment presence'
    phos: 'Phosphate coating presence'
    cbond: 'C-Bond strength'
    marvi: 'MARVI score'
    exptl: 'Experimental flag'
    ferro: 'Ferro test indicator'
    corr: 'Corrosion resistance'
    blue: 'Blueing indicator'
    lustre: 'Lustre indicator'
    jurofm: 'Jurofm indicator'
    s: 'S indicator'
    p: 'P indicator'
    shape: 'Shape code'
    thick: 'Thickness'
    width: 'Width'
    len: 'Length'
    oil: 'Oil indicator'
    bore: 'Bore indicator'

# BNG Labor: CSV location, summary, and per-column notes for the contract dataset.
labor_bng:
  # Quoted so the parentheses and commas in the filename are unambiguously literal.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0046_BNG(labor,nominal,1000000).csv'
  dataset_description: 'BNG Labor: Dataset representing employment contracts and collective bargaining agreements. 1,000,000 rows and 17 attributes. Predicts acceptance or rejection of contract.'
  # Keys are column names; the last entry is flagged as the target in its text.
  feature_descriptions:
    wage-increase-first-year: 'Wage increase percentage for the first year'
    wage-increase-second-year: 'Wage increase percentage for the second year'
    wage-increase-third-year: 'Wage increase percentage for the third year'
    cost-of-living-adjustment: 'Cost of living adjustment clause'
    working-hours: 'Number of working hours per week'
    pension: 'Pension contribution and structure'
    holiday: 'Number of paid holidays'
    vacation: 'Vacation days offered'
    longterm-disability-assistance: 'Availability of long-term disability assistance'
    contribution-to-dental-plan: 'Employer contribution to dental plan'
    bereavement-assistance: 'Bereavement leave assistance'
    contribution-to-health-plan: 'Employer contribution to health plan'
    educational-assistance: 'Educational assistance provided'
    transportation-assistance: 'Transport benefits'
    contribution-to-retirement-plan: 'Retirement plan contributions'
    job-security: 'Job security clause level'
    contract-acceptance: 'Whether contract was accepted (target)'

# BNG Letter Recognition: summary and per-column notes.
# NOTE(review): unlike most sibling entries, no `path` key is given here — confirm
# whether that is intentional.
letter_bng:
  dataset_description: 'BNG Letter Recognition: Optical character recognition dataset with 1,000,000 rows and 16 features extracted from pixel values. Predicts the letter A-Z.'
  # Keys are column names; values are free-text explanations.
  feature_descriptions:
    x-box: 'Horizontal position of box'
    y-box: 'Vertical position of box'
    width: 'Width of box'
    high: 'Height of box'
    onpix: 'Number of pixels in box'
    x-bar: 'Mean horizontal position of pixels'
    y-bar: 'Mean vertical position of pixels'
    x2bar: 'Mean x squared'
    y2bar: 'Mean y squared'
    xybar: 'Mean x * y'
    x2ybr: 'Mean x squared * y'
    xy2br: 'Mean x * y squared'
    x-ege: 'Mean edge count along x axis'
    xegvy: 'Edge variance along x'
    y-ege: 'Mean edge count along y axis'
    yegvx: 'Edge variance along y'

# BNG Autos: summary and per-column notes for the automobile dataset.
# NOTE(review): unlike most sibling entries, no `path` key is given here — confirm
# whether that is intentional.
autos_bng:
  dataset_description: 'BNG Autos: Automobile dataset with 1,000,000 rows and 26 features about car specifications. Predicts car price.'
  # Keys are column names; `price` is flagged as the target in its text.
  feature_descriptions:
    symboling: 'Insurance risk rating'
    normalized-losses: 'Normalized loss value'
    make: 'Manufacturer name'
    fuel-type: 'Fuel type: gas or diesel'
    aspiration: 'Engine aspiration type: std or turbo'
    num-of-doors: 'Number of doors'
    body-style: 'Body style (e.g., sedan, hatchback)'
    drive-wheels: 'Drive wheels configuration (FWD, RWD, etc.)'
    engine-location: 'Engine location (front or rear)'
    wheel-base: 'Wheelbase length'
    length: 'Overall length'
    width: 'Overall width'
    height: 'Overall height'
    curb-weight: 'Weight of car without passengers'
    engine-type: 'Type of engine (e.g., dohc)'
    num-of-cylinders: 'Number of cylinders'
    engine-size: 'Engine size (cc)'
    fuel-system: 'Fuel delivery system'
    bore: 'Bore size of cylinder'
    stroke: 'Stroke length of piston'
    compression-ratio: 'Compression ratio'
    horsepower: 'Engine horsepower'
    peak-rpm: 'RPM at peak horsepower'
    city-mpg: 'Miles per gallon (city)'
    highway-mpg: 'Miles per gallon (highway)'
    price: 'Price of the vehicle (target)'

# BNG Lymphography: CSV location, summary, and per-column notes.
lymph_bng:
  dataset_description: 'BNG Lymphography: Medical dataset about lymph node diagnoses. 1,000,000 samples and 18 attributes. Classifies type of lymphography outcome.'
  # This entry had no `path`; the lymphography CSV is the file whose name matches
  # this dataset (BNG lymph, 1,000,000 rows).
  # NOTE(review): confirm the consumer expects this entry to be loadable.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0049_BNG(lymph,nominal,1000000).csv'
  # Keys are the (truncated) column names; values are free-text explanations.
  feature_descriptions:
    lymphatics: 'Condition of lymphatic system'
    block-of-affere: 'Blockage in afferent lymph vessels'
    bl-of-lymph-c: 'Blockage of lymph capillaries'
    bl-of-lymph-s: 'Blockage of lymph sinuses'
    by-pass: 'Presence of bypasses'
    extravasates: 'Extravasates (leakage) presence'
    regeneration-of: 'Signs of regeneration'
    early-uptake-in: 'Early uptake of tracer'
    lym.nodes-dimin: 'Diminished lymph nodes'
    lym.nodes-enlar: 'Enlarged lymph nodes'
    changes-in-node: 'Structural changes in lymph nodes'
    defect-in-node: 'Defects observed in nodes'
    changes-in-stru: 'Changes in structure'
    changes-in-shap: 'Changes in shape'
    changes-in-size: 'Size-related changes'
    number-of-node: 'Number of lymph nodes involved'
    class: 'Diagnosis class (normal, metastases, etc.)'
# Cleveland Heart Disease: CSV location, summary, and per-column notes.
0049_cleveland:
  dataset_description: 'Cleveland Heart Disease: Clinical dataset for detecting presence of heart disease. Contains 303 samples and 14 attributes.'
  # This entry must not load 0049_BNG(lymph,nominal,1000000).csv: that is the
  # 1,000,000-row lymphography file and none of the columns below exist in it.
  # NOTE(review): filename inferred from the key and the NNNN_<name>.csv convention
  # (cf. 0057_cholesterol.csv) — confirm it exists on disk.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0049_cleveland.csv'
  # Keys are column names; value codes are spelled out in the text where known.
  feature_descriptions:
    age: 'Age of patient in years'
    sex: 'Gender (1 = male; 0 = female)'
    cp: 'Chest pain type (1–4)'
    trestbps: 'Resting blood pressure (mm Hg)'
    chol: 'Serum cholesterol (mg/dl)'
    fbs: 'Fasting blood sugar > 120 mg/dl (1 = true; 0 = false)'
    restecg: 'Resting electrocardiographic results (0–2)'
    thalach: 'Maximum heart rate achieved'
    exang: 'Exercise induced angina (1 = yes; 0 = no)'
    oldpeak: 'ST depression induced by exercise relative to rest'
    slope: 'Slope of the peak exercise ST segment (1–3)'
    ca: 'Number of major vessels colored by fluoroscopy (0–3)'
    thal: 'Thalassemia (3 = normal; 6 = fixed defect; 7 = reversible defect)'
    target: 'Diagnosis of heart disease (0 = no disease, 1–4 = disease severity)'

# BNG Breast Cancer (nominal): CSV location, summary, and per-column notes.
0050_BNG_breast-cancer:
  dataset_description: 'BNG Breast Cancer (Nominal): Scaled Wisconsin Breast Cancer dataset. 1,000,000 samples, 9 categorical attributes. Classifies tumors as benign or malignant.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0050_BNG(breast-cancer,nominal,1000000).csv'
  # Keys are column names; `class` carries the benign/malignant label.
  feature_descriptions:
    clump_thickness: 'Clump thickness'
    uniformity_of_cell_size: 'Uniformity of cell size'
    uniformity_of_cell_shape: 'Uniformity of cell shape'
    marginal_adhesion: 'Marginal adhesion'
    single_epithelial_cell_size: 'Single epithelial cell size'
    bare_nuclei: 'Bare nuclei'
    bland_chromatin: 'Bland chromatin'
    normal_nucleoli: 'Normal nucleoli'
    mitoses: 'Mitoses'
    class: 'Tumor class (benign, malignant)'

# BNG Bridges v1 (nominal): CSV location, summary, and per-column notes.
0053_BNG_bridges_version1:
  dataset_description: 'BNG Bridges v1 (Nominal): Large-scale version of the UCI Bridges dataset. 1,000,000 samples, 8 categorical attributes. Predicts bridge type.'
  # This entry must not load 0053_delta_elevators.csv, which is an unrelated
  # dataset. Filename mirrors the bridges_version2 sibling
  # (0054_BNG(bridges_version2,nominal,1000000).csv).
  # NOTE(review): confirm the file exists on disk.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0053_BNG(bridges_version1,nominal,1000000).csv'
  # Keys are column names; `class` is the bridge-type label.
  feature_descriptions:
    type: 'Type of superstructure'
    span: 'Number of spans'
    material: 'Primary material'
    use: 'Intended use'
    river: 'River classification'
    height: 'Bridge height category'
    enclosure: 'Enclosure type'
    deck_structure: 'Deck structure type'
    class: 'Bridge type label'

# BNG Bridges v2 (nominal): CSV location, summary, and per-column notes.
0054_BNG_bridges_version2:
  dataset_description: 'BNG Bridges v2 (Nominal): Variant of the UCI Bridges dataset. 1,000,000 samples, 8 categorical attributes. Predicts bridge type.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0054_BNG(bridges_version2,nominal,1000000).csv'
  # Keys are column names; `class` is the bridge-type label.
  feature_descriptions:
    type: 'Type of superstructure'
    span: 'Number of spans'
    material: 'Primary material'
    use: 'Intended use'
    river: 'River classification'
    height: 'Bridge height category'
    enclosure: 'Enclosure type'
    deck_structure: 'Deck structure type'
    class: 'Bridge type label'

# BNG Contraceptive Method Choice: CSV location, summary, and per-column notes.
0056_BNG_cmc:
  dataset_description: 'BNG Contraceptive Method Choice: Reprocessed UCI CMC dataset. 55,296 samples, 9 categorical attributes. Predicts contraceptive method used.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0056_BNG(cmc,nominal,55296).csv'
  # Keys are column names; `contraceptive_method` is the predicted label.
  feature_descriptions:
    wife_age: 'Wife’s age in years'
    wife_education: 'Wife’s education level'
    husband_education: 'Husband’s education level'
    num_children: 'Number of children ever born'
    wife_religion: 'Wife’s religion'
    wife_working: 'Is wife working?'
    husband_occupation: 'Husband’s occupation'
    media_exposure: 'Media exposure level'
    age_first_child: 'Age at first birth'
    contraceptive_method: 'Contraceptive method used (no-use, long-term, short-term)'

# BNG Mushroom: CSV location, summary, and per-column notes.
0057_BNG_mushroom:
  dataset_description: 'BNG Mushroom: Large-scale version of the UCI Mushroom dataset. 1,000,000 samples, 22 categorical attributes. Classifies mushrooms as edible or poisonous.'
  # This entry must not load 0057_cholesterol.csv: that file belongs to the
  # separate 0057_cholesterol entry and has none of the columns below.
  # NOTE(review): filename inferred from the NNNN_BNG(<name>).csv convention used by
  # the soybean/trains/vote entries — confirm the exact name on disk.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0057_BNG(mushroom).csv'
  # Keys are column names; `class` is the edible/poisonous label.
  feature_descriptions:
    cap_shape: 'Cap shape'
    cap_surface: 'Cap surface'
    cap_color: 'Cap color'
    bruises: 'Presence of bruises'
    odor: 'Odor type'
    gill_attachment: 'Gill attachment'
    gill_spacing: 'Gill spacing'
    gill_size: 'Gill size'
    gill_color: 'Gill color'
    stalk_shape: 'Stalk shape'
    stalk_root: 'Stalk root'
    stalk_surface_above_ring: 'Surface above ring'
    stalk_surface_below_ring: 'Surface below ring'
    stalk_color_above_ring: 'Color above ring'
    stalk_color_below_ring: 'Color below ring'
    veil_type: 'Veil type'
    veil_color: 'Veil color'
    ring_number: 'Ring number'
    ring_type: 'Ring type'
    spore_print_color: 'Spore print color'
    population: 'Population distribution'
    habitat: 'Habitat type'
    class: 'Mushroom class (edible, poisonous)'

# Cholesterol: CSV location, summary, and per-column notes (regression target
# is `cholesterol` per the description).
0057_cholesterol:
  dataset_description: 'Cholesterol: Clinical measurements of serum cholesterol and related health indicators. ~300 samples, 5 attributes. Regression task predicting cholesterol level.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0057_cholesterol.csv'
  # Keys are column names; values are free-text explanations.
  feature_descriptions:
    age: 'Age of patient'
    sex: 'Gender (M/F)'
    blood_pressure: 'Blood pressure reading'
    heart_rate: 'Resting heart rate'
    bmi: 'Body Mass Index'
    cholesterol: 'Serum cholesterol level (mg/dl)'

# BNG Horse Colic (original, nominal): CSV location, summary, and per-column notes.
0058_BNG_colic_ORIG:
  dataset_description: 'BNG Horse Colic (Original, Nominal): Reprocessed UCI Horse Colic dataset. 1,000,000 samples, 22 categorical attributes. Predicts presence of colic.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0058_BNG(colic.ORIG,nominal,1000000).csv'
  # Keys are column names; `outcome` is the class label.
  feature_descriptions:
    surgery: 'Was surgery performed?'
    age: 'Age category'
    hospital_number: 'Hospital case number'
    rectal_temp: 'Rectal temperature'
    pulse: 'Pulse rate'
    respiratory_rate: 'Respiratory rate'
    temperature_of_extremities: 'Extremity temperature'
    peripheral_pulse: 'Peripheral pulse'
    mucous_membrane: 'Mucous membrane color'
    capillary_refill_time: 'Capillary refill time'
    pain: 'Pain level'
    peristalsis: 'Peristalsis rate'
    abdominal_distension: 'Abdominal distension'
    nasogastric_tube: 'Nasogastric tube presence'
    nasogastric_reflux: 'Nasogastric reflux'
    nasogastric_reflux_ph: 'Nasogastric reflux pH'
    rectal_exam_feces: 'Rectal exam feces'
    abdomen: 'Abdominal findings'
    packed_cell_volume: 'Packed cell volume'
    total_protein: 'Total protein'
    abdom_appearance: 'Abdominal appearance'
    outcome: 'Colic outcome class'

# BNG Horse Colic (nominal-only variant): CSV location and summary.
0059_BNG_colic:
  dataset_description: 'BNG Horse Colic (Nominal): Simplified nominal-only UCI Horse Colic. 1,000,000 samples, 17 categorical attributes. Predicts colic outcome.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0059_BNG(colic,nominal,1000000).csv'
  # Intended content: same columns as 0058_BNG_colic_ORIG, excluding numeric fields.
  # That remark was written as the value itself, so this key parsed as a plain
  # string instead of a column -> description mapping like every sibling entry.
  # An empty mapping keeps the type consistent.
  # TODO: list the nominal columns explicitly once confirmed against the CSV header.
  feature_descriptions: {}

# BNG Page Blocks: CSV location, summary, and per-column notes.
0062_BNG_page-blocks:
  dataset_description: 'BNG Page Blocks: Large-scale reprocessing of UCI Page Blocks dataset. 295,245 samples, 10 numeric attributes. Classifies blocks on a page.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0062_BNG(page-blocks,nominal,295245).csv'
  # Keys are column names; `class` is the block-type label.
  feature_descriptions:
    height: 'Block height'
    length: 'Block length'
    area: 'Block area'
    eccentricity: 'Block eccentricity'
    perimeter: 'Block perimeter'
    blackpix: 'Number of black pixels'
    blackpixvar: 'Variance of black pixels'
    whitetotal: 'Number of white pixels'
    scattern: 'Scatter index'
    mean: 'Mean pixel value'
    class: 'Block type label'

# BNG German Credit (nominal): CSV location, summary, and per-column notes.
0063_BNG_credit-g:
  dataset_description: 'BNG German Credit (Nominal): Reprocessed UCI German Credit dataset. 1,000,000 samples, 20 categorical attributes. Predicts credit risk.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0063_BNG(credit-g,nominal,1000000).csv'
  # Keys are column names; `class` is the good/bad credit-risk label.
  feature_descriptions:
    status: 'Credit status'
    duration: 'Duration in months'
    credit_history: 'Credit history'
    purpose: 'Purpose of credit'
    amount: 'Credit amount'
    savings: 'Savings account/bonds'
    employment: 'Employment since'
    installment_rate: 'Installment rate'
    personal_status: 'Personal status and sex'
    other_debtors: 'Other debtors/guarantors'
    residence_since: 'Residential since'
    property: 'Property type'
    age: 'Age in years'
    other_installment: 'Other installment plans'
    housing: 'Housing type'
    existing_credits: 'Number of existing credits'
    job: 'Job category'
    dependents: 'Number of dependents'
    telephone: 'Telephone ownership'
    foreign_worker: 'Foreign worker status'
    class: 'Credit risk (good, bad)'

# BNG Cylinder Bands: CSV location, summary, and per-column notes.
0065_BNG_cylinder-bands:
  # Sample count corrected from 100,000 to 1,000,000 to agree with the row count
  # encoded in the CSV filename below.
  # NOTE(review): the text calls this a regression task while the last column is a
  # `class` category — confirm the task type.
  dataset_description: 'BNG Cylinder Bands: Reprocessed UCI Cylinder Bands dataset. 1,000,000 samples, 10 numeric attributes. Regression task predicting band properties.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0065_BNG(cylinder-bands,nominal,1000000).csv'
  # Keys are column names; values are free-text explanations.
  feature_descriptions:
    pressure: 'Pressure value'
    temperature: 'Temperature reading'
    volume: 'Volume measurement'
    density: 'Density'
    viscosity: 'Viscosity'
    conductivity: 'Conductivity'
    elasticity: 'Elasticity'
    hardness: 'Hardness'
    flow_rate: 'Flow rate'
    time: 'Time stamp'
    class: 'Band property category'

# BNG Dermatology: CSV location, summary, and per-column notes.
0066_BNG_dermatology:
  dataset_description: 'BNG Dermatology: Reprocessed UCI Dermatology dataset. 1,000,000 samples, 34 numeric/categorical attributes. Classifies dermatological diseases.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0066_BNG(dermatology,nominal,1000000).csv'
  # Keys are column names and are kept verbatim (including unusual spellings),
  # since they are identifiers rather than prose.
  feature_descriptions:
    erythema: 'Erythema level'
    scaling: 'Scaling severity'
    definite_borders: 'Definite borders presence'
    itching: 'Itching presence'
    koebner_phenomenon: 'Koebner phenomenon presence'
    polygonal_papules: 'Polygonal papules'
    follicular_papules: 'Follicular papules'
    oral_mucosal_involvement: 'Oral mucosal involvement'
    knee_and_elbow_involvement: 'Knee & elbow involvement'
    scalp_involvement: 'Scalp involvement'
    family_history: 'Family history'
    melanin_incontinence: 'Melanin incontinence'
    eosinophils: 'Eosinophils in biopsy'
    PNL_infiltrate: 'PNL infiltrate'
    fibrosis_of_papillary_dermis: 'Fibrosis of papillary dermis'
    exocytosis: 'Exocytosis presence'
    acanthosis: 'Acanthosis'
    hyperkeratosis: 'Hyperkeratosis'
    parakeratosis: 'Parakeratosis'
    clubbing_of_elong_nest: 'Clubbing of elongation nests'
    inflammatory_monoluclear_infilitrate: 'Inflammatory mononuclear infiltrate'
    band_like_infiltrate: 'Band-like infiltrate'
    age: 'Patient age'
    class: 'Disease class label'

# BNG Thyroid Disease (sick): CSV location, summary, and per-column notes.
0068_BNG_sick:
  dataset_description: 'BNG Thyroid Disease (Sick): Reprocessed UCI Thyroid dataset. 1,000,000 samples, 21 categorical attributes. Predicts sick vs. healthy thyroid status.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0068_BNG(sick,nominal,1000000).csv'
  # Keys are column names; `class` is the thyroid-status label.
  feature_descriptions:
    T3_resin_uptake: 'T3 resin uptake'
    total_serum_thyroxine: 'Total serum thyroxine'
    total_serum_tri: 'Total serum triiodothyronine'
    TSH: 'Thyroid stimulating hormone'
    basal_TSH: 'Basal TSH'
    max_TSH: 'Max TSH'
    TBG: 'Thyroxine-binding globulin'
    goitre: 'Goitre presence'
    tumor: 'Tumor presence'
    hypopituitary: 'Hypopituitarism'
    psych: 'Psychiatric disease'
    referral_source: 'Referral source'
    age: 'Age'
    sex: 'Sex'
    class: 'Thyroid status'

# BNG Glass Identification: CSV location, summary, and per-column notes.
0070_BNG_glass:
  dataset_description: 'BNG Glass Identification: Reprocessed UCI Glass dataset. 137,781 samples, 9 numeric attributes. Classifies glass type.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0070_BNG(glass,nominal,137781).csv'
  # Keys are column names (element symbols plus RI); `class` is the glass type.
  feature_descriptions:
    RI: 'Refractive index'
    Na: 'Sodium content'
    Mg: 'Magnesium content'
    Al: 'Aluminum content'
    Si: 'Silicon content'
    K: 'Potassium content'
    Ca: 'Calcium content'
    Ba: 'Barium content'
    Fe: 'Iron content'
    class: 'Glass type'

# BNG Soybean: CSV location, summary, and per-column notes.
0071_BNG_soybean:
  dataset_description: 'BNG Soybean: Reprocessed UCI Soybean dataset. 47,500 samples, 35 categorical attributes. Classifies soybean disease.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0071_BNG(soybean).csv'
  # Keys are column names; `class` is the disease label.
  feature_descriptions:
    date: 'Date of observation'
    plant_stand: 'Plant stand'
    precip: 'Precipitation'
    temp: 'Temperature'
    hail: 'Hail damage'
    crop_hist: 'Crop history'
    area_damaged: 'Area damaged'
    severity: 'Severity'
    seed_tmt: 'Seed treatment'
    germination: 'Germination rate'
    plant_growth: 'Plant growth'
    leaves: 'Leaf spots'
    leaf_malf: 'Leaf malformation'
    leaf_mildew: 'Leaf mildew'
    stem: 'Stem spots'
    fruit: 'Fruit spots'
    class: 'Disease class'

# Machine CPU Performance: CSV location, summary, and per-column notes.
0071_machine_cpu:
  dataset_description: 'Machine CPU Performance: Regression dataset predicting CPU performance. 209 samples, 7 attributes.'
  # This entry must not load 0071_BNG(soybean).csv: that file belongs to the
  # 0071_BNG_soybean entry and has none of the columns below.
  # NOTE(review): filename inferred from the key and the NNNN_<name>.csv convention
  # (cf. 0057_cholesterol.csv) — confirm it exists on disk.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0071_machine_cpu.csv'
  # Keys are column names; values are free-text explanations.
  feature_descriptions:
    MYCT: 'Machine cycle time'
    MMIN: 'Minimum main memory'
    MMAX: 'Maximum main memory'
    CACH: 'Cache memory'
    CHMIN: 'Channel minimum'
    CHMAX: 'Channel maximum'
    PRP: 'Published relative performance'
    ERP: 'Estimated relative performance'

# BNG Tic-Tac-Toe Endgame: CSV location, summary, and per-square notes.
0074_BNG_tic-tac-toe:
  dataset_description: 'BNG Tic-Tac-Toe Endgame: Reprocessed UCI Tic-Tac-Toe dataset. 958 samples, 10 categorical attributes. Classifies winning positions.'
  # This entry must not load 0074_meta_stream_intervals.arff.csv: that file belongs
  # to the 0074_meta_stream_intervals entry and has none of the columns below.
  # NOTE(review): filename inferred from the NNNN_BNG(<name>).csv convention used by
  # the soybean/trains/vote entries — confirm the exact name on disk.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0074_BNG(tic-tac-toe).csv'
  # Keys are the nine board squares; `class` is the outcome label.
  feature_descriptions:
    top-left: 'Top-left square'
    top-middle: 'Top-middle square'
    top-right: 'Top-right square'
    middle-left: 'Middle-left square'
    middle-middle: 'Middle-middle square'
    middle-right: 'Middle-right square'
    bottom-left: 'Bottom-left square'
    bottom-middle: 'Bottom-middle square'
    bottom-right: 'Bottom-right square'
    class: 'Win for X?'

# Meta Streaming Intervals: CSV location, summary, and per-column notes.
0074_meta_stream_intervals:
  dataset_description: 'Meta Streaming Intervals: Meta-dataset for concept-drift benchmarks. ~50,000 intervals with summary statistics.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0074_meta_stream_intervals.arff.csv'
  # Keys are column names; `class` is the drift-type label.
  feature_descriptions:
    stream_id: 'Stream identifier'
    start_time: 'Interval start timestamp'
    end_time: 'Interval end timestamp'
    mean: 'Mean value over interval'
    std: 'Standard deviation over interval'
    drift_detected: 'Drift flag'
    class: 'Drift type'

# (A first "0076_BNG_trains" mapping was removed from this position: the same key
# is defined again further down with the 31-attribute column list. Duplicate
# mapping keys are invalid YAML — strict loaders reject the file, and last-wins
# loaders already ignored this earlier copy — so the later definition stands.)

# Delta Elevators: CSV location, summary, and per-column notes.
delta_elevators:
  dataset_description: 'Delta Elevators: Regression dataset predicting elevator vibrations. 2,990 samples, 9 attributes.'
  # This entry had no `path`; 0053_delta_elevators.csv is the file in the dataset
  # directory whose name matches this dataset.
  # NOTE(review): confirm the consumer expects this entry to be loadable and that
  # the columns below match the CSV header.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0053_delta_elevators.csv'
  # Keys are column names; `target` is the regression target per its text.
  feature_descriptions:
    unit_number: 'Elevator unit identifier'
    vibration: 'Vibration reading'
    rotation_frequency: 'Rotation frequency (Hz)'
    torque: 'Torque measurement'
    voltage: 'Voltage reading'
    current: 'Current draw'
    temperature: 'Operating temperature'
    pressure: 'Hydraulic pressure'
    load: 'Load weight'
    target: 'Vibration anomaly score'
# BNG Trains: CSV location, summary, and per-column notes (per-car attributes for
# cars 2-5 plus shape-adjacency flags).
0076_BNG_trains:
  dataset_description: 'BNG Trains: Reprocessed UCI Trains dataset for relational learning. 1,000,000 samples and 31 attributes. Classifies train configuration type.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0076_BNG(trains).csv'
  feature_descriptions:
    # Whole-train counts.
    Number_of_cars: 'Total number of cars in the train'
    Number_of_different_loads: 'Count of distinct load types across all cars'
    # Car #2.
    num_wheels_2: 'Number of wheels on car #2'
    length_2: 'Length category of car #2'
    shape_2: 'Shape code of car #2'
    num_loads_2: 'Number of loads carried by car #2'
    load_shape_2: 'Shape of loads in car #2'
    # Car #3.
    num_wheels_3: 'Number of wheels on car #3'
    length_3: 'Length category of car #3'
    shape_3: 'Shape code of car #3'
    num_loads_3: 'Number of loads carried by car #3'
    load_shape_3: 'Shape of loads in car #3'
    # Car #4.
    num_wheels_4: 'Number of wheels on car #4'
    length_4: 'Length category of car #4'
    shape_4: 'Shape code of car #4'
    num_loads_4: 'Number of loads carried by car #4'
    load_shape_4: 'Shape of loads in car #4'
    # Car #5.
    num_wheels_5: 'Number of wheels on car #5'
    length_5: 'Length category of car #5'
    shape_5: 'Shape code of car #5'
    num_loads_5: 'Number of loads carried by car #5'
    load_shape_5: 'Shape of loads in car #5'
    # Pairwise shape-adjacency flags.
    Rectangle_next_to_rectangle: 'Adjacency flag: rectangle car next to rectangle car'
    Rectangle_next_to_triangle: 'Adjacency flag: rectangle car next to triangle car'
    Rectangle_next_to_hexagon: 'Adjacency flag: rectangle car next to hexagon car'
    Rectangle_next_to_circle: 'Adjacency flag: rectangle car next to circle car'
    Triangle_next_to_triangle: 'Adjacency flag: triangle next to triangle'
    Triangle_next_to_hexagon: 'Adjacency flag: triangle next to hexagon'
    Triangle_next_to_circle: 'Adjacency flag: triangle next to circle'
    Hexagon_next_to_hexagon: 'Adjacency flag: hexagon next to hexagon'
    Hexagon_next_to_circle: 'Adjacency flag: hexagon next to circle'
    Circle_next_to_circle: 'Adjacency flag: circle next to circle'
    # Label.
    class: 'Train configuration class label'

# BNG Heart Statlog: CSV location, summary, and per-column notes.
0077_BNG_heart-statlog:
  dataset_description: 'BNG Heart Statlog: Large-scale version of the Statlog Heart Disease dataset. 1,000,000 samples and 14 attributes. Classifies presence of heart disease.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0077_BNG(heart-statlog,nominal,1000000).csv'
  # Keys are column names (kept verbatim); value codes are spelled out in the text.
  feature_descriptions:
    age: 'Age in years'
    sex: 'Gender (1 = male; 0 = female)'
    chest: 'Chest pain type (categorical 1–4)'
    resting_blood_pressure: 'Resting blood pressure (mm Hg)'
    serum_cholestoral: 'Serum cholesterol (mg/dl)'
    fasting_blood_sugar: 'Fasting blood sugar > 120 mg/dl (1 = true; 0 = false)'
    resting_electrocardiographic_results: 'Resting ECG results (0–2)'
    maximum_heart_rate_achieved: 'Maximum heart rate achieved'
    exercise_induced_angina: 'Exercise‐induced angina (1 = yes; 0 = no)'
    oldpeak: 'ST depression induced by exercise relative to rest'
    slope: 'Slope of the peak exercise ST segment (1–3)'
    number_of_major_vessels: 'Number of major vessels colored by fluoroscopy (0–3)'
    thal: 'Thalassemia type (3 = normal; 6 = fixed defect; 7 = reversible defect)'
    class: 'Heart disease class (0 = no disease; 1 = disease present)'

# BNG Vehicle Silhouettes: CSV location, summary, and per-column notes.
0078_BNG_vehicle:
  dataset_description: 'BNG Vehicle Silhouettes: Large-scale reprocessing of the Statlog Vehicle Silhouettes dataset. 1,000,000 samples and 18 attributes. Classifies vehicle type.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0078_BNG(vehicle,nominal,1000000).csv'
  # Keys are the upper-case column names; `Class` (capitalised) is the label.
  feature_descriptions:
    COMPACTNESS: 'Silhouette compactness measure'
    CIRCULARITY: 'Silhouette circularity measure'
    DISTANCE_CIRCULARITY: 'Distance-based circularity'
    RADIUS_RATIO: 'Ratio of maximum to minimum radius'
    PR.AXIS_ASPECT_RATIO: 'Principal axis aspect ratio'
    MAX.LENGTH_ASPECT_RATIO: 'Maximal length aspect ratio'
    SCATTER_RATIO: 'Scatter ratio of silhouette points'
    ELONGATEDNESS: 'Elongatedness measure'
    PR.AXIS_RECTANGULARITY: 'Principal axis rectangularity'
    MAX.LENGTH_RECTANGULARITY: 'Max length rectangularity'
    SCALED_VARIANCE_MAJOR: 'Scaled variance along major axis'
    SCALED_VARIANCE_MINOR: 'Scaled variance along minor axis'
    SCALED_RADIUS_OF_GYRATION: 'Scaled radius of gyration'
    SKEWNESS_ABOUT_MAJOR: 'Skewness about major axis'
    SKEWNESS_ABOUT_MINOR: 'Skewness about minor axis'
    KURTOSIS_ABOUT_MAJOR: 'Kurtosis about major axis'
    KURTOSIS_ABOUT_MINOR: 'Kurtosis about minor axis'
    HOLLOWS_RATIO: 'Hollows ratio of silhouette'
    Class: 'Vehicle class label'

# BNG Hepatitis: CSV location, summary, and per-column notes.
0079_BNG_hepatitis:
  dataset_description: 'BNG Hepatitis: Scaled UCI Hepatitis dataset. 1,000,000 samples and 20 attributes. Predicts patient survival.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0079_BNG(hepatitis,nominal,1000000).csv'
  # Keys are the upper-case column names; `Class` (capitalised) is the label.
  feature_descriptions:
    AGE: 'Age in years'
    SEX: 'Gender (1 = male; 2 = female)'
    STEROID: 'On steroid medication (yes/no)'
    ANTIVIRALS: 'On antiviral medication (yes/no)'
    FATIGUE: 'Fatigue symptom present (yes/no)'
    MALAISE: 'Malaise symptom present (yes/no)'
    ANOREXIA: 'Anorexia symptom present (yes/no)'
    LIVER_BIG: 'Enlarged liver (yes/no)'
    LIVER_FIRM: 'Firm liver (yes/no)'
    SPLEEN_PALPABLE: 'Palpable spleen (yes/no)'
    SPIDERS: 'Spider angiomas present (yes/no)'
    ASCITES: 'Ascites present (yes/no)'
    VARICES: 'Varices present (yes/no)'
    BILIRUBIN: 'Serum bilirubin (mg/dl)'
    ALK_PHOSPHATE: 'Alkaline phosphatase (IU/L)'
    SGOT: 'Serum glutamic‐oxaloacetic transaminase (IU/L)'
    ALBUMIN: 'Serum albumin (g/dl)'
    PROTIME: 'Prothrombin time (seconds)'
    HISTOLOGY: 'Histology result (yes/no)'
    Class: 'Survival class (1 = lived; 2 = died)'

# Coil2000: CSV location, summary, and per-column notes.
0079_coil2000:
  dataset_description: 'Coil2000: UCI marketing response dataset. 9,000 samples and 85 attributes. Classifies customer response to mailing campaign.'
  # This entry must not load 0079_BNG(hepatitis,nominal,1000000).csv: that file
  # belongs to the 0079_BNG_hepatitis entry and has none of the columns below.
  # NOTE(review): filename inferred from the key and the NNNN_<name>.csv convention
  # (cf. 0057_cholesterol.csv) — confirm it exists on disk.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0079_coil2000.csv'
  # Keys are column codes; `class` is the campaign-response label.
  # NOTE(review): the glosses below have not been checked against the dataset's
  # published data dictionary — verify before relying on them.
  feature_descriptions:
    MOSTYPE: 'Customer segmentation type'
    MAANTHUI: 'Months in current home'
    MGEMOMV: 'Birth month'
    MGEMLEEF: 'Age in months'
    MOSHOOFD: 'Household type code'
    MGODRK: 'Primary income source code'
    MGODPR: 'Secondary income source code'
    MGODOV: 'Overtime income indicator'
    MGODGE: 'State pension indicator'
    MRELGE: 'Relationship status code'
    MRELSA: 'Marital status code'
    MRELOV: 'Living arrangement code'
    MFALLEEN: 'Lives alone indicator'
    MFGEKIND: 'Has children indicator'
    MFWEKIND: 'Has working children indicator'
    MOPLHOOG: 'Highest education level'
    MOPLMIDD: 'Middle education level'
    MOPLLAAG: 'Lowest education level'
    MBERHOOG: 'High wealth indicator'
    MBERZELF: 'Self‐employed indicator'
    MBERBOER: 'Farmer indicator'
    MBERMIDD: 'Middle class indicator'
    MBERARBG: 'Public sector worker indicator'
    MBERARBO: 'Civil service indicator'
    MSKA: 'Owns car indicator'
    MSKB1: 'Owns second car indicator'
    MSKB2: 'Owns third car indicator'
    MSKC: 'Company car indicator'
    MSKD: 'Fleet car indicator'
    MHHUUR: 'Home rented indicator'
    MHKOOP: 'Home owned indicator'
    MAUT1: 'Owns one car indicator'
    MAUT2: 'Owns two cars indicator'
    MAUT0: 'Owns no car indicator'
    MZFONDS: 'Savings fund membership'
    MZPART: 'Pension fund membership'
    MINKM30: 'Distance to work <30 km indicator'
    MINK3045: 'Distance to work 30–45 km indicator'
    MINK4575: 'Distance to work 45–75 km indicator'
    MINK7512: 'Distance to work >75 km indicator'
    MINK123M: 'Distance to work >123 km indicator'
    MINKGEM: 'Average commuting distance'
    MKOOPKLA: 'Purchased clothing indicator'
    PWAPART: 'Vacation apart indicator'
    PWABEDR: 'Vacation abroad indicator'
    PWALAND: 'Vacation at home indicator'
    PPERSAUT: 'Personal car use frequency'
    PBESAUT: 'Business car use frequency'
    PMOTSCO: 'Public transport use frequency'
    PVRAAUT: 'Passenger car use frequency'
    PAANHANG: 'Trailer ownership indicator'
    PTRACTOR: 'Tractor ownership indicator'
    PWERKT: 'Works at home indicator'
    PBROM: 'Bicycle ownership indicator'
    PLEVEN: 'Live entertainment attendance frequency'
    PPERSONG: 'Person-grade indicator'
    PGEZONG: 'Health club membership'
    PWAOREG: 'Water sports membership'
    PBRAND: 'Brand loyalty indicator'
    PZEILPL: 'Sailing club membership'
    PPLEZIER: 'Amusement park attendance frequency'
    PFIETS: 'Regular cycling indicator'
    PINBOED: 'In‐home entertainment frequency'
    PBYSTAND: 'Spectator sports attendance frequency'
    AWAPART: 'Vacation apart (alternate)'
    AWABEDR: 'Vacation abroad (alternate)'
    AWALAND: 'Vacation at home (alternate)'
    APERSAUT: 'Personal car use (alternate)'
    ABESAUT: 'Business car use (alternate)'
    AMOTSCO: 'Public transport (alternate)'
    AVRAAUT: 'Passenger car use (alternate)'
    AAANHANG: 'Trailer ownership (alternate)'
    ATRACTOR: 'Tractor ownership (alternate)'
    AWERKT: 'Works at home (alternate)'
    ABROM: 'Bicycle ownership (alternate)'
    ALEVEN: 'Live entertainment (alternate)'
    APERSONG: 'Person-grade (alternate)'
    AGEZONG: 'Health club (alternate)'
    AWAOREG: 'Water sports (alternate)'
    ABRAND: 'Brand loyalty (alternate)'
    AZEILPL: 'Sailing club (alternate)'
    APLEZIER: 'Amusement park (alternate)'
    AFIETS: 'Cycling frequency (alternate)'
    AINBOED: 'In‐home entertainment (alternate)'
    ABYSTAND: 'Spectator sports (alternate)'
    class: 'Response to mailing campaign (yes/no)'

# BNG variant of the UCI Congressional Voting Records data: sixteen yes/no
# vote columns followed by the party label in `class`.
0080_BNG_vote:
  dataset_description: >-
    BNG Congressional Voting Records: Large-scale reprocessing of the UCI
    Voting Records dataset. 1,000,000 samples and 17 attributes. Classifies
    party affiliation.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0080_BNG(vote).csv'
  feature_descriptions:
    handicapped-infants: 'Vote on ‘handicapped infants’ bill (yes/no)'
    water-project-cost-sharing: 'Vote on ‘water project cost sharing’ (yes/no)'
    adoption-of-the-budget-resolution: 'Vote on budget resolution adoption (yes/no)'
    physician-fee-freeze: 'Vote on physician fee freeze (yes/no)'
    el-salvador-aid: 'Vote on El Salvador aid (yes/no)'
    religious-groups-in-schools: 'Vote on religious groups in schools (yes/no)'
    anti-satellite-test-ban: 'Vote on anti-satellite test ban (yes/no)'
    aid-to-nicaraguan-contras: 'Vote on aid to Nicaraguan Contras (yes/no)'
    mx-missile: 'Vote on MX missile deployment (yes/no)'
    immigration: 'Vote on immigration reform (yes/no)'
    synfuels-corporation-cutback: 'Vote on synfuels corporation cutback (yes/no)'
    education-spending: 'Vote on education spending (yes/no)'
    superfund-right-to-sue: 'Vote on Superfund right-to-sue provision (yes/no)'
    crime: 'Vote on crime bills (yes/no)'
    duty-free-exports: 'Vote on duty-free exports (yes/no)'
    export-administration-act-south-africa: 'Vote on Export Administration Act (South Africa) (yes/no)'
    class: 'Party affiliation (democrat/republican)'

# BNG variant of the UCI thyroid-disease data. Each lab value (TSH, T3, TT4,
# T4U, FTI, TBG) is paired with a `*_measured` flag; the label is `Class`.
0081_BNG_hypothyroid:
  dataset_description: >-
    BNG Hypothyroid: Scaled UCI Thyroid Disease dataset. 1,000,000 samples
    and 29 attributes. Classifies thyroid disorder types.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0081_BNG(hypothyroid,nominal,1000000).csv'
  feature_descriptions:
    age: 'Age in years'
    sex: 'Gender (M/F)'
    on_thyroxine: 'On thyroxine medication (yes/no)'
    query_on_thyroxine: 'Test query for thyroxine (yes/no)'
    on_antithyroid_medication: 'On antithyroid medication (yes/no)'
    sick: 'Patient sick indicator (yes/no)'
    pregnant: 'Pregnant indicator (yes/no)'
    thyroid_surgery: 'Prior thyroid surgery (yes/no)'
    I131_treatment: 'Received I-131 treatment (yes/no)'
    query_hypothyroid: 'Test query for hypothyroid (yes/no)'
    query_hyperthyroid: 'Test query for hyperthyroid (yes/no)'
    lithium: 'On lithium medication (yes/no)'
    goitre: 'Goitre present (yes/no)'
    tumor: 'Tumor present (yes/no)'
    hypopituitary: 'Hypopituitarism indicator (yes/no)'
    psych: 'Psychiatric condition (yes/no)'
    TSH_measured: 'TSH measured indicator (yes/no)'
    TSH: 'TSH level'
    T3_measured: 'T3 measured indicator (yes/no)'
    T3: 'T3 level'
    TT4_measured: 'TT4 measured indicator (yes/no)'
    TT4: 'TT4 level'
    T4U_measured: 'T4 uptake measured indicator (yes/no)'
    T4U: 'T4 uptake level'
    FTI_measured: 'Free Thyroxine Index measured indicator (yes/no)'
    FTI: 'Free Thyroxine Index'
    TBG_measured: 'Thyroxine-binding globulin measured indicator (yes/no)'
    TBG: 'Thyroxine-binding globulin level'
    referral_source: 'Referral source code'
    Class: 'Thyroid disorder class label'

# BNG variant of the UCI Zoo data: mostly yes/no animal traits, with the
# animal category stored in `type`.
0084_BNG_zoo:
  dataset_description: >-
    BNG Zoo: Large-scale reprocessing of the UCI Zoo dataset. 1,000,000
    samples and 17 attributes. Classifies animal type.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0084_BNG(zoo,nominal,1000000).csv'
  feature_descriptions:
    animal: 'Animal name'
    hair: 'Has hair (yes/no)'
    feathers: 'Has feathers (yes/no)'
    eggs: 'Lays eggs (yes/no)'
    milk: 'Produces milk (yes/no)'
    airborne: 'Can fly (yes/no)'
    aquatic: 'Lives in water (yes/no)'
    predator: 'Is predator (yes/no)'
    toothed: 'Has teeth (yes/no)'
    backbone: 'Has backbone (yes/no)'
    breathes: 'Breathes air (yes/no)'
    venomous: 'Is venomous (yes/no)'
    fins: 'Has fins (yes/no)'
    legs: 'Number of legs'
    tail: 'Has tail (yes/no)'
    domestic: 'Domesticated (yes/no)'
    catsize: 'Size relative to cat threshold (yes/no)'
    type: 'Animal type class label'

# King-Rook vs. King (KRK) chess endgame positions. The six position columns
# locate the three pieces; `game` is the class label.
# Sample count and target wording follow the UCI "Chess (King-Rook vs. King)"
# documentation: the label is the optimal depth-of-win for White, not a move.
0105_kropt:
  dataset_description: >-
    Kropt: Chess King-Rook vs. King endgame dataset. 28,056 samples and 7
    attributes. Classifies the optimal depth-of-win for White (draw, or zero
    to sixteen moves).
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0105_kropt.csv'
  feature_descriptions:
    white_king_col: 'Column of the white king (1–8)'
    white_king_row: 'Row of the white king (1–8)'
    white_rook_col: 'Column of the white rook (1–8)'
    white_rook_row: 'Row of the white rook (1–8)'
    black_king_col: 'Column of the black king (1–8)'
    black_king_row: 'Row of the black king (1–8)'
    game: 'Optimal depth-of-win for White (draw, zero–sixteen); class label'

# Career batting/fielding statistics per player; `Hall_of_Fame` carries the
# induction status.
0106_baseball:
  dataset_description: >-
    Baseball: Player performance statistics dataset. ~1,079 samples and 16
    attributes. Predicts player salary or Hall-of-Fame status.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0106_baseball.csv'
  feature_descriptions:
    Number_seasons: 'Total seasons played'
    Games_played: 'Number of games played'
    At_bats: 'Number of at bats'
    Runs: 'Number of runs scored'
    Hits: 'Number of hits'
    Doubles: 'Number of doubles'
    Triples: 'Number of triples'
    Home_runs: 'Number of home runs'
    RBIs: 'Runs batted in'
    Walks: 'Number of bases on balls'
    Strikeouts: 'Number of strikeouts'
    Batting_average: 'Batting average'
    On_base_pct: 'On-base percentage'
    Slugging_pct: 'Slugging percentage'
    Fielding_ave: 'Fielding average'
    Position: 'Primary fielding position'
    Hall_of_Fame: 'Hall-of-Fame induction status (yes/no)'

# Analcatdata "lawsuit" entry: five described columns, keyed by the CSV's
# dotted column names.
0107_analcatdata_lawsuit:
  dataset_description: >-
    Analcatdata Lawsuit: Legal case outcomes dataset. ~60 samples and 5
    attributes. Classifies whether employee was sued successfully.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0107_analcatdata_lawsuit.csv'
  feature_descriptions:
    Length.of.service: 'Years of service'
    CAP: 'Capital at stake (in USD thousands)'
    PA.normalized: 'Normalized prior awards'
    Minority: 'Minority status (yes/no)'
    Laid.off: 'Laid off prior to lawsuit (yes/no)'

# Analcatdata credit-score entry; `Application.accepted` is the label.
# NOTE(review): `path` used to point at 0111_wisconsin.csv, the file owned by
# the `0111_wisconsin` entry, whose columns do not match the ones described
# here. It now follows the <entry key>.csv naming used by the other entries —
# confirm that this file exists in clean_labeled_dataset.
0111_analcatdata_creditscore:
  dataset_description: >-
    Analcatdata CreditScore: Credit application dataset. ~500 samples and 7
    attributes. Classifies credit acceptance.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0111_analcatdata_creditscore.csv'
  feature_descriptions:
    Age: 'Applicant age'
    Income.per.dependent: 'Income divided by number of dependents'
    Monthly.credit.card.exp: 'Monthly credit card expenditure'
    Own.home: 'Own home status (yes/no)'
    Self.employed: 'Self-employed status (yes/no)'
    Derogatory.reports: 'Number of derogatory credit reports'
    Application.accepted: 'Credit application accepted (yes/no)'

# Wisconsin breast-cancer entry. Ten cell-nucleus measurements each appear as
# a mean / standard-error / worst-case triple, alongside clinical columns.
0111_wisconsin:
  dataset_description: >-
    Wisconsin Breast Cancer: Medical dataset for tumor classification. 699
    samples and 32 attributes. Classifies tumors as benign or malignant.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0111_wisconsin.csv'
  feature_descriptions:
    lymph_node_status: 'Lymph node status (positive/negative)'
    radius_mean: 'Mean radius of tumor (pixels)'
    radius_se: 'Radius standard error'
    radius_worst: 'Worst-case radius'
    texture_mean: 'Mean texture'
    texture_se: 'Texture standard error'
    texture_worst: 'Worst-case texture'
    perimeter_mean: 'Mean perimeter'
    perimeter_se: 'Perimeter standard error'
    perimeter_worst: 'Worst-case perimeter'
    area_mean: 'Mean area'
    area_se: 'Area standard error'
    area_worst: 'Worst-case area'
    smoothness_mean: 'Mean smoothness'
    smoothness_se: 'Smoothness standard error'
    smoothness_worst: 'Worst-case smoothness'
    compactness_mean: 'Mean compactness'
    compactness_se: 'Compactness standard error'
    compactness_worst: 'Worst-case compactness'
    concavity_mean: 'Mean concavity'
    concavity_se: 'Concavity standard error'
    concavity_worst: 'Worst-case concavity'
    concave_points_mean: 'Mean concave points'
    concave_points_se: 'Concave points standard error'
    concave_points_worst: 'Worst-case concave points'
    symmetry_mean: 'Mean symmetry'
    symmetry_se: 'Symmetry standard error'
    symmetry_worst: 'Worst-case symmetry'
    fractal_dimension_mean: 'Mean fractal dimension'
    fractal_dimension_se: 'Fractal dimension standard error'
    fractal_dimension_worst: 'Worst-case fractal dimension'
    tumor_size: 'Tumor size (mm)'
    time: 'Time to recurrence or censoring (months)'
    class: 'Tumor class (benign, malignant)'

# Numeric automobile specifications with `price` as the regression target.
0113_auto_price:
  dataset_description: >-
    Auto Price: Automobile specifications and pricing dataset. 205 samples
    and 17 attributes. Regression task predicting car price.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0113_auto_price.csv'
  feature_descriptions:
    symboling: 'Car risk factor (-3 to +3)'
    normalized-losses: 'Normalized insurance losses'
    wheel-base: 'Wheelbase (inches)'
    length: 'Car length (inches)'
    width: 'Car width (inches)'
    height: 'Car height (inches)'
    curb-weight: 'Vehicle curb weight (lbs)'
    engine-size: 'Engine displacement (cc)'
    bore: 'Cylinder bore (inches)'
    stroke: 'Piston stroke (inches)'
    compression-ratio: 'Engine compression ratio'
    horsepower: 'Engine horsepower'
    peak-rpm: 'Peak engine RPM'
    city-mpg: 'City fuel economy (mpg)'
    highway-mpg: 'Highway fuel economy (mpg)'
    price: 'Market price (USD)'

# Fruit-fly entry: four described measurement columns plus the `class` label.
0117_fruitfly:
  dataset_description: >-
    Fruitfly: Biological dataset of Drosophila mating. 1,400 samples and 5
    attributes. Classifies mating success.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0117_fruitfly.csv'
  feature_descriptions:
    PARTNERS: 'Number of previous partners'
    TYPE: 'Genetic type of fly'
    THORAX: 'Thorax length (mm)'
    SLEEP: 'Sleep duration (hours/day)'
    class: 'Mating success class (successful/unsuccessful)'
# Low-birth-weight entry: maternal risk factors, with numeric codes spelled
# out in each description.
0119_lowbwt:
  dataset_description: >-
    Low Birthweight: Medical dataset of neonatal and maternal features. 189
    samples and 10 attributes. Classifies low birth weight risk.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0119_lowbwt.csv'
  feature_descriptions:
    LOW: 'Low birth weight indicator (1 = < 2,500g; 0 = ≥ 2,500g)'
    AGE: 'Mother’s age in years'
    LWT: 'Mother’s weight at last menstrual period (lbs)'
    RACE: 'Mother’s race (1 = White; 2 = Black; 3 = Other)'
    SMOKE: 'Smoking during pregnancy (1 = yes; 0 = no)'
    PTL: 'Previous preterm labors count'
    HT: 'History of hypertension (1 = yes; 0 = no)'
    UI: 'Uterine irritability (1 = yes; 0 = no)'
    FTV: 'Number of physician visits during first trimester'
    class: 'Low birth weight class (same as LOW)'

# Triazines entry: the same ten descriptors repeated for prefixes p1..p6,
# followed by the `activity` regression target. Key spellings such as
# `h_doner` are kept exactly as written because they are lookup keys.
0121_triazines:
  dataset_description: >-
    Triazines: Chemical compound dataset measuring herbicide activity. 186
    samples and 61 attributes. Regression task predicting biological
    activity.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0121_triazines.csv'
  feature_descriptions:
    # p1
    p1_polar: 'Polar surface area for compound 1'
    p1_size: 'Molecular size descriptor for compound 1'
    p1_flex: 'Molecular flexibility for compound 1'
    p1_h_doner: 'Number of hydrogen bond donors in compound 1'
    p1_h_acceptor: 'Number of hydrogen bond acceptors in compound 1'
    p1_pi_doner: 'Number of π-electron donors in compound 1'
    p1_pi_acceptor: 'Number of π-electron acceptors in compound 1'
    p1_polarisable: 'Polarizability descriptor for compound 1'
    p1_sigma: 'Sigma electron descriptor for compound 1'
    p1_branch: 'Branching descriptor for compound 1'
    # p2
    p2_polar: 'Polar surface area for compound 2'
    p2_size: 'Molecular size descriptor for compound 2'
    p2_flex: 'Molecular flexibility for compound 2'
    p2_h_doner: 'Number of hydrogen bond donors in compound 2'
    p2_h_acceptor: 'Number of hydrogen bond acceptors in compound 2'
    p2_pi_doner: 'Number of π-electron donors in compound 2'
    p2_pi_acceptor: 'Number of π-electron acceptors in compound 2'
    p2_polarisable: 'Polarizability descriptor for compound 2'
    p2_sigma: 'Sigma electron descriptor for compound 2'
    p2_branch: 'Branching descriptor for compound 2'
    # p3
    p3_polar: 'Polar surface area for compound 3'
    p3_size: 'Molecular size descriptor for compound 3'
    p3_flex: 'Molecular flexibility for compound 3'
    p3_h_doner: 'Number of hydrogen bond donors in compound 3'
    p3_h_acceptor: 'Number of hydrogen bond acceptors in compound 3'
    p3_pi_doner: 'Number of π-electron donors in compound 3'
    p3_pi_acceptor: 'Number of π-electron acceptors in compound 3'
    p3_polarisable: 'Polarizability descriptor for compound 3'
    p3_sigma: 'Sigma electron descriptor for compound 3'
    p3_branch: 'Branching descriptor for compound 3'
    # p4
    p4_polar: 'Polar surface area for compound 4'
    p4_size: 'Molecular size descriptor for compound 4'
    p4_flex: 'Molecular flexibility for compound 4'
    p4_h_doner: 'Number of hydrogen bond donors in compound 4'
    p4_h_acceptor: 'Number of hydrogen bond acceptors in compound 4'
    p4_pi_doner: 'Number of π-electron donors in compound 4'
    p4_pi_acceptor: 'Number of π-electron acceptors in compound 4'
    p4_polarisable: 'Polarizability descriptor for compound 4'
    p4_sigma: 'Sigma electron descriptor for compound 4'
    p4_branch: 'Branching descriptor for compound 4'
    # p5
    p5_polar: 'Polar surface area for compound 5'
    p5_size: 'Molecular size descriptor for compound 5'
    p5_flex: 'Molecular flexibility for compound 5'
    p5_h_doner: 'Number of hydrogen bond donors in compound 5'
    p5_h_acceptor: 'Number of hydrogen bond acceptors in compound 5'
    p5_pi_doner: 'Number of π-electron donors in compound 5'
    p5_pi_acceptor: 'Number of π-electron acceptors in compound 5'
    p5_polarisable: 'Polarizability descriptor for compound 5'
    p5_sigma: 'Sigma electron descriptor for compound 5'
    p5_branch: 'Branching descriptor for compound 5'
    # p6
    p6_polar: 'Polar surface area for compound 6'
    p6_size: 'Molecular size descriptor for compound 6'
    p6_flex: 'Molecular flexibility for compound 6'
    p6_h_doner: 'Number of hydrogen bond donors in compound 6'
    p6_h_acceptor: 'Number of hydrogen bond acceptors in compound 6'
    p6_pi_doner: 'Number of π-electron donors in compound 6'
    p6_pi_acceptor: 'Number of π-electron acceptors in compound 6'
    p6_polarisable: 'Polarizability descriptor for compound 6'
    p6_sigma: 'Sigma electron descriptor for compound 6'
    p6_branch: 'Branching descriptor for compound 6'
    # target
    activity: 'Measured herbicide activity (μM)'

# Classification flavour of the auto-price data: numeric car specifications
# with the price category in `class`.
0122_autoPrice:
  dataset_description: >-
    AutoPrice (Classification): Alternate version of the Auto Price dataset.
    205 samples and 16 attributes. Classifies cars into price categories.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0122_autoPrice.csv'
  feature_descriptions:
    symboling: 'Risk factor rating (-3 to +3)'
    normalized-losses: 'Normalized insurance losses'
    wheel-base: 'Wheelbase length (inches)'
    length: 'Vehicle length (inches)'
    width: 'Vehicle width (inches)'
    height: 'Vehicle height (inches)'
    curb-weight: 'Vehicle curb weight (lbs)'
    engine-size: 'Engine displacement (cc)'
    bore: 'Cylinder bore diameter (inches)'
    stroke: 'Piston stroke length (inches)'
    compression-ratio: 'Engine compression ratio'
    horsepower: 'Engine horsepower'
    peak-rpm: 'Engine peak RPM'
    city-mpg: 'Fuel economy in city (mpg)'
    highway-mpg: 'Fuel economy on highway (mpg)'
    class: 'Price category label'

# Analcatdata impeachment entry: per-state vote tallies and political
# covariates, keyed by the CSV's dotted column names.
0124_analcatdata_impeach:
  dataset_description: >-
    Analcatdata Impeach: U.S. Senate impeachment vote data. 50 samples and 9
    attributes. Records vote counts and political outcomes.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0124_analcatdata_impeach.csv'
  feature_descriptions:
    State: 'State represented'
    Perjury.guilty: 'Count voting guilty on perjury charge'
    Obstruction.guilty: 'Count voting guilty on obstruction charge'
    Total.guilty: 'Total guilty votes'
    Party: 'Senator’s party affiliation'
    Conservatism: 'Conservatism score'
    Clinton.vote.1996: 'Vote share for Clinton in 1996'
    Reelection.year: 'Year of reelection campaign'
    First.term: 'First-term indicator (1 = first term; 0 = incumbent)'
    Won.state.1996: 'Reelected in 1996 (yes/no)'

# StatLib "cloud" data from a cloud-seeding experiment. NC, SC and NWC are
# rainfall totals in control areas and TE is rainfall in the east target
# area; they are not cloud counts or a cloud-type label.
# NOTE(review): `path` used to point at 0124_analcatdata_impeach.csv, the file
# owned by the `0124_analcatdata_impeach` entry. It now follows the
# <entry key>.csv naming used by the other entries — confirm the file exists.
0124_cloud:
  dataset_description: >-
    Cloud: Cloud-seeding experiment dataset (Tasmania, 1964–1971). 108
    samples and 6 attributes. Regression task predicting rainfall in the east
    target area (TE).
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0124_cloud.csv'
  feature_descriptions:
    seeded: 'Seeded cloud indicator (yes/no)'
    season: 'Season during observation (1–4)'
    NC: 'Period rainfall in the north control area (inches)'
    SC: 'Period rainfall in the south control area (inches)'
    NWC: 'Period rainfall in the north-west control area (inches)'
    TE: 'Period rainfall in the east target area (inches); prediction target'

# Pharyngeal-cancer entry: clinical covariates with the survival label in
# `class`.
0125_pharynx:
  dataset_description: >-
    Pharynx: Medical dataset on pharyngeal cancer. 74 samples and 11
    attributes. Classifies survival outcome.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0125_pharynx.csv'
  feature_descriptions:
    Inst: 'Institution code'
    sex: 'Patient sex (M/F)'
    Treatment: 'Treatment type'
    Grade: 'Tumor grade (1–3)'
    Age: 'Patient age in years'
    Condition: 'Preoperative condition code'
    Site: 'Tumor site code'
    T: 'Tumor size/stage (T1–T4)'
    # Quoted on purpose: a bare N is a boolean literal under the YAML 1.1
    # type rules, so some loaders would turn this key into `false`.
    'N': 'Lymph node involvement (N0–N2)'
    Status: 'Postoperative status'
    class: 'Survival class (alive/deceased)'

# Daily wind speeds at twelve Irish synoptic stations (Haslett & Raftery
# data). Station codes are expanded per the dataset's documentation: RPT is
# Roche's Point (ROS is Rosslare), KIL is Kilkenny, CLO is Clones and BEL is
# Belmullet.
# NOTE(review): `path` used to point at 0125_pharynx.csv, the file owned by
# the `0125_pharynx` entry. It now follows the <entry key>.csv naming used by
# the other entries — confirm the file exists.
0125_wind:
  dataset_description: >-
    Wind: Meteorological dataset of daily wind speeds across Irish stations.
    6,574 samples and 15 attributes. Regression task predicting wind speed.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0125_wind.csv'
  feature_descriptions:
    year: 'Observation year'
    month: 'Observation month'
    day: 'Observation day'
    RPT: 'Wind speed at Roche’s Point'
    VAL: 'Wind speed at Valentia'
    ROS: 'Wind speed at Rosslare'
    KIL: 'Wind speed at Kilkenny'
    SHA: 'Wind speed at Shannon'
    BIR: 'Wind speed at Birr'
    DUB: 'Wind speed at Dublin'
    CLA: 'Wind speed at Claremorris'
    MUL: 'Wind speed at Mullingar'
    CLO: 'Wind speed at Clones'
    BEL: 'Wind speed at Belmullet'
    MAL: 'Wind speed at Malin Head'

# space_ga: county-level data on the 1980 U.S. presidential election. The
# "ga" suffix refers to the journal Geographical Analysis (Pace & Barry),
# not to the state of Georgia. The target is `ln(VOTES/POP)`.
# NOTE(review): the per-column wording below is unchanged from before and has
# not been checked against the StatLib description.
0129_space_ga:
  dataset_description: >-
    Space_GA: U.S. county-level 1980 presidential election dataset. 3,107
    samples and 7 attributes. Regression task predicting log voter turnout.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0129_space_ga.csv'
  feature_descriptions:
    ln(VOTES/POP): 'Natural log of votes divided by population'
    POP: 'County population'
    EDUCATION: 'Percent with college education'
    HOUSES: 'Number of housing units'
    INCOME: 'Median household income'
    XCOORD: 'County centroid X coordinate'
    YCOORD: 'County centroid Y coordinate'

# Breast-tumor entry: categorical clinical attributes plus `recurrence` and
# the `class` indicator.
0130_breastTumor:
  dataset_description: >-
    Breast Tumor: Medical dataset on breast cancer recurrence. 286 samples
    and 10 attributes. Classifies tumor recurrence.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0130_breastTumor.csv'
  feature_descriptions:
    age: 'Age group of patient'
    menopause: 'Menopause status (lt40, gt50, premeno)'
    inv-nodes: 'Number of positive lymph nodes'
    node-caps: 'Caps on nodes present (yes/no)'
    deg-malig: 'Degree of malignancy (1–3)'
    breast: 'Breast side affected (left/right)'
    breast-quad: 'Quadrant of tumor (inner/outer/etc.)'
    irradiation: 'Received irradiation (yes/no)'
    recurrence: 'Time to recurrence (months)'
    class: 'Recurrence indicator (yes/no)'

# BNG variant of the UCI Annealing data (anneal.ORIG column names).
# The UCI attribute list gives value domains only for `marvi`, `jurofm`,
# `shape`, `bore` and `packing`, so those descriptions stick to the
# documented domains instead of guessing a physical meaning.
0136_BNG_anneal_ORIG:
  dataset_description: >-
    BNG Annealing (Original): Reprocessed UCI Annealing dataset. 39
    attributes and 1,000,000 samples. Classifies steel type.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0136_BNG(anneal.ORIG).csv'
  feature_descriptions:
    family: 'Annealing process family'
    product-type: 'Product type being annealed'
    steel: 'Steel type code'
    carbon: 'Carbon content indicator'
    hardness: 'Steel hardness level'
    temper_rolling: 'Temper rolling applied (yes/no)'
    condition: 'Initial condition code'
    formability: 'Formability rating'
    strength: 'Strength rating'
    non-ageing: 'Non-ageing steel indicator'
    surface-finish: 'Surface finish type'
    surface-quality: 'Surface quality grade'
    enamelability: 'Enamelability rating'
    bc: 'BC property presence'
    bf: 'BF property presence'
    bt: 'BT property presence'
    bw/me: 'BW/ME property presence'
    bl: 'BL property presence'
    m: 'Material class'
    chrom: 'Chromium content indicator'
    phos: 'Phosphorus content indicator'
    cbond: 'Carbon bonding type'
    marvi: 'MARVI flag (meaning not documented in the UCI source)'
    exptl: 'Experimental process flag'
    ferro: 'Ferro property indicator'
    corr: 'Corrosion resistance indicator'
    blue/bright/varn/clean: 'Surface appearance code'
    lustre: 'Surface lustre level'
    jurofm: 'JUROFM flag (meaning not documented in the UCI source)'
    s: 'Sulfur content indicator'
    p: 'Phosphorus content indicator'
    shape: 'Product shape (COIL or SHEET)'
    thick: 'Thickness (mm)'
    width: 'Width (mm)'
    len: 'Length (mm)'
    oil: 'Oil quenching indicator'
    bore: 'Bore size code (0000, 0500, 0600, 0760)'
    packing: 'Packing code (1–3)'
    class: 'Steel class label'

# BNG variant of the UCI Labor Relations data: terms of collective
# agreements, labelled by whether the contract is acceptable. Descriptions
# follow the UCI attribute documentation; several attributes are nominal
# levels rather than amounts or percentages.
0137_BNG_labor:
  dataset_description: >-
    BNG Labor: Reprocessed UCI Labor Relations dataset. 17 attributes and
    1,000,000 samples. Classifies contracts as acceptable (good) or
    unacceptable (bad).
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0137_BNG(labor).csv'
  feature_descriptions:
    duration: 'Duration of the agreement (years)'
    wage-increase-first-year: 'First-year wage increase (%)'
    wage-increase-second-year: 'Second-year wage increase (%)'
    wage-increase-third-year: 'Third-year wage increase (%)'
    cost-of-living-adjustment: 'Cost-of-living allowance type (none, tcf, tc)'
    working-hours: 'Standard working hours per week'
    pension: 'Employer contribution to pension plan (none, ret_allw, empl_contr)'
    standby-pay: 'Standby pay rate'
    shift-differential: 'Shift differential: supplement for second/third shift work'
    education-allowance: 'Education allowance provided (yes/no)'
    statutory-holidays: 'Paid statutory holidays per year'
    vacation: 'Paid vacation level (below average, average, generous)'
    longterm-disability-assistance: 'Employer help during long-term disability (yes/no)'
    contribution-to-dental-plan: 'Employer contribution to dental plan (none, half, full)'
    bereavement-assistance: 'Employer contribution towards bereavement costs (yes/no)'
    contribution-to-health-plan: 'Employer contribution to health plan (none, half, full)'
    class: 'Contract acceptability class label (good/bad)'

# BNG variant of the UCI Letter Recognition data: sixteen integer features
# computed from a character's bounding box and pixel map, plus the letter in
# `class`. Descriptions follow the UCI attribute list, where x2bar/y2bar are
# variances and xegvy/yegvx are correlations.
0138_BNG_letter:
  dataset_description: >-
    BNG Letter Recognition: Reprocessed UCI Letter Recognition dataset. 17
    attributes and 1,000,000 samples. Classifies capital letters A–Z.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0138_BNG(letter).csv'
  feature_descriptions:
    x-box: 'Horizontal position of the bounding box'
    y-box: 'Vertical position of the bounding box'
    width: 'Bounding box width'
    high: 'Bounding box height'
    onpix: 'Total number of on pixels'
    x-bar: 'Mean x-coordinate of on pixels in the box'
    y-bar: 'Mean y-coordinate of on pixels in the box'
    x2bar: 'Mean x variance of on pixels'
    y2bar: 'Mean y variance of on pixels'
    xybar: 'Mean x-y correlation of on pixels'
    x2ybr: 'Mean of x * x * y over on pixels'
    xy2br: 'Mean of x * y * y over on pixels'
    x-ege: 'Mean edge count scanning left to right'
    xegvy: 'Correlation of x-ege with y'
    y-ege: 'Mean edge count scanning bottom to top'
    yegvx: 'Correlation of y-ege with x'
    class: 'Letter class label'

# BNG variant of the UCI Automobile (1985 imports) data — a different
# dataset from UCI Auto MPG. The label is the insurance risk rating
# `symboling`, listed last; `price` is an ordinary feature here.
0139_BNG_autos:
  dataset_description: >-
    BNG Autos: Reprocessed UCI Automobile (1985 imports) dataset. 26
    attributes and 1,000,000 samples. Classifies the insurance risk rating
    (symboling).
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0139_BNG(autos).csv'
  feature_descriptions:
    normalized-losses: 'Normalized insurance losses'
    make: 'Manufacturer name'
    fuel-type: 'Fuel type (gas/diesel)'
    aspiration: 'Aspiration type (std/turbo)'
    num-of-doors: 'Number of doors'
    body-style: 'Body style type'
    drive-wheels: 'Drive wheels configuration'
    engine-location: 'Engine location (front/rear)'
    wheel-base: 'Wheelbase length (inches)'
    length: 'Vehicle length (inches)'
    width: 'Vehicle width (inches)'
    height: 'Vehicle height (inches)'
    curb-weight: 'Vehicle curb weight (lbs)'
    engine-type: 'Engine type code'
    num-of-cylinders: 'Number of cylinders'
    engine-size: 'Engine displacement (cc)'
    fuel-system: 'Fuel system type'
    bore: 'Cylinder bore diameter (inches)'
    stroke: 'Piston stroke length (inches)'
    compression-ratio: 'Engine compression ratio'
    horsepower: 'Engine horsepower'
    peak-rpm: 'Engine peak RPM'
    city-mpg: 'Fuel economy in city (mpg)'
    highway-mpg: 'Fuel economy on highway (mpg)'
    price: 'Market price (USD)'
    symboling: 'Insurance risk rating (-3 to +3); class label'

# BNG lymphography entry. Keys are the truncated column names used in the
# CSV; the diagnosis label is `class`.
0140_BNG_lymph:
  dataset_description: >-
    BNG Lymphography: Medical dataset about lymph node diagnoses. 1,000,000
    samples and 19 attributes. Classifies lymphography outcome.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0140_BNG(lymph).csv'
  feature_descriptions:
    lymphatics: 'Condition of lymphatic system'
    block_of_affere: 'Blockage in afferent lymph vessels'
    bl_of_lymph_c: 'Blockage of lymph capillaries'
    bl_of_lymph_s: 'Blockage of lymph sinuses'
    by_pass: 'Presence of bypasses'
    extravasates: 'Extravasates (leakage) presence'
    regeneration_of: 'Signs of regeneration'
    early_uptake_in: 'Early uptake of tracer'
    lym_nodes_dimin: 'Diminished lymph nodes'
    lym_nodes_enlar: 'Enlarged lymph nodes'
    changes_in_lym: 'Structural changes in lymph nodes'
    defect_in_node: 'Defects observed in nodes'
    changes_in_node: 'Size-related node changes'
    changes_in_stru: 'Changes in structure'
    special_forms: 'Special node forms'
    dislocation_of: 'Node dislocation presence'
    exclusion_of_no: 'Node exclusion count'
    no_of_nodes_in: 'Number of nodes involved'
    class: 'Diagnosis class label'

# BNG variant of the UCI Wisconsin breast-cancer (breast-w) data: nine
# cytology attributes plus the tumor label in `Class`.
0142_BNG_breast_w:
  dataset_description: >-
    BNG Breast Cancer (Wisconsin): Reprocessed UCI Wisconsin Breast Cancer
    dataset. 1,000,000 samples and 9 attributes. Classifies tumors.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0142_BNG(breast-w).csv'
  feature_descriptions:
    Clump_Thickness: 'Clump thickness'
    Cell_Size_Uniformity: 'Uniformity of cell size'
    Cell_Shape_Uniformity: 'Uniformity of cell shape'
    Marginal_Adhesion: 'Marginal adhesion'
    Single_Epi_Cell_Size: 'Single epithelial cell size'
    Bare_Nuclei: 'Bare nuclei count'
    Bland_Chromatin: 'Chromatin texture'
    Normal_Nucleoli: 'Nucleoli size'
    Mitoses: 'Mitoses count'
    Class: 'Tumor class (benign, malignant)'

# BNG variant of the UCI Bridges (version 1) data; the bridge type label is
# stored in `TYPE`.
0144_BNG_bridges_version1:
  dataset_description: >-
    BNG Bridges v1: Reprocessed UCI Bridges dataset. 13 attributes and
    1,000,000 samples. Classifies bridge type.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0144_BNG(bridges_version1).csv'
  feature_descriptions:
    IDENTIF: 'Bridge identifier'
    RIVER: 'River crossed code'
    LOCATION: 'Location code'
    ERECTED: 'Year erected'
    PURPOSE: 'Intended purpose code'
    LENGTH: 'Span length (feet)'
    LANES: 'Number of lanes'
    CLEAR-G: 'Clearance gauge (feet)'
    T-OR-D: 'Type of deck'
    MATERIAL: 'Construction material'
    SPAN: 'Number of spans'
    REL-L: 'Relation length code'
    TYPE: 'Bridge type label'

# BNG variant of the UCI Contraceptive Method Choice data; the label is
# `Contraceptive_method_used`.
0146_BNG_cmc:
  dataset_description: >-
    BNG Contraceptive Method Choice: Reprocessed UCI CMC dataset. 1,000,000
    samples and 10 attributes. Predicts contraceptive method used.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0146_BNG(cmc).csv'
  feature_descriptions:
    Wifes_age: 'Wife’s age in years'
    Wifes_education: 'Wife’s education level code'
    Husbands_education: 'Husband’s education level code'
    Number_of_children_ever_born: 'Number of children born'
    Wifes_religion: 'Wife’s religion code'
    Wifes_now_working?: 'Is wife currently working? (yes/no)'
    Husbands_occupation: 'Husband’s occupation code'
    Standard-of-living_index: 'Standard of living index'
    Media_exposure: 'Media exposure level'
    Contraceptive_method_used: 'Method used (no-use, long-term, short-term)'

# BNG variant of the extended UCI Horse Colic data (colic.ORIG column
# names), including the lesion-coding columns; the label is `outcome`.
0147_BNG_colic_ORIG:
  dataset_description: >-
    BNG Horse Colic (Extended): Reprocessed UCI Horse Colic extended dataset.
    30 attributes and 1,000,000 samples. Predicts colic outcome.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0147_BNG(colic.ORIG).csv'
  feature_descriptions:
    surgery: 'Surgery performed (yes/no)'
    Age: 'Age category'
    Hospital_Number: 'Hospital case number'
    rectal_temperature: 'Rectal temperature (°C)'
    pulse: 'Pulse rate (beats/min)'
    respiratory_rate: 'Respiratory rate (breaths/min)'
    temperature_of_extremities: 'Extremity temperature (normal/cold)'
    peripheral_pulse: 'Peripheral pulse quality'
    mucous_membranes: 'Mucous membrane color'
    capillary_refill_time: 'Capillary refill time (sec)'
    pain: 'Pain level code'
    peristalsis: 'Peristalsis rate'
    abdominal_distension: 'Abdominal distension (yes/no)'
    nasogastric_tube: 'Nasogastric tube present (yes/no)'
    nasogastric_reflux: 'Nasogastric reflux (yes/no)'
    nasogastric_reflux_PH: 'pH of reflux'
    rectal_examination_-_feces: 'Feces on rectal exam'
    abdomen: 'Abdominal findings code'
    packed_cell_volume: 'Packed cell volume (%)'
    total_protein: 'Total protein (g/dl)'
    abdominocentesis_appearance: 'Appearance of fluid'
    abdomcentesis_total_protein: 'Total protein in fluid'
    outcome: 'Colic outcome class'
    site_of_lesion: 'Lesion site code'
    type_of_lesion: 'Lesion type code'
    subtype_of_lesion: 'Lesion subtype code'
    pathology_cp_data: 'Pathology CP data code'
    surgical_lesion: 'Surgical lesion presence (yes/no)'

# BNG variant of the simplified UCI Horse Colic data; the label is
# `outcome`.
0148_BNG_colic:
  dataset_description: >-
    BNG Horse Colic (Nominal): Simplified UCI Horse Colic nominal dataset. 23
    attributes and 1,000,000 samples. Predicts colic outcome.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0148_BNG(colic).csv'
  feature_descriptions:
    surgery: 'Surgery performed (yes/no)'
    Age: 'Age category'
    rectal_temperature: 'Rectal temperature (°C)'
    pulse: 'Pulse rate (beats/min)'
    respiratory_rate: 'Respiratory rate (breaths/min)'
    temp_extremities: 'Extremity temperature (normal/cold)'
    peripheral_pulse: 'Peripheral pulse quality'
    mucous_membranes: 'Mucous membrane color'
    capillary_refill_time: 'Capillary refill time (sec)'
    pain: 'Pain level code'
    peristalsis: 'Peristalsis rate'
    abdominal_distension: 'Abdominal distension (yes/no)'
    nasogastric_tube: 'Nasogastric tube present (yes/no)'
    nasogastric_reflux: 'Nasogastric reflux (yes/no)'
    nasogastric_reflux_PH: 'pH of reflux'
    rectal_examination: 'Feces on rectal exam'
    abdomen: 'Abdominal findings code'
    packed_cell_volume: 'Packed cell volume (%)'
    total_protein: 'Total protein (g/dl)'
    abdominocentesis_appearance: 'Appearance of fluid'
    abdomcentesis_total_protein: 'Total protein in fluid'
    outcome: 'Colic outcome class'
    surgical_lesion: 'Surgical lesion presence (yes/no)'
# Social-mobility entry: father/son occupation codes, family attributes and
# two count columns.
0149_socmob:
  dataset_description: >-
    SocMob: Sociological dataset of intergenerational occupational mobility.
    4,000 samples and 6 attributes. Predicts son’s occupation distribution.
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0149_socmob.csv'
  feature_descriptions:
    fathers_occupation: 'Father’s primary occupation code'
    sons_occupation: 'Son’s primary occupation code'
    family_structure: 'Family structure type'
    race: 'Race of the family (categorical)'
    counts_for_sons_first_occupation: 'Count of sons in their first occupation'
    counts_for_sons_current_occupation: 'Count of sons in their current occupation'

0150_BNG_page-blocks:
  dataset_description: "BNG Page Blocks: Large-scale reprocessing of the UCI Page Blocks dataset. 295,245 samples and 11 attributes. Classifies blocks on a printed page."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0150_BNG(page-blocks).csv"
  feature_descriptions:
    height: "Block height"
    lenght: "Block length"
    area: "Block area"
    eccen: "Eccentricity of block"
    p_black: "Proportion of black pixels"
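    p_and: "Percentage of black pixels after applying the Run Length Smoothing Algorithm (blackand / area)"
    mean_tr: "Mean number of white-black transitions (blackpix / wb_trans)"
    blackpix: "Count of black pixels"
    blackand: "Count of black pixels after applying the Run Length Smoothing Algorithm"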
    wb_trans: "White-black transition count"
    class: "Block type label"

0153_BNG_cylinder-bands:
  dataset_description: "BNG Cylinder Bands: Manufacturing dataset on printing cylinder bands. 1,000,000 samples and 38 attributes. Classifies band type or quality."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0153_BNG(cylinder-bands).csv"
  feature_descriptions:
    timestamp: "Measurement timestamp"
    cylinder_number: "Cylinder identifier"
    customer: "Customer code"
    job_number: "Job number"
    grain_screened: "Grain screening status"
    ink_color: "Ink color code"
    proof_on_ctd_ink: "Proof on coated ink flag"
    blade_mfg: "Blade manufacturer code"
    cylinder_division: "Cylinder division code"
    paper_type: "Paper type code"
    ink_type: "Ink type code"
    direct_steam: "Direct steam usage flag"
    solvent_type: "Solvent type code"
    type_on_cylinder: "Type-on-cylinder code"
    press: "Press identifier"
    unit_number: "Unit number"
    cylinder_size: "Cylinder size code"
    paper_mill_location: "Paper mill location code"
    plating_tank: "Plating tank code"
    proof_cut: "Proof cut measurement"
    viscosity: "Ink viscosity"
    caliper: "Paper caliper thickness"
    ink_temperature: "Ink temperature"
    humidity: "Ambient humidity"
    roughness: "Paper roughness"
    blade_pressure: "Blade pressure"
    varnish_pct: "Varnish percentage"
    press_speed: "Press speed"
    ink_pct: "Ink percentage"
    solvent_pct: "Solvent percentage"
    ESA_Voltage: "Electrostatic applicator voltage"
    ESA_Amperage: "Electrostatic applicator amperage"
    wax: "Wax additive percentage"
    hardener: "Hardener additive percentage"
    roller_durometer: "Roller hardness measurement"
    current_density: "Current density"
    anode_space_ratio: "Anode-to-space ratio"
    chrome_content: "Chrome content percentage"
    band_type: "Band type label"

0154_BNG_dermatology:
  dataset_description: "BNG Dermatology: Large-scale reprocessing of the UCI Dermatology dataset. 1,000,000 samples and 35 attributes. Classifies dermatological diseases."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0154_BNG(dermatology).csv"
  feature_descriptions:
    erythema: "Erythema level"
    scaling: "Scaling severity"
    definite_borders: "Definite borders present"
    itching: "Itching present"
    koebner_phenomenon: "Koebner phenomenon present"
    polygonal_papules: "Polygonal papules present"
    follicular_papules: "Follicular papules present"
    oral_mucosal_involvement: "Oral mucosal involvement"
    knee_and_elbow_involvement: "Knee & elbow involvement"
    scalp_involvement: "Scalp involvement"
    family_history: "Family history of skin disease"
    melanin_incontinence: "Melanin incontinence"
    eosinophils_in_the_infiltrate: "Eosinophils in infiltrate"
    PNL_infiltrate: "Polymorphonuclear leukocyte infiltrate"
    fibrosis_of_the_papillary_dermis: "Papillary dermis fibrosis"
    exocytosis: "Exocytosis present"
    acanthosis: "Acanthosis present"
    hyperkeratosis: "Hyperkeratosis present"
    parakeratosis: "Parakeratosis present"
    clubbing_of_the_rete_ridges: "Clubbing of rete ridges"
    elongation_of_the_rete_ridges: "Elongation of rete ridges"
    thinning_of_the_suprapapillary_epidermis: "Suprapapillary epidermis thinning"
    spongiform_pustule: "Spongiform pustule present"
    munro_microabcess: "Munro microabscess present"
    focal_hypergranulosis: "Focal hypergranulosis present"
    disappearance_of_the_granular_layer: "Granular layer disappearance"
    perivascular_monoluclear_infiltrate: "Perivascular mononuclear infiltrate"
    inflammatory_monoluclear_infiltrate: "Inflammatory mononuclear infiltrate"
    band_like_infiltrate: "Band-like infiltrate present"
    age: "Patient age"
    class: "Disease class label"

0156_BNG_glass:
  dataset_description: "BNG Glass Identification: Large-scale reprocessing of the UCI Glass dataset. 137,781 samples and 10 attributes. Classifies glass type."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0156_BNG(glass).csv"
  feature_descriptions:
    RI: "Refractive index"
    Na: "Sodium content"
    Mg: "Magnesium content"
    Al: "Aluminum content"
    Si: "Silicon content"
    K: "Potassium content"
    Ca: "Calcium content"
    Ba: "Barium content"
    Fe: "Iron content"
    Type: "Glass type label"

0158_BNG_heart-statlog:
  dataset_description: "BNG Heart Statlog: Large-scale version of the Statlog Heart Disease dataset. 1,000,000 samples and 14 attributes. Classifies presence of heart disease."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0158_BNG(heart-statlog).csv"
  feature_descriptions:
    age: "Age in years"
    sex: "Gender (1 = male; 0 = female)"
    chest: "Chest pain type (1–4)"
    resting_blood_pressure: "Resting blood pressure (mm Hg)"
    serum_cholestoral: "Serum cholesterol (mg/dl)"
    fasting_blood_sugar: "Fasting blood sugar > 120 mg/dl (1 = true; 0 = false)"
    resting_electrocardiographic_results: "Resting ECG results (0–2)"
    maximum_heart_rate_achieved: "Maximum heart rate achieved"
    exercise_induced_angina: "Exercise-induced angina (1 = yes; 0 = no)"
    oldpeak: "ST depression induced by exercise relative to rest"
    slope: "Slope of the peak exercise ST segment (1–3)"
    number_of_major_vessels: "Number of major vessels colored by fluoroscopy (0–3)"
    thal: "Thallium stress test result (3 = normal; 6 = fixed defect; 7 = reversible defect)"
    class: "Disease presence (0 = healthy; 1 = disease)"

0160_BNG_hepatitis:
  dataset_description: "BNG Hepatitis: Scaled UCI Hepatitis dataset. 1,000,000 samples and 20 attributes. Predicts patient survival."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0160_BNG(hepatitis).csv"
  feature_descriptions:
    AGE: "Age in years"
    SEX: "Gender (1 = male; 2 = female)"
    STEROID: "On steroids (yes/no)"
    ANTIVIRALS: "On antivirals (yes/no)"
    FATIGUE: "Fatigue present (yes/no)"
    MALAISE: "Malaise present (yes/no)"
    ANOREXIA: "Anorexia present (yes/no)"
    LIVER_BIG: "Enlarged liver (yes/no)"
    LIVER_FIRM: "Firm liver (yes/no)"
    SPLEEN_PALPABLE: "Palpable spleen (yes/no)"
    SPIDERS: "Spider angiomas present (yes/no)"
    ASCITES: "Ascites present (yes/no)"
    VARICES: "Varices present (yes/no)"
    BILIRUBIN: "Serum bilirubin (mg/dl)"
    ALK_PHOSPHATE: "Alkaline phosphatase (IU/L)"
    SGOT: "SGOT enzyme level (IU/L)"
    ALBUMIN: "Serum albumin (g/dl)"
    PROTIME: "Prothrombin time (seconds)"
    HISTOLOGY: "Histology result (yes/no)"
    Class: "Survival outcome (UCI coding: 1 = died; 2 = lived)"

0161_bank32nh:
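  dataset_description: "Bank32nh: Synthetic dataset from the DELVE bank family, generated by a simulation of how bank customers choose their banks (nonlinear, high noise). 8,192 samples, 32 input attributes plus the target. Regression on the fraction of customers rejected because of full queues (rej)."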
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0161_bank32nh.csv"
  feature_descriptions:
    a1cx: "Attribute 1 X-coordinate channel"
    a1cy: "Attribute 1 Y-coordinate channel"
    a1sx: "Attribute 1 secondary X-channel"
    a1sy: "Attribute 1 secondary Y-channel"
    a1rho: "Attribute 1 correlation measure"
    a1pop: "Attribute 1 probability density"
    a2cx: "Attribute 2 X-coordinate channel"
    a2cy: "Attribute 2 Y-coordinate channel"
    a2sx: "Attribute 2 secondary X-channel"
    a2sy: "Attribute 2 secondary Y-channel"
    a2rho: "Attribute 2 correlation measure"
    a2pop: "Attribute 2 probability density"
    a3cx: "Attribute 3 X-coordinate channel"
    a3cy: "Attribute 3 Y-coordinate channel"
    a3sx: "Attribute 3 secondary X-channel"
    a3sy: "Attribute 3 secondary Y-channel"
    a3rho: "Attribute 3 correlation measure"
    a3pop: "Attribute 3 probability density"
    temp: "Temperature measurement"
    b1x: "Band 1 X-measure"
    b1y: "Band 1 Y-measure"
    b1call: "Band 1 call indicator"
    b1eff: "Band 1 effectiveness score"
    b2x: "Band 2 X-measure"
    b2y: "Band 2 Y-measure"
    b2call: "Band 2 call indicator"
    b2eff: "Band 2 effectiveness score"
    b3x: "Band 3 X-measure"
    b3y: "Band 3 Y-measure"
    b3call: "Band 3 call indicator"
    b3eff: "Band 3 effectiveness score"
    m1xl: "Measurement 1 level"
    rej: "Fraction of customers rejected because all open queues are full (regression target)"

0162_BNG_zoo:
  dataset_description: "BNG Zoo: Large-scale reprocessing of the UCI Zoo dataset. 1,000,000 samples and 17 attributes. Classifies animal type."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0162_BNG(zoo).csv"
  feature_descriptions:
    hair: "Has hair (yes/no)"
    feathers: "Has feathers (yes/no)"
    eggs: "Lays eggs (yes/no)"
    milk: "Produces milk (yes/no)"
    airborne: "Can fly (yes/no)"
    aquatic: "Lives in water (yes/no)"
    predator: "Is predator (yes/no)"
    toothed: "Has teeth (yes/no)"
    backbone: "Has backbone (yes/no)"
    breathes: "Breathes air (yes/no)"
    venomous: "Is venomous (yes/no)"
    fins: "Has fins (yes/no)"
    legs: "Number of legs"
    tail: "Has tail (yes/no)"
    domestic: "Domesticated (yes/no)"
    catsize: "Size relative to cat (yes/no)"
    type: "Animal type label"

0163_cpu:
  dataset_description: "CPU Performance: Regression dataset predicting CPU performance. 209 samples and 8 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0163_cpu.csv"
  feature_descriptions:
    vendor: "CPU vendor code"
    MYCT: "Machine cycle time (ns)"
    MMIN: "Minimum main memory (KB)"
    MMAX: "Maximum main memory (KB)"
    CACH: "Cache size (KB)"
    CHMIN: "Minimum channels"
    CHMAX: "Maximum channels"
    class: "Published relative performance"

0164_cpu_small:
  dataset_description: "CPU Small: Computer system activity measures (DELVE computer-activity family). 8,192 samples, 12 input attributes plus the target. Regression on the percentage of time the CPUs run in user mode (usr)."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0164_cpu_small.csv"
  feature_descriptions:
    iread: "Instruction read count"
    iwrite: "Instruction write count"
    scall: "System call count"
    sread: "System read count"
    swrite: "System write count"
    fork: "Fork operation count"
    exec: "Exec operation count"
    rchar: "Read characters count"
    wchar: "Written characters count"
    runqsz: "Run queue size"
    freemem: "Free memory (KB)"
    freeswap: "Free swap (KB)"
    usr: "User CPU time percentage"

0166_bank8FM:
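  dataset_description: "Bank8FM: Synthetic dataset from the DELVE bank family, generated by a simulation of how bank customers choose their banks (fairly linear, moderate noise). 8,192 samples, 8 input attributes plus the target. Regression on the fraction of customers rejected because of full queues (rej)."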
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0166_bank8FM.csv"
  feature_descriptions:
    a1cx: "Attribute 1 X-coordinate channel"
    a1cy: "Attribute 1 Y-coordinate channel"
    b2x: "Band 2 X-measure"
    b2y: "Band 2 Y-measure"
    a2pop: "Attribute 2 probability density"
    a3pop: "Attribute 3 probability density"
    temp: "Temperature measurement"
    m1xl: "Measurement 1 level"
    rej: "Fraction of customers rejected because all open queues are full (regression target)"

0181_bridges:
  dataset_description: "Bridges: Pittsburgh bridges design dataset (UCI). 108 samples and 12 attributes. Predicts bridge type."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0181_bridges.csv"
  feature_descriptions:
    RIVER: "River crossed code"
    LOCATION: "Location code"
    ERECTED: "Year erected"
    PURPOSE: "Purpose code"
    LENGTH: "Bridge length (ft)"
    LANES: "Number of lanes"
    CLEAR-G: "Whether a vertical clearance requirement was enforced (N/G)"
    T-OR-D: "Roadway position relative to the structure (through or deck)"
    MATERIAL: "Construction material code"
    SPAN: "Main span length category (short/medium/long)"
    REL-L: "Relative length of the main span to the total crossing length (S, S-F, F)"
    TYPE: "Bridge type label"

0193_internet_usage:
  dataset_description: "Internet Usage Survey: Large-scale survey of internet habits. ~5,000 samples and 80+ attributes. Profiles user behavior and demographics."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0193_internet_usage.csv"
  feature_descriptions:
    Age: "Respondent age"
    Community_Membership Building: "Belongs to community building group (yes/no)"
    Community_Membership Family: "Belongs to family community group"
    Community_Membership Hobbies: "Belongs to hobbies community group"
    Community_Membership None: "No community membership"
    Community_Membership Other: "Other community membership"
    Community_Membership Political: "Political group membership"
    Community_Membership Professional: "Professional group membership"
    Community_Membership Religious: "Religious group membership"
    Community_Membership Support: "Support group membership"
    Country: "Country of respondent"
    Disability_Cognitive: "Cognitive disability (yes/no)"
    Disability_Hearing: "Hearing disability (yes/no)"
    Disability_Motor: "Motor disability (yes/no)"
    Disability_Not_Impaired: "No disability"
    Disability_Not_Say: "Prefers not to say disability"
    Disability_Vision: "Vision disability (yes/no)"
    Education_Attainment: "Highest education level"
    Falsification_of_Information: "Ever falsified info online (yes/no)"
    Gender: "Respondent gender"
    Household_Income: "Household income bracket"
    How_You_Heard_About_Survey_Banner: "Heard via banner ad"
    How_You_Heard_About_Survey_Friend: "Heard via friend"
    How_You_Heard_About_Survey_Mailing_List: "Heard via mailing list"
    How_You_Heard_About_Survey_Others: "Heard via other channels"
    How_You_Heard_About_Survey_Printed_Media: "Heard via print media"
    How_You_Heard_About_Survey_Remembered: "Heard via memory"
    How_You_Heard_About_Survey_Search_Engine: "Heard via search engine"
    How_You_Heard_About_Survey_Usenet_News: "Heard via Usenet/news"
    How_You_Heard_About_Survey_WWW_Page: "Heard via web page"
    Major_Geographical_Location: "Major region of residence"
    Major_Occupation: "Occupation category"
    Marital_Status: "Marital status"
    Most_Import_Issue_Facing_the_Internet: "Key internet issue"
    Opinions_on_Censorship: "Opinion on internet censorship"
    Primary_Operating_Platform: "Primary computing platform"
    Primary_Language: "Primary language spoken"
    Primary_Place_of_WWW_Access: "Primary internet access location"
    Race: "Respondent race"
    Not_Purchasing_Bad_experience: "Not purchasing due to bad experience"
    Not_Purchasing_No_credit: "Not purchasing due to no credit"
    Not_Purchasing_Not_Applicable: "Not purchasing not applicable"
    Not_Purchasing_Other: "Not purchasing other reason"
    Not_Purchasing_Prefer_people: "Not purchasing prefer people help"
    Not_Purchasing_Privacy: "Not purchasing due to privacy concerns"
    Not_Purchasing_Too_Complicated: "Not purchasing due to complexity"
    Not_Purchasing_Uncomfortable: "Not purchasing due to discomfort"
    Not_Purchasing_Vendors: "Not purchasing due to vendor issues"
    Registered_to_Vote: "Registered voter (yes/no)"
    Sexual_Preference: "Sexual orientation"
    Web_Ordering: "Uses web ordering (yes/no)"
    Web_Page_Creation: "Creates web pages (yes/no)"
    Who_Pays_for_Access_Other: "Other party pays for access"
    Who_Pays_for_Access_Parents: "Parents pay for access"
    Who_Pays_for_Access_School: "School pays for access"
    Who_Pays_for_Access_Self: "Pays for own access"
    Who_Pays_for_Access_Work: "Work pays for access"
    Willingness_to_Pay_Fees: "Willingness to pay access fees"
    Years_on_Internet: "Years using internet"

0202_analcatdata_boxing2:
  dataset_description: "Analcatdata Boxing2: Boxing match outcomes dataset. ~100 samples and 4 attributes. Classifies match winner."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0202_analcatdata_boxing2.csv"
  feature_descriptions:
    Judge: "Judge identifier"
    Official: "Official referee identifier"
    Round: "Round number"
    Winner: "Match winner code"

0203_prnn_crabs:
  dataset_description: "Prnn Crabs: Biological dataset of crab morphological measurements. ~200 samples and 8 attributes. Classifies crab species."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0203_prnn_crabs.csv"
  feature_descriptions:
    sex: "Crab sex (M/F)"
    index: "Sample index"
    FL: "Frontal lobe size"
    RW: "Rear width"
    CL: "Carapace length"
    CW: "Carapace width"
    BD: "Body depth"
    sp: "Species label"

0204_analcatdata_boxing1:
  dataset_description: "Analcatdata Boxing1: Boxing match round-by-round data. ~200 samples and 4 attributes. Classifies winner by round."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0204_analcatdata_boxing1.csv"
  feature_descriptions:
    Judge: "Judge identifier"
    Official: "Official referee identifier"
    Round: "Round number"
    Winner: "Round winner code"
0207_irish:
  dataset_description: "Irish: Educational transitions data for Irish schoolchildren. 500 samples and 6 attributes. Predicts whether the Leaving Certificate was taken."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0207_irish.csv"
  feature_descriptions:
    Sex: "Respondent sex (M/F)"
    DVRT: "Drumcondra Verbal Reasoning Test score"
    Educational_level: "Highest education level attained"
    Prestige_score: "Occupational prestige score"
    Type_school: "Type of school attended (secondary/vocational/primary terminal leaver)"
    Leaving_Certificate: "Whether the Leaving Certificate was taken (taken/not taken)"

0211_analcatdata_birthday:
  dataset_description: "Analcatdata Birthday: Daily birth counts. ~365 samples and 4 attributes. Regression on birth counts."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0211_analcatdata_birthday.csv"
  feature_descriptions:
    Month: "Month of the year (1–12)"
    Day: "Day of the month (1–31)"
    Day_of_week: "Day of week (1 = Monday…7 = Sunday)"
    Births: "Number of births recorded"

0220_biomed:
  dataset_description: "Biomed: Biomedical blood sample study. ~500 samples and 9 attributes. Classifies patient condition."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0220_biomed.csv"
  feature_descriptions:
    Observation_number: "Unique sample identifier"
    Hospital_identification_number_for_blood_sample: "Hospital ID for sample"
    Age_of_patient: "Patient age in years"
    Date_that_blood_sample_was_taken: "Sample collection date"
    m1: "Measurement 1 value"
    m2: "Measurement 2 value"
    m3: "Measurement 3 value"
    m4: "Measurement 4 value"
    class: "Patient class label"

0221_arsenic_male_bladder:
  dataset_description: "Arsenic Male Bladder: Survival analysis of male bladder cancer patients. ~1,000 samples and 5 attributes. Models event counts."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0221_arsenic-male-bladder.csv"
  feature_descriptions:
    group: "Exposure group"
    conc: "Arsenic concentration"
    age: "Age at diagnosis"
    at.risk: "Time at risk"
    events: "Number of events observed"

0223_newton_hema:
  dataset_description: "Newton Hema: Hematology study of newborns. ~200 samples and 4 attributes. Regression on cell percentages."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0223_newton_hema.csv"
  feature_descriptions:
    id: "Sample identifier"
    weeks: "Gestational age (weeks)"
    sample_size: "Volume of blood sample (mL)"
    cells_percentage: "Percentage of specific blood cell type"

0225_veteran:
  dataset_description: "Veteran: Lung cancer treatment study. 137 samples and 8 attributes. Survival analysis."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0225_veteran.csv"
  feature_descriptions:
    Treatment: "Treatment type code"
    Celltype: "Cell type classification"
    Status: "Censoring status (death observed or censored)"
    Karnofsky_score: "Karnofsky performance score"
    Months_from_Diagnosis: "Months since diagnosis"
    Age: "Patient age"
    Prior_therapy: "Prior therapy indicator (yes/no)"
    Survival: "Survival time (days)"

0226_analcatdata_vineyard:
  dataset_description: "Analcatdata Vineyard: Agricultural yield study. ~150 samples and 4 attributes. Regression on grape yield."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0226_analcatdata_vineyard.csv"
  feature_descriptions:
    Year: "Harvest year"
    Row: "Vineyard row number"
    Group: "Treatment group"
    Lugs: "Grape yield (lug count)"

0232_analcatdata_gsssexsurvey:
  dataset_description: "Analcatdata GSS Sex Survey: General Social Survey sexual behavior module. ~1,000 samples and 10 attributes. Classification on AIDS knowledge."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0232_analcatdata_gsssexsurvey.csv"
  feature_descriptions:
    Married: "Marital status (yes/no)"
    Age: "Respondent age"
    Years_of_education: "Years of formal education"
    Male: "Respondent gender (M/F)"
    Religious: "Religious affiliation indicator"
    Sex_partners: "Number of sexual partners"
    Income: "Household income bracket"
    Drug_use: "Drug use indicator (yes/no)"
    Same_sex_relations: "Same-sex relations indicator (yes/no)"
    AIDS_know: "Knowledge of AIDS transmission (yes/no)"

0234_places:
  dataset_description: "Places: Places Rated Almanac ratings of U.S. metropolitan areas. 329 samples and 9 attributes. Regression on community planning metrics."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0234_places.csv"
  feature_descriptions:
    Climate_and_Terrain: "Rating of climate and terrain"
    Housing: "Rating of housing quality"
    Health_Care_and_Environment: "Rating of health/environment"
    Crime: "Rating of crime level"
    Transportation: "Rating of transportation"
    Education: "Rating of education"
    The_Arts: "Rating of arts/culture"
    Recreation: "Rating of recreation opportunities"
    Economics: "Rating of economic conditions"

0235_plasma_retinol:
  dataset_description: "Plasma Retinol: Nutritional study on plasma retinol levels. 315 samples and 14 attributes. Regression on plasma retinol."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0235_plasma_retinol.csv"
  feature_descriptions:
    AGE: "Subject age"
    SEX: "Subject sex (M/F)"
    SMOKSTAT: "Smoking status code"
    QUETELET: "Quetelet index (BMI)"
    VITUSE: "Vitamin usage indicator"
    CALORIES: "Daily caloric intake"
    FAT: "Daily fat intake (g)"
    FIBER: "Daily fiber intake (g)"
    ALCOHOL: "Alcoholic drinks consumed per week"
    CHOLESTEROL: "Dietary cholesterol consumed (mg per day)"
    BETADIET: "Dietary beta-carotene intake"
    RETDIET: "Dietary retinol intake"
    BETAPLASMA: "Plasma beta-carotene level"
    RETPLASMA: "Plasma retinol level"

0237_arsenic_female_lung:
  dataset_description: "Arsenic Female Lung: Survival study of female lung cancer with arsenic exposure. ~1,000 samples and 5 attributes. Event count analysis."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0237_arsenic-female-lung.csv"
  feature_descriptions:
    group: "Exposure group"
    conc: "Arsenic concentration"
    age: "Age at diagnosis"
    at.risk: "Time at risk"
    events: "Number of events observed"

0238_pbcseq:
  dataset_description: "PBCseq: Longitudinal primary biliary cirrhosis study. ~200 samples and 14 attributes. Survival/regression analysis."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0238_pbcseq.csv"
  feature_descriptions:
    case_number: "Case identifier"
    number_of_days: "Days from registration to death, transplantation, or study analysis time"
    drug: "Drug treatment code"
    age: "Patient age"
    sex: "Patient sex (M/F)"
    day: "Days between enrollment and this visit"
    presence_of_ascites: "Ascites presence (yes/no)"
    presence_of_hepatomegaly: "Hepatomegaly presence (yes/no)"
    presence_of_spiders: "Spider angiomas presence (yes/no)"
    presence_of_edema: "Edema presence (yes/no)"
    serum_bilirubin: "Serum bilirubin (mg/dL)"
    serum_cholesterol: "Serum cholesterol (mg/dL)"
    albumin: "Serum albumin (g/dL)"
    alkaline_phos: "Alkaline phosphatase (IU/L)"

0240_analcatdata_wildcat:
  dataset_description: "Analcatdata Wildcat: Labor strike study. ~40 samples and 6 attributes. Regression on wildcat strike count."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0240_analcatdata_wildcat.csv"
  feature_descriptions:
    Grievances: "Number of grievances filed"
    Rotate: "Rotation schedule code"
    Union: "Union membership percentage"
    Workforce: "Number of workers"
    Log_workforce: "Log of workforce size"
    Wildcat_strikes: "Number of wildcat strikes"

0242_pm10:
  dataset_description: "PM10: Urban air quality monitoring. 500 samples and 8 attributes. Regression on particulate concentration."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0242_pm10.csv"
  feature_descriptions:
    cars_per_hour: "Traffic volume (cars/hour)"
    temperature_at_2m: "Air temperature at 2m (°C)"
    wind_speed: "Wind speed (m/s)"
    temperature_diff_2m_25m: "Temperature difference (2m–25m)"
    wind_direction: "Wind direction (degrees)"
    hour_of_day: "Hour of day (0–23)"
    day: "Day number counted from October 1, 2001"
    pm10_concentration: "PM10 particulate concentration (μg/m³)"

0244_analcatdata_seropositive:
  dataset_description: "Analcatdata Seropositive: Serology study. ~200 samples and 4 attributes. Classification on positivity."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0244_analcatdata_seropositive.csv"
  feature_descriptions:
    Age: "Subject age"
    Disease: "Disease code"
    Total: "Total test count"
    Positive: "Number of positive tests"

0246_pollen:
  dataset_description: "Pollen: Synthetic pollen grain measurements (1986 ASA Data Exposition). 3,848 samples and 5 attributes. Classifies pollen type."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0246_pollen.csv"
  feature_descriptions:
    RIDGE: "Number of ridges"
    NUB: "Number of nubs"
    CRACK: "Crack count"
    WEIGHT: "Grain weight"
    DENSITY: "Grain density"

0247_boston:
  dataset_description: "Boston Housing: Regression dataset on housing values. 506 samples and 14 attributes. Predicts median home value."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0247_boston.csv"
  feature_descriptions:
    CRIM: "Per capita crime rate by town"
    ZN: "Proportion of residential land zoned for lots >25,000 sq.ft."
    INDUS: "Proportion of non-retail business acres per town"
    CHAS: "Charles River dummy variable (1 if tract bounds river; 0 otherwise)"
    NOX: "Nitric oxides concentration (parts per 10 million)"
    RM: "Average number of rooms per dwelling"
    AGE: "Proportion of owner-occupied units built prior to 1940"
    DIS: "Weighted distances to five Boston employment centers"
    RAD: "Index of accessibility to radial highways"
    TAX: "Full-value property-tax rate per $10,000"
    PTRATIO: "Pupil-teacher ratio by town"
    B: "1000(Bk - 0.63)^2 where Bk is proportion Black"
    LSTAT: "% lower status of the population"
    MEDV: "Median value of owner-occupied homes ($1000s)"

0248_arsenic_female_bladder:
  dataset_description: "Arsenic Female Bladder: Survival analysis of female bladder cancer with arsenic exposure. ~1,000 samples and 5 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0248_arsenic-female-bladder.csv"
  feature_descriptions:
    group: "Exposure group"
    conc: "Arsenic concentration"
    age: "Age at diagnosis"
    at.risk: "Time at risk"
    events: "Number of events observed"

0249_cps_85_wages:
  dataset_description: "CPS 85 Wages: U.S. labor economics survey. 534 samples and 11 attributes. Regression on wages."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0249_cps_85_wages.csv"
  feature_descriptions:
    EDUCATION: "Years of education"
    SOUTH: "Region indicator (1 = South; 0 = non-South)"
    SEX: "Respondent sex (M/F)"
    EXPERIENCE: "Years of work experience"
    UNION: "Union membership (yes/no)"
    AGE: "Respondent age"
    RACE: "Respondent race code"
    OCCUPATION: "Occupation code"
    SECTOR: "Industry sector code"
    MARR: "Marital status (1 = married; 0 = not)"
    WAGE: "Hourly wage (USD)"

0250_analcatdata_chlamydia:
  dataset_description: "Analcatdata Chlamydia: Epidemiological study of chlamydia cases. ~500 samples and 4 attributes. Regression on case counts."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0250_analcatdata_chlamydia.csv"
  feature_descriptions:
    Age: "Patient age"
    Gender: "Patient gender (M/F)"
    Race: "Patient race code"
    Count: "Number of chlamydia cases"

0251_arsenic_male_lung:
  dataset_description: "Arsenic Male Lung: Survival analysis of male lung cancer with arsenic exposure. ~1,000 samples and 5 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0251_arsenic-male-lung.csv"
  feature_descriptions:
    group: "Exposure group"
    conc: "Arsenic concentration"
    age: "Age at diagnosis"
    at.risk: "Time at risk"
    events: "Number of events observed"

0255_boston_corrected:
  dataset_description: "Boston Corrected: Variant of Boston Housing with spatial coordinates. 506 samples and 20 attributes. Regression on median home value."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0255_boston_corrected.csv"
  feature_descriptions:
    TOWN: "Town name"
    TOWN_ID: "Town identifier code"
    TRACT: "Census tract identifier"
    LON: "Longitude coordinate"
    LAT: "Latitude coordinate"
    MEDV: "Median home value ($1000s)"
    CMEDV: "Corrected median home value"
    CRIM: "Per capita crime rate by town"
    ZN: "Proportion of residential land zoned for lots >25,000 sq.ft."
    INDUS: "Proportion of non-retail business acres per town"
    CHAS: "Charles River dummy variable (1 if tract bounds river; 0 otherwise)"
    NOX: "Nitric oxides concentration (parts per 10 million)"
    RM: "Average number of rooms per dwelling"
    AGE: "Proportion of owner-occupied units built prior to 1940"
    DIS: "Weighted distances to five Boston employment centers"
    RAD: "Index of accessibility to radial highways"
    TAX: "Full-value property-tax rate per $10,000"
    PTRATIO: "Pupil-teacher ratio by town"
    B: "1000(Bk - 0.63)^2 where Bk is proportion Black"
    LSTAT: "% lower status of the population"

# Sensory evaluation experiment entry: summary, CSV location, per-column notes.
0257_sensory:
  dataset_description: "Sensory: Sensory evaluation experiment data. ~100 samples and 12 attributes. Regression on sensory scores."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0257_sensory.csv"
  feature_descriptions:
    Occasion: "Occasion identifier for tasting session"
    Judges: "Judge identifier"
    Interval: "Time interval between samples"
    Sittings: "Number of sittings per judge"
    Position: "Sample presentation position"
    Squares: "Number of sensory squares evaluated"
    Rows: "Number of rows in sensory layout"
    Columns: "Number of columns in sensory layout"
    # Key corrected from the misspelled "Halflpot" (the description itself says
    # "Half-plot"). NOTE(review): confirm the spelling against the CSV header.
    Halfplot: "Half-plot design indicator"
    Trellis: "Trellis panel design indicator"
    Method: "Sensory evaluation method code"
    Score: "Sensory quality score"

# NO2 air-quality monitoring entry: CSV location, summary, per-column notes.
0258_no2:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0258_no2.csv'
  dataset_description: 'NO2 Monitoring: Urban air quality dataset measuring NO₂ concentrations. Samples collected hourly with meteorological covariates.'
  feature_descriptions:
    cars_per_hour: 'Traffic volume (cars per hour)'
    temperature_at_2m: 'Air temperature at 2 m above ground (°C)'
    wind_speed: 'Wind speed (m/s)'
    temperature_diff_2m_25m: 'Temperature difference between 2 m and 25 m heights (°C)'
    wind_direction: 'Wind direction (degrees from north)'
    hour_of_day: 'Hour of the day (0–23)'
    day: 'Day of measurement (1–31)'
    no2_concentration: 'NO₂ concentration (µg/m³)'

# Labor strike statistics entry: CSV location, summary, per-column notes.
0259_strikes:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0259_strikes.csv'
  dataset_description: 'Strikes: National labor strike statistics with economic indicators. Annual observations of strike volumes and macroeconomic variables.'
  feature_descriptions:
    country_code: 'ISO country code'
    year: 'Calendar year'
    unemployment: 'Unemployment rate (%)'
    inflation: 'Inflation rate (%)'
    parliamentary_representation: 'Seats held by labor parties (%)'
    union_centralization: 'Degree of union centralization (index)'
    strike_volume: 'Total strike days per 1,000 workers'

# Michigan traffic accidents entry: CSV location, summary, per-column notes.
0261_analcatdata_michiganacc:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0261_analcatdata_michiganacc.csv'
  dataset_description: 'Michigan Accidents: Monthly traffic accident counts in Michigan. Records work and seasonality effects.'
  feature_descriptions:
    Season: 'Season of the year (Spring, Summer, Fall, Winter)'
    Month: 'Month of the year (1–12)'
    Unemployment_rate: 'State unemployment rate (%)'
    Accidents: 'Number of traffic accidents'

# Galaxy kinematics entry: CSV location, summary, per-column notes.
0265_visualizing_galaxy:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0265_visualizing_galaxy.csv'
  dataset_description: 'Galaxy Kinematics: Simulated galaxy rotation curve dataset. Records angular positions and radial velocities.'
  feature_descriptions:
    northsouth: 'North–south coordinate (kpc)'
    angle: 'Azimuthal angle (degrees)'
    radialposition: 'Radial distance from galactic center (kpc)'
    velocity: 'Line-of-sight velocity (km/s)'

# Body fat anthropometric study entry: summary, CSV location, per-column notes.
# Units follow the original StatLib documentation: weight in pounds, height in
# inches, circumferences in centimetres.
0267_bodyfat:
  dataset_description: "Bodyfat: Anthropometric study measuring body fat percentage with body measurements."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0267_bodyfat.csv"
  feature_descriptions:
    Density: "Body density (g/cm³)"
    Age: "Subject age (years)"
    # Previously labelled kg / cm; the source data records lbs / inches.
    Weight: "Body weight (lbs)"
    Height: "Height (inches)"
    Neck: "Neck circumference (cm)"
    Chest: "Chest circumference (cm)"
    Abdomen: "Abdomen circumference (cm)"
    Hip: "Hip circumference (cm)"
    Thigh: "Thigh circumference (cm)"
    Knee: "Knee circumference (cm)"
    Ankle: "Ankle circumference (cm)"
    Biceps: "Biceps circumference (cm)"
    Forearm: "Forearm circumference (cm)"
    Wrist: "Wrist circumference (cm)"
    # The target column is the numeric percent body fat, not a category.
    class: "Percent body fat (target)"

# KDD COIL series 1 entry: CSV location, summary, per-column notes.
0272_kdd_coil_1:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0272_kdd_coil_1.csv'
  dataset_description: 'KDD COIL-1: Water quality dataset measuring algae counts under flow conditions, series 1.'
  feature_descriptions:
    season: 'Season of sampling'
    river_size: 'River width indicator'
    fluid_velocity: 'Water velocity (m/s)'
    concentration_1: 'Contaminant concentration channel 1'
    concentration_2: 'Contaminant concentration channel 2'
    concentration_3: 'Contaminant concentration channel 3'
    concentration_4: 'Contaminant concentration channel 4'
    concentration_5: 'Contaminant concentration channel 5'
    concentration_6: 'Contaminant concentration channel 6'
    concentration_7: 'Contaminant concentration channel 7'
    concentration_8: 'Contaminant concentration channel 8'
    algae_1: 'Algae count type 1'

# KDD COIL series 2 entry: CSV location, summary, per-column notes.
0273_kdd_coil_2:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0273_kdd_coil_2.csv'
  dataset_description: 'KDD COIL-2: Water quality dataset measuring algae counts under flow conditions, series 2.'
  feature_descriptions:
    season: 'Season of sampling'
    river_size: 'River width indicator'
    fluid_velocity: 'Water velocity (m/s)'
    concentration_1: 'Contaminant concentration channel 1'
    concentration_2: 'Contaminant concentration channel 2'
    concentration_3: 'Contaminant concentration channel 3'
    concentration_4: 'Contaminant concentration channel 4'
    concentration_5: 'Contaminant concentration channel 5'
    concentration_6: 'Contaminant concentration channel 6'
    concentration_7: 'Contaminant concentration channel 7'
    concentration_8: 'Contaminant concentration channel 8'
    algae_2: 'Algae count type 2'

# KDD COIL series 3 entry: CSV location, summary, per-column notes.
0274_kdd_coil_3:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0274_kdd_coil_3.csv'
  dataset_description: 'KDD COIL-3: Water quality dataset measuring algae counts under flow conditions, series 3.'
  feature_descriptions:
    season: 'Season of sampling'
    river_size: 'River width indicator'
    fluid_velocity: 'Water velocity (m/s)'
    concentration_1: 'Contaminant concentration channel 1'
    concentration_2: 'Contaminant concentration channel 2'
    concentration_3: 'Contaminant concentration channel 3'
    concentration_4: 'Contaminant concentration channel 4'
    concentration_5: 'Contaminant concentration channel 5'
    concentration_6: 'Contaminant concentration channel 6'
    concentration_7: 'Contaminant concentration channel 7'
    concentration_8: 'Contaminant concentration channel 8'
    algae_3: 'Algae count type 3'

# KDD COIL series 4 entry: CSV location, summary, per-column notes.
0278_kdd_coil_4:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0278_kdd_coil_4.csv'
  dataset_description: 'KDD COIL-4: Water quality dataset measuring algae counts under flow conditions, series 4.'
  feature_descriptions:
    season: 'Season of sampling'
    river_size: 'River width indicator'
    fluid_velocity: 'Water velocity (m/s)'
    concentration_1: 'Contaminant concentration channel 1'
    concentration_2: 'Contaminant concentration channel 2'
    concentration_3: 'Contaminant concentration channel 3'
    concentration_4: 'Contaminant concentration channel 4'
    concentration_5: 'Contaminant concentration channel 5'
    concentration_6: 'Contaminant concentration channel 6'
    concentration_7: 'Contaminant concentration channel 7'
    concentration_8: 'Contaminant concentration channel 8'
    algae_4: 'Algae count type 4'

# KDD COIL series 5 entry: CSV location, summary, per-column notes.
0279_kdd_coil_5:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0279_kdd_coil_5.csv'
  dataset_description: 'KDD COIL-5: Water quality dataset measuring algae counts under flow conditions, series 5.'
  feature_descriptions:
    season: 'Season of sampling'
    river_size: 'River width indicator'
    fluid_velocity: 'Water velocity (m/s)'
    concentration_1: 'Contaminant concentration channel 1'
    concentration_2: 'Contaminant concentration channel 2'
    concentration_3: 'Contaminant concentration channel 3'
    concentration_4: 'Contaminant concentration channel 4'
    concentration_5: 'Contaminant concentration channel 5'
    concentration_6: 'Contaminant concentration channel 6'
    concentration_7: 'Contaminant concentration channel 7'
    concentration_8: 'Contaminant concentration channel 8'
    algae_5: 'Algae count type 5'

# KDD COIL series 6 entry: CSV location, summary, per-column notes.
0280_kdd_coil_6:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0280_kdd_coil_6.csv'
  dataset_description: 'KDD COIL-6: Water quality dataset measuring algae counts under flow conditions, series 6.'
  feature_descriptions:
    season: 'Season of sampling'
    river_size: 'River width indicator'
    fluid_velocity: 'Water velocity (m/s)'
    concentration_1: 'Contaminant concentration channel 1'
    concentration_2: 'Contaminant concentration channel 2'
    concentration_3: 'Contaminant concentration channel 3'
    concentration_4: 'Contaminant concentration channel 4'
    concentration_5: 'Contaminant concentration channel 5'
    concentration_6: 'Contaminant concentration channel 6'
    concentration_7: 'Contaminant concentration channel 7'
    concentration_8: 'Contaminant concentration channel 8'
    algae_6: 'Algae count type 6'

# KDD COIL series 7 entry: CSV location, summary, per-column notes.
0281_kdd_coil_7:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0281_kdd_coil_7.csv'
  dataset_description: 'KDD COIL-7: Water quality dataset measuring algae counts under flow conditions, series 7.'
  feature_descriptions:
    season: 'Season of sampling'
    river_size: 'River width indicator'
    fluid_velocity: 'Water velocity (m/s)'
    concentration_1: 'Contaminant concentration channel 1'
    concentration_2: 'Contaminant concentration channel 2'
    concentration_3: 'Contaminant concentration channel 3'
    concentration_4: 'Contaminant concentration channel 4'
    concentration_5: 'Contaminant concentration channel 5'
    concentration_6: 'Contaminant concentration channel 6'
    concentration_7: 'Contaminant concentration channel 7'
    concentration_8: 'Contaminant concentration channel 8'
    algae_7: 'Algae count type 7'

# Bank8FM entry: CSV location, summary, per-column notes.
0284_bank8FM:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0284_bank8FM.csv'
  dataset_description: 'Bank8FM: Statlog variant with 8 features for binary classification.'
  feature_descriptions:
    a1cx: 'Attribute 1 X-channel'
    a1cy: 'Attribute 1 Y-channel'
    b2x: 'Band 2 X-channel'
    b2y: 'Band 2 Y-channel'
    a2pop: 'Attribute 2 population measure'
    a3pop: 'Attribute 3 population measure'
    temp: 'Temperature metric'
    m1xl: 'Measurement 1 level'
    binaryClass: 'Binary class label'

# Machine CPU (binary) entry: CSV location, summary, per-column notes.
0290_machine_cpu:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0290_machine_cpu.csv'
  dataset_description: 'Machine CPU (Binary): CPU performance dataset reformulated as binary classification.'
  feature_descriptions:
    MYCT: 'Machine cycle time (ns)'
    MMIN: 'Minimum main memory (KB)'
    MMAX: 'Maximum main memory (KB)'
    CACH: 'Cache size (KB)'
    CHMIN: 'Minimum channels'
    CHMAX: 'Maximum channels'
    binaryClass: 'High/low performance label'

# Ailerons flight-control entry: summary, CSV location, per-column notes.
0291_ailerons:
  dataset_description: "Ailerons: Flight control dataset of aircraft aileron positions vs control surface deflections."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0291_ailerons.csv"
  feature_descriptions:
    ClimbRate: "Rate of climb (m/s)"
    Sgz: "Gyroscope z-axis reading"
    p: "Roll rate (rad/s)"
    q: "Pitch rate (rad/s)"
    curPitch: "Current pitch angle (rad)"
    curRoll: "Current roll angle (rad)"
    absRoll: "Absolute roll angle (rad)"
    diffClb: "Climb rate change"
    diffRollRate: "Roll rate change"
    diffDiffClb: "Second derivative of climb rate"
    # Expanded from the single pseudo-key "SeTime1–SeTime14": a range is not a
    # column name, so a per-column lookup could never match it.
    SeTime1: "Sensor timestamp 1"
    SeTime2: "Sensor timestamp 2"
    SeTime3: "Sensor timestamp 3"
    SeTime4: "Sensor timestamp 4"
    SeTime5: "Sensor timestamp 5"
    SeTime6: "Sensor timestamp 6"
    SeTime7: "Sensor timestamp 7"
    SeTime8: "Sensor timestamp 8"
    SeTime9: "Sensor timestamp 9"
    SeTime10: "Sensor timestamp 10"
    SeTime11: "Sensor timestamp 11"
    SeTime12: "Sensor timestamp 12"
    SeTime13: "Sensor timestamp 13"
    SeTime14: "Sensor timestamp 14"
    alpha: "Angle of attack (rad)"
    Se: "Sensor measurement"
    binaryClass: "Binary event indicator"

# CPU Small (binary) entry: CSV location, summary, per-column notes.
0292_cpu_small:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0292_cpu_small.csv'
  dataset_description: 'CPU Small (Binary): Small-scale CPU dataset with binary classification target.'
  feature_descriptions:
    iread: 'Instruction read count'
    iwrite: 'Instruction write count'
    scall: 'System call count'
    sread: 'System read count'
    swrite: 'System write count'
    fork: 'Fork operation count'
    exec: 'Exec operation count'
    rchar: 'Read char count'
    wchar: 'Write char count'
    runqsz: 'Run queue size'
    freemem: 'Free memory (KB)'
    freeswap: 'Free swap (KB)'
    binaryClass: 'Binary performance class'

# Servo control entry: CSV location, summary, per-column notes.
0303_servo:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0303_servo.csv'
  dataset_description: 'Servo: Robot arm servo control dataset mapping motor commands to valve positions.'
  feature_descriptions:
    motor: 'Motor command signal'
    screw: 'Screw position'
    pgain: 'Proportional gain'
    vgain: 'Velocity gain'
    binaryClass: 'Binary control outcome'

# CPU activity (binary) entry: summary, CSV location, per-column notes.
0312_cpu_act:
  dataset_description: "CPU ACT: CPU activity logs with binary performance classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0312_cpu_act.csv"
  feature_descriptions:
    tread: "Thread read count"
    twrite: "Thread write count"
    scall: "System call count"
    sread: "System read count"
    swrite: "System write count"
    fork: "Fork operation count"
    exec: "Exec operation count"
    rchar: "Read char count"
    wchar: "Write char count"
    pgout: "Page-outs"
    # Was a second "ppgin" key (duplicate keys are invalid YAML and most parsers
    # silently keep only the last one). The comp-activ schema has ppgout here.
    ppgout: "Pages paged out"
    pgfree: "Pages freed"
    pgscan: "Pages scanned"
    atch: "Attach operations"
    pgin: "Page-in operations"
    ppgin: "Program page-in operations"
    pflt: "Page faults"
    vflt: "Virtual faults"
    runqsz: "Run queue size"
    freemem: "Free memory (KB)"
    freeswap: "Free swap (KB)"
    binaryClass: "Binary classification label"

# Cleveland heart disease (binary) entry: CSV location, summary, per-column notes.
0333_cleveland:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0333_cleveland.csv'
  dataset_description: 'Cleveland Heart (Binary): Cleveland Heart Disease dataset with binary outcome.'
  feature_descriptions:
    age: 'Age in years'
    sex: 'Gender (1 = male; 0 = female)'
    cp: 'Chest pain type (1–4)'
    trestbps: 'Resting blood pressure (mm Hg)'
    chol: 'Serum cholesterol (mg/dl)'
    fbs: 'Fasting blood sugar > 120 mg/dl'
    restecg: 'Resting ECG results (0–2)'
    thalach: 'Max heart rate achieved'
    exang: 'Exercise induced angina'
    oldpeak: 'ST depression induced by exercise'
    slope: 'Slope of peak exercise ST segment'
    ca: 'Number of major vessels'
    thal: 'Thalassemia type'
    binaryClass: 'Heart disease presence (yes/no)'

# Cholesterol (binary) entry: CSV location, summary, per-column notes.
0342_cholesterol:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0342_cholesterol.csv'
  dataset_description: 'Cholesterol (Binary): Clinical cholesterol dataset repurposed as binary classification.'
  feature_descriptions:
    age: 'Age in years'
    sex: 'Gender (1 = male; 0 = female)'
    cp: 'Chest pain type'
    trestbps: 'Resting blood pressure (mm Hg)'
    fbs: 'Fasting blood sugar > 120 mg/dl'
    restecg: 'Resting ECG results'
    thalach: 'Max heart rate achieved'
    exang: 'Exercise induced angina'
    oldpeak: 'ST depression induced by exercise'
    slope: 'Slope of peak ST segment'
    ca: 'Number of major vessels'
    thal: 'Thalassemia type'
    num: 'Number of incidents'
    binaryClass: 'High cholesterol indicator'

# Delta Ailerons entry: CSV location, summary, per-column notes.
0345_delta_ailerons:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0345_delta_ailerons.csv'
  dataset_description: 'Delta Ailerons: Time-series dataset of aircraft roll, pitch and yaw rate changes.'
  feature_descriptions:
    RollRate: 'Roll rate (rad/s)'
    PitchRate: 'Pitch rate (rad/s)'
    currPitch: 'Current pitch angle (rad)'
    currRoll: 'Current roll angle (rad)'
    diffRollRate: 'Change in roll rate'
    binaryClass: 'Binary control event'

# Delta Elevators entry: summary, CSV location, per-column notes.
# "Elevators" here are aircraft control surfaces (F16 elevator control task),
# not lifts; the earlier text described elevator vibration, which does not
# match the flight-state columns listed below.
0356_delta_elevators:
  dataset_description: "Delta Elevators (Binary): F16 aircraft elevator control dataset with flight-state attributes and a binarized control-variation target."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0356_delta_elevators.csv"
  feature_descriptions:
    climbRate: "Elevation change rate (m/s)"
    Altitude: "Altitude (m)"
    RollRate: "Roll rate (rad/s)"
    currRoll: "Current roll angle (rad)"
    diffClb: "Change in climb rate"
    diffDiffClb: "Second derivative of climb rate"
    binaryClass: "Binarized elevator control variation"

# Statistical Sleuth case study 20.2 entry: summary, CSV location, per-column notes.
# The previous text described a financial-fraud dataset; the two-letter columns
# are the birdkeeping / lung cancer study variables documented for `case2002`.
0364_sleuth_case2002:
  dataset_description: "Sleuth Case 2002: Birdkeeping and lung cancer case-control study (The Statistical Sleuth, case study 20.2). 147 subjects and 7 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0364_sleuth_case2002.csv"
  feature_descriptions:
    FM: "Sex of subject (Female/Male)"
    LC: "Lung cancer status (LungCancer/NoCancer)"
    BK: "Birdkeeping status (Bird/NoBird)"
    SS: "Socioeconomic status (High/Low)"
    AG: "Age in years"
    YR: "Years of smoking prior to diagnosis or examination"
    CD: "Average number of cigarettes smoked per day"

# Los Angeles mortality / pollution time-series entry: CSV location, summary, per-column notes.
0365_rmftsa_ladata:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0365_rmftsa_ladata.csv'
  dataset_description: 'LA Data Mortality: Los Angeles respiratory/cardiovascular mortality and pollution time series.'
  feature_descriptions:
    Total_Mortality: 'Total daily deaths'
    Cardiovascular_Mortality: 'Daily cardiovascular deaths'
    Temperature: 'Average daily temperature (°C)'
    Relative_Humidity: 'Average daily relative humidity (%)'
    Carbon_Monoxide: 'Daily CO concentration (ppm)'
    Sulfur_Dioxide: 'Daily SO₂ concentration (ppb)'
    LASHumway: 'Lagged SO₂ variable (Humway)'
    Nitrogen_Dioxide: 'Daily NO₂ concentration (ppb)'
    Hydrocarbons: 'Daily hydrocarbon concentration (ppb)'
    Ozone: 'Daily O₃ concentration (ppb)'
    Particulates: 'Daily particulate matter concentration (µg/m³)'
    Respiratory_Mortality: 'Daily respiratory deaths'

# Bank32nh entry: CSV location, summary, per-column notes.
0369_bank32nh:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0369_bank32nh.csv'
  dataset_description: 'Bank32nh: Statlog (Heart) variant with 32 numeric predictors and binary outcome.'
  feature_descriptions:
    a1cx: 'Attribute 1 X-channel'
    a1cy: 'Attribute 1 Y-channel'
    a1sx: 'Attribute 1 secondary X-channel'
    a1sy: 'Attribute 1 secondary Y-channel'
    a1rho: 'Attribute 1 correlation measure'
    a1pop: 'Attribute 1 probability density'
    a2cx: 'Attribute 2 X-channel'
    a2cy: 'Attribute 2 Y-channel'
    a2sx: 'Attribute 2 secondary X-channel'
    a2sy: 'Attribute 2 secondary Y-channel'
    a2rho: 'Attribute 2 correlation measure'
    a2pop: 'Attribute 2 probability density'
    a3cx: 'Attribute 3 X-channel'
    a3cy: 'Attribute 3 Y-channel'
    a3sx: 'Attribute 3 secondary X-channel'
    a3sy: 'Attribute 3 secondary Y-channel'
    a3rho: 'Attribute 3 correlation measure'
    a3pop: 'Attribute 3 probability density'
    temp: 'Ambient temperature (°C)'
    b1x: 'Band 1 X-measure'
    b1y: 'Band 1 Y-measure'
    b1call: 'Band 1 call indicator'
    b1eff: 'Band 1 effectiveness'
    b2x: 'Band 2 X-measure'
    b2y: 'Band 2 Y-measure'
    b2call: 'Band 2 call indicator'
    b2eff: 'Band 2 effectiveness'
    b3x: 'Band 3 X-measure'
    b3y: 'Band 3 Y-measure'
    b3call: 'Band 3 call indicator'
    b3eff: 'Band 3 effectiveness'
    mxql: 'Maximum queue length'
    binaryClass: 'Binary target label'

# Environmental measurements entry: CSV location, summary, per-column notes.
0371_visualizing_environmental:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0371_visualizing_environmental.csv'
  dataset_description: 'Environmental Visualizing: Ozone, radiation, temperature, and wind measurements for visualization tasks.'
  feature_descriptions:
    ozone: 'Ozone concentration (ppb)'
    radiation: 'Solar radiation (Langley)'
    temperature: 'Air temperature (°C)'
    wind: 'Wind speed (m/s)'

# Irish wind speed (binary) entry: summary, CSV location, per-column notes.
# Station codes follow the Irish wind data of Haslett & Raftery (1989); several
# were previously expanded to the wrong place names.
0378_wind:
  dataset_description: "Wind (Binary): Daily wind speed measurements across Irish stations with binary classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0378_wind.csv"
  feature_descriptions:
    year: "Observation year"
    month: "Observation month"
    day: "Observation day"
    RPT: "Wind at Roche's Point"
    VAL: "Wind at Valentia"
    ROS: "Wind at Rosslare"
    KIL: "Wind at Kilkenny"
    SHA: "Wind at Shannon"
    BIR: "Wind at Birr"
    DUB: "Wind at Dublin"
    CLA: "Wind at Claremorris"
    MUL: "Wind at Mullingar"
    CLO: "Wind at Clones"
    BEL: "Wind at Belmullet"
    binaryClass: "High/low wind speed label"

# Fruit fly longevity study (binary) entry: summary, CSV location, per-column notes.
# The study relates male fruit fly lifespan to sexual activity; PARTNERS and TYPE
# describe the companions supplied, and SLEEP is a percentage of the day.
0386_fruitfly:
  dataset_description: "Fruitfly (Binary): Fruit fly longevity vs. sexual activity study with binary target."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0386_fruitfly.csv"
  feature_descriptions:
    PARTNERS: "Number of companions (0, 1 or 8)"
    TYPE: "Type of companion (0 = newly pregnant female; 1 = virgin female; 9 = not applicable)"
    THORAX: "Thorax length (mm)"
    SLEEP: "Percentage of each day spent sleeping"
    binaryClass: "Binarized longevity (lifespan in days)"

# Los Angeles mortality / pollution (binary) entry: CSV location, summary, per-column notes.
0389_rmftsa_ladata:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0389_rmftsa_ladata.csv'
  dataset_description: 'LA Data (Binary): Los Angeles mortality and pollution dataset with binary outcome.'
  feature_descriptions:
    Total_Mortality: 'Total daily deaths'
    Respiratory_Mortality: 'Daily respiratory deaths'
    Cardiovascular_Mortality: 'Daily cardiovascular deaths'
    Temperature: 'Average temperature (°C)'
    Relative_Humidity: 'Relative humidity (%)'
    Carbon_Monoxide: 'CO concentration (ppm)'
    Sulfur_Dioxide: 'SO₂ concentration (ppb)'
    LASHumway: 'Lagged SO₂ variable'
    Nitrogen_Dioxide: 'NO₂ concentration (ppb)'
    Hydrocarbons: 'Hydrocarbon concentration (ppb)'
    Ozone: 'O₃ concentration (ppb)'
    binaryClass: 'Binary mortality risk label'

# Veteran lung cancer (binary) entry: CSV location, summary, per-column notes.
0391_veteran:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0391_veteran.csv'
  dataset_description: 'Veteran (Binary): Lung cancer survival study with binary outcome.'
  feature_descriptions:
    treatment: 'Treatment code'
    celltype: 'Cancer cell type'
    status: 'Survival status (alive/dead)'
    karnofsky: 'Performance score'
    months: 'Survival time (months)'
    age: 'Patient age'
    therapy: 'Prior therapy indicator'
    binaryClass: 'High/low survival risk'

# Vineyard yield (binary) entry: CSV location, summary, per-column notes.
0396_analcatdata_vineyard:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0396_analcatdata_vineyard.csv'
  dataset_description: 'Vineyard (Binary): Grape yield study with binary outcome.'
  feature_descriptions:
    Year: 'Harvest year'
    Row: 'Vineyard row'
    Group: 'Treatment group'
    binaryClass: 'High/low yield indicator'

# Supreme Court decisions (binary) entry: CSV location, summary, per-column notes.
0400_analcatdata_supreme:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0400_analcatdata_supreme.csv'
  dataset_description: 'Supreme Court Cases: Dataset of SCOTUS decisions with binary outcome.'
  feature_descriptions:
    Actions_taken: 'Number of actions taken'
    Liberal: 'Liberal vote count'
    Unconstitutional: 'Unconstitutional vote count'
    Precedent_alteration: 'Precedent alteration count'
    Unanimous: 'Unanimous decision flag'
    Year_of_decision: 'Decision year'
    Lower_court_disagreement: 'Lower court disagreement count'
    binaryClass: 'Affirm/reverse indicator'

# Environmental measurements (binary) entry: CSV location, summary, per-column notes.
0406_visualizing_environmental:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0406_visualizing_environmental.csv'
  dataset_description: 'Environmental Visualizing (Binary): Ozone, radiation, temperature with binary classification.'
  feature_descriptions:
    ozone: 'Ozone concentration (ppb)'
    radiation: 'Solar radiation (Langley)'
    temperature: 'Air temperature (°C)'
    binaryClass: 'High/low pollution label'

# Space GA (binary) entry: CSV location, summary, per-column notes.
0407_space_ga:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0407_space_ga.csv'
  dataset_description: 'Space GA (Binary): Georgia county election dataset converted to binary outcome.'
  feature_descriptions:
    ln(VOTES/POP): 'Log(votes/population)'
    POP: 'Population'
    EDUCATION: 'Education level (%)'
    HOUSES: 'Housing units'
    INCOME: 'Median income'
    XCOORD: 'X coordinate of county center'
    binaryClass: 'High/low turnout label'

# Pharyngeal cancer (binary) entry: summary, CSV location, per-column notes.
0408_pharynx:
  dataset_description: "Pharynx (Binary): Pharyngeal cancer dataset with binary survival outcome."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0408_pharynx.csv"
  feature_descriptions:
    Inst: "Institution code"
    sex: "Patient sex (M/F)"
    Treatment: "Treatment code"
    Grade: "Tumor grade"
    Age: "Patient age"
    Condition: "Preoperative condition"
    Site: "Tumor site"
    T: "Tumor size/stage"
    # Quoted on purpose: a bare N is a boolean literal in YAML 1.1, so some
    # parsers would load this key as false instead of the string "N".
    "N": "Lymph node involvement"
    Status: "Postoperative status"
    binaryClass: "High/low survival risk"

# Auto price (binary) entry: CSV location, summary, per-column notes.
0414_auto_price:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0414_auto_price.csv'
  dataset_description: 'Auto Price (Binary): Automobile pricing dataset with binary price category.'
  feature_descriptions:
    symboling: 'Risk factor rating'
    normalized-losses: 'Normalized losses'
    wheel-base: 'Wheelbase (inches)'
    length: 'Length (inches)'
    width: 'Width (inches)'
    height: 'Height (inches)'
    curb-weight: 'Curb weight (lbs)'
    engine-size: 'Engine displacement (cc)'
    bore: 'Cylinder bore (inches)'
    stroke: 'Piston stroke (inches)'
    compression-ratio: 'Compression ratio'
    horsepower: 'Horsepower'
    peak-rpm: 'Peak RPM'
    city-mpg: 'City MPG'
    highway-mpg: 'Highway MPG'
    binaryClass: 'High/low price category'

# Wildcat strikes (binary) entry: CSV location, summary, per-column notes.
0417_analcatdata_wildcat:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0417_analcatdata_wildcat.csv'
  dataset_description: 'Wildcat (Binary): Labor strike dataset with binary outcome.'
  feature_descriptions:
    Grievances: 'Number of grievances'
    Rotate: 'Rotation code'
    Union: 'Union membership %'
    Workforce: 'Number of workers'
    Log_workforce: 'Log of workforce size'
    binaryClass: 'Strike/no-strike label'

# PM10 particulate monitoring (binary) entry: CSV location, summary, per-column notes.
0419_pm10:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0419_pm10.csv'
  dataset_description: 'PM10 (Binary): Urban particulate monitoring with binary threshold classification.'
  feature_descriptions:
    pm10_concentration: 'PM10 concentration (µg/m³)'
    cars_per_hour: 'Traffic volume'
    temperature_at_2m: 'Temperature at 2m'
    wind_speed: 'Wind speed'
    temperature_diff_2m_25m: 'Temp difference 2m–25m'
    wind_direction: 'Wind direction'
    hour_of_day: 'Hour of day'
    binaryClass: 'Above/below threshold'

# AutoPrice (binary, alternate) entry: CSV location, summary, per-column notes.
0424_autoPrice:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0424_autoPrice.csv'
  dataset_description: 'AutoPrice (Binary): Alternate auto price dataset with binary outcome.'
  feature_descriptions:
    symboling: 'Risk factor rating'
    normalized-losses: 'Normalized losses'
    wheel-base: 'Wheelbase'
    length: 'Length'
    width: 'Width'
    height: 'Height'
    curb-weight: 'Curb weight'
    engine-size: 'Engine size'
    bore: 'Bore'
    stroke: 'Stroke'
    compression-ratio: 'Compression ratio'
    horsepower: 'Horsepower'
    peak-rpm: 'Peak RPM'
    city-mpg: 'City MPG'
    highway-mpg: 'Highway MPG'
    binaryClass: 'High/low price'

# Soil resistivity survey (binary) entry: CSV location, summary, per-column notes.
0431_visualizing_soil:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0431_visualizing_soil.csv'
  dataset_description: 'Soil Visualizing (Binary): Soil resistivity survey with binary classification.'
  feature_descriptions:
    northing: 'Northing coordinate'
    easting: 'Easting coordinate'
    resistivity: 'Soil resistivity (Ω·m)'
    isns: 'In-phase signal'
    binaryClass: 'High/low resistivity label'

# Galaxy (binary) entry: CSV location, summary, per-column notes.
0433_visualizing_galaxy:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0433_visualizing_galaxy.csv'
  dataset_description: 'Galaxy Visualizing (Binary): Radial positions and angles of simulated galaxies with binary classification.'
  feature_descriptions:
    northsouth: 'North–south position'
    angle: 'Azimuthal angle'
    radialposition: 'Radial distance'
    binaryClass: 'Galaxy type label'

# Labor strikes (binary) entry: CSV location, summary, per-column notes.
0435_strikes:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0435_strikes.csv'
  dataset_description: 'Strikes (Binary): Labor strike dataset with binary outcome.'
  feature_descriptions:
    country_code: 'Country code'
    year: 'Year'
    strike_volume: 'Strike days per 1,000 workers'
    unemployment: 'Unemployment rate'
    inflation: 'Inflation rate'
    parliamentary_representation: 'Labor party seats (%)'
    binaryClass: 'High/low strike volume'

# Michigan accidents (binary) entry: CSV location, summary, per-column notes.
0436_analcatdata_michiganacc:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0436_analcatdata_michiganacc.csv'
  dataset_description: 'Michigan Accidents (Binary): State accident study with binary outcome.'
  feature_descriptions:
    Season: 'Season'
    Month: 'Month'
    Unemployment_rate: 'Unemployment rate'
    binaryClass: 'High/low accident count'

# Earthquake records (binary) entry: summary, CSV location, per-column notes.
# Every row is an earthquake, so the label cannot be "quake / no quake".
# NOTE(review): the target is presumably the binarized Richter magnitude of the
# original quake dataset; confirm against the CSV.
0437_quake:
  dataset_description: "Quake (Binary): Earthquake records (focal depth and epicenter location) with binarized Richter magnitude as target."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0437_quake.csv"
  feature_descriptions:
    focal_depth: "Earthquake focal depth (km)"
    latitude: "Epicenter latitude"
    longitude: "Epicenter longitude"
    binaryClass: "Binarized Richter magnitude (high/low)"

# Low birth weight (binary) entry: CSV location, summary, per-column notes.
0443_lowbwt:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0443_lowbwt.csv'
  dataset_description: 'Low Birthweight (Binary): Medical dataset of neonatal and maternal features. 189 samples and 10 attributes. Classifies low birth weight risk.'
  feature_descriptions:
    LOW: 'Low birth weight indicator (1 if <2500 g; 0 otherwise)'
    AGE: 'Mother’s age in years'
    LWT: 'Mother’s weight at last menstrual period (lbs)'
    RACE: 'Mother’s race category (1=White,2=Black,3=Other)'
    SMOKE: 'Smoking during pregnancy (1=yes; 0=no)'
    PTL: 'Number of previous preterm labors'
    HT: 'History of hypertension (1=yes; 0=no)'
    UI: 'Uterine irritability (1=yes; 0=no)'
    FTV: 'Physician visits in first trimester'
    binaryClass: 'Low birth weight class (same as LOW)'

# Arsenic exposure / male bladder cancer (binary) entry: CSV location, summary, per-column notes.
0445_arsenic_male_bladder:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0445_arsenic-male-bladder.csv'
  dataset_description: 'Arsenic Male Bladder (Binary): Survival study of male bladder cancer with arsenic exposure. ~1,000 samples and 5 attributes.'
  feature_descriptions:
    group: 'Exposure group code'
    conc: 'Arsenic concentration (µg/L)'
    age: 'Age at diagnosis (years)'
    at.risk: 'Time at risk (years)'
    binaryClass: 'Event occurred (1=yes; 0=no)'

# Arsenic exposure / female bladder cancer (binary) entry: summary, CSV location,
# per-column notes.
0446_arsenic_female_bladder:
  dataset_description: "Arsenic Female Bladder (Binary): Survival study of female bladder cancer with arsenic exposure. ~1,000 samples and 5 attributes."
  # The path previously pointed at 0446_newton_hema.csv, the file of a different
  # entry whose columns (id, weeks, cells_percentage) do not match the ones
  # described here. The filename below follows the sibling arsenic entries.
  # NOTE(review): confirm this file exists in clean_labeled_dataset.
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0446_arsenic-female-bladder.csv"
  feature_descriptions:
    group: "Exposure group code"
    conc: "Arsenic concentration (µg/L)"
    age: "Age at diagnosis (years)"
    at.risk: "Time at risk (years)"
    binaryClass: "Event occurred (1=yes; 0=no)"

# Newton hematology (binary) entry: CSV location, summary, per-column notes.
0446_newton_hema:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0446_newton_hema.csv'
  dataset_description: 'Newton Hema (Binary): Hematology study of newborns with binary outcome. ~200 samples and 4 attributes.'
  feature_descriptions:
    id: 'Sample identifier'
    weeks: 'Gestational age (weeks)'
    cells_percentage: 'Percentage of target blood cells'
    binaryClass: 'Condition flag (1=abnormal; 0=normal)'

# Arsenic exposure / female lung cancer (binary) entry: CSV location, summary, per-column notes.
0447_arsenic_female_lung:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0447_arsenic-female-lung.csv'
  dataset_description: 'Arsenic Female Lung (Binary): Survival study of female lung cancer with arsenic exposure. ~1,000 samples and 5 attributes.'
  feature_descriptions:
    group: 'Exposure group code'
    conc: 'Arsenic concentration (µg/L)'
    age: 'Age at diagnosis (years)'
    at.risk: 'Time at risk (years)'
    binaryClass: 'Event occurred (1=yes; 0=no)'

# Arsenic exposure study, male lung cancer: CSV location plus one
# description per column (5 keys listed, matching the stated attribute count).
0448_arsenic_male_lung:
  dataset_description: "Arsenic Male Lung (Binary): Survival study of male lung cancer with arsenic exposure. ~1,000 samples and 5 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0448_arsenic-male-lung.csv"
  feature_descriptions:
    group: "Exposure group code"
    conc: "Arsenic concentration (µg/L)"
    age: "Age at diagnosis (years)"
    at.risk: "Time at risk (years)"
    binaryClass: "Event occurred (1=yes; 0=no)"

# Triazines entry: ten descriptors for each of six positions (p1..p6) plus
# binaryClass, i.e. 61 keys, matching the stated attribute count.
# NOTE(review): `path` points at 0448_arsenic-male-lung.csv, the same file used
# by the 0448_arsenic_male_lung entry, whose keys (group, conc, age, at.risk)
# do not match the columns described here — confirm the intended CSV.
0448_triazines:
  dataset_description: "Triazines (Binary): Herbicide activity dataset with binary classification of active vs. inactive. 186 samples and 61 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0448_arsenic-male-lung.csv"
  feature_descriptions:
    p1_polar: "Polar surface area of position 1"
    p1_size: "Molecular size descriptor at position 1"
    p1_flex: "Flexibility descriptor at position 1"
    p1_h_doner: "H-bond donor count at position 1"
    p1_h_acceptor: "H-bond acceptor count at position 1"
    p1_pi_doner: "π-electron donor count at position 1"
    p1_pi_acceptor: "π-electron acceptor count at position 1"
    p1_polarisable: "Polarizability at position 1"
    p1_sigma: "Sigma electron descriptor at position 1"
    p1_branch: "Branching descriptor at position 1"
    p2_polar: "Polar surface area of position 2"
    p2_size: "Molecular size descriptor at position 2"
    p2_flex: "Flexibility descriptor at position 2"
    p2_h_doner: "H-bond donor count at position 2"
    p2_h_acceptor: "H-bond acceptor count at position 2"
    p2_pi_doner: "π-electron donor count at position 2"
    p2_pi_acceptor: "π-electron acceptor count at position 2"
    p2_polarisable: "Polarizability at position 2"
    p2_sigma: "Sigma electron descriptor at position 2"
    p2_branch: "Branching descriptor at position 2"
    p3_polar: "Polar surface area of position 3"
    p3_size: "Molecular size descriptor at position 3"
    p3_flex: "Flexibility descriptor at position 3"
    p3_h_doner: "H-bond donor count at position 3"
    p3_h_acceptor: "H-bond acceptor count at position 3"
    p3_pi_doner: "π-electron donor count at position 3"
    p3_pi_acceptor: "π-electron acceptor count at position 3"
    p3_polarisable: "Polarizability at position 3"
    p3_sigma: "Sigma electron descriptor at position 3"
    p3_branch: "Branching descriptor at position 3"
    p4_polar: "Polar surface area of position 4"
    p4_size: "Molecular size descriptor at position 4"
    p4_flex: "Flexibility descriptor at position 4"
    p4_h_doner: "H-bond donor count at position 4"
    p4_h_acceptor: "H-bond acceptor count at position 4"
    p4_pi_doner: "π-electron donor count at position 4"
    p4_pi_acceptor: "π-electron acceptor count at position 4"
    p4_polarisable: "Polarizability at position 4"
    p4_sigma: "Sigma electron descriptor at position 4"
    p4_branch: "Branching descriptor at position 4"
    p5_polar: "Polar surface area of position 5"
    p5_size: "Molecular size descriptor at position 5"
    p5_flex: "Flexibility descriptor at position 5"
    p5_h_doner: "H-bond donor count at position 5"
    p5_h_acceptor: "H-bond acceptor count at position 5"
    p5_pi_doner: "π-electron donor count at position 5"
    p5_pi_acceptor: "π-electron acceptor count at position 5"
    p5_polarisable: "Polarizability at position 5"
    p5_sigma: "Sigma electron descriptor at position 5"
    p5_branch: "Branching descriptor at position 5"
    p6_polar: "Polar surface area of position 6"
    p6_size: "Molecular size descriptor at position 6"
    p6_flex: "Flexibility descriptor at position 6"
    p6_h_doner: "H-bond donor count at position 6"
    p6_h_acceptor: "H-bond acceptor count at position 6"
    p6_pi_doner: "π-electron donor count at position 6"
    p6_pi_acceptor: "π-electron acceptor count at position 6"
    p6_polarisable: "Polarizability at position 6"
    p6_sigma: "Sigma electron descriptor at position 6"
    p6_branch: "Branching descriptor at position 6"
    binaryClass: "Active vs. inactive herbicide (1=active; 0=inactive)"

# PBCseq entry: CSV location plus one description per column
# (19 keys listed, matching the stated attribute count).
0459_pbcseq:
  dataset_description: "PBCseq (Binary): Longitudinal primary biliary cirrhosis study with binary outcome. ~200 samples and 19 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0459_pbcseq.csv"
  feature_descriptions:
    case_number: "Patient case identifier"
    number_of_days: "Days since enrollment"
    status: "Survival status code"
    drug: "Drug treatment code"
    age: "Patient age"
    sex: "Patient sex (M/F)"
    day: "Study day"
    presence_of_ascites: "Ascites present (1=yes; 0=no)"
    presence_of_hepatomegaly: "Hepatomegaly present (1=yes; 0=no)"
    presence_of_spiders: "Spider angiomas present (1=yes; 0=no)"
    presence_of_edema: "Edema present (1=yes; 0=no)"
    serum_bilirubin: "Serum bilirubin (mg/dL)"
    serum_cholesterol: "Serum cholesterol (mg/dL)"
    albumin: "Serum albumin (g/dL)"
    alkaline_phosphate: "Alkaline phosphatase (IU/L)"
    SGOT: "SGOT enzyme level (IU/L)"
    platelets: "Platelet count (×10³/mm³)"
    prothrombin_time: "Prothrombin time (seconds)"
    binaryClass: "High/low risk of progression (1=high; 0=low)"

# Heart C entry: CSV location plus one description per column
# (14 keys listed, matching the stated attribute count).
0469_heart_c:
  dataset_description: "Heart C (Binary): Cleveland Heart Disease dataset repurposed as binary classification. 303 samples and 14 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0469_heart-c.csv"
  feature_descriptions:
    age: "Age in years"
    sex: "Gender (1=male; 0=female)"
    cp: "Chest pain type (1–4)"
    trestbps: "Resting blood pressure (mm Hg)"
    chol: "Serum cholesterol (mg/dl)"
    fbs: "Fasting blood sugar >120 mg/dl (1=yes; 0=no)"
    restecg: "Resting ECG results (0–2)"
    thalach: "Max heart rate achieved"
    exang: "Exercise‐induced angina (1=yes; 0=no)"
    oldpeak: "ST depression induced by exercise"
    slope: "Slope of peak exercise ST segment (1–3)"
    ca: "Number of major vessels colored by fluoroscopy (0–3)"
    thal: "Thalassemia type (3=normal; 6=fixed defect; 7=reversible defect)"
    binaryClass: "Heart disease presence (1=yes; 0=no)"

# Analcatdata marketing entry: CSV location plus one description per column.
# NOTE(review): 42 keys are listed below (X1a-X1x, X2a-X2m, X3a-X3c, X5,
# binaryClass) while the description states 39 attributes — confirm against
# the CSV header which of the two is right.
0472_analcatdata_marketing:
  dataset_description: "Marketing: Analcatdata direct mail marketing dataset with binary response. ~1,000 samples and 39 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0472_analcatdata_marketing.csv"
  feature_descriptions:
    X1a: "Demographic attribute 1a"
    X1b: "Demographic attribute 1b"
    X1c: "Demographic attribute 1c"
    X1d: "Demographic attribute 1d"
    X1e: "Demographic attribute 1e"
    X1f: "Demographic attribute 1f"
    X1g: "Demographic attribute 1g"
    X1h: "Demographic attribute 1h"
    X1i: "Demographic attribute 1i"
    X1j: "Demographic attribute 1j"
    X1k: "Demographic attribute 1k"
    X1l: "Demographic attribute 1l"
    X1m: "Demographic attribute 1m"
    X1n: "Demographic attribute 1n"
    X1o: "Demographic attribute 1o"
    X1p: "Demographic attribute 1p"
    X1q: "Demographic attribute 1q"
    X1r: "Demographic attribute 1r"
    X1s: "Demographic attribute 1s"
    X1t: "Demographic attribute 1t"
    X1u: "Demographic attribute 1u"
    X1v: "Demographic attribute 1v"
    X1w: "Demographic attribute 1w"
    X1x: "Demographic attribute 1x"
    X2a: "Demographic attribute 2a"
    X2b: "Demographic attribute 2b"
    X2c: "Demographic attribute 2c"
    X2d: "Demographic attribute 2d"
    X2e: "Demographic attribute 2e"
    X2f: "Demographic attribute 2f"
    X2g: "Demographic attribute 2g"
    X2h: "Demographic attribute 2h"
    X2i: "Demographic attribute 2i"
    X2j: "Demographic attribute 2j"
    X2k: "Demographic attribute 2k"
    X2l: "Demographic attribute 2l"
    X2m: "Demographic attribute 2m"
    X3a: "Demographic attribute 3a"
    X3b: "Demographic attribute 3b"
    X3c: "Demographic attribute 3c"
    X5: "Demographic attribute 5"
    binaryClass: "Mail response (1=responded; 0=not responded)"

# Houses entry: CSV location plus one description per column.
# NOTE(review): `latitude` is described but there is no `longitude` key —
# confirm against the CSV header whether a column is missing here.
0474_houses:
  dataset_description: "Houses (Binary): Census tract housing data with binary classification of high vs. low median house value."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0474_houses.csv"
  feature_descriptions:
    median_house_value: "Median house value ($)"
    median_income: "Median household income"
    housing_median_age: "Median age of housing stock"
    total_rooms: "Total number of rooms"
    total_bedrooms: "Total number of bedrooms"
    population: "Population of the tract"
    households: "Number of households"
    latitude: "Latitude coordinate"
    binaryClass: "High/low house value label"

# Boston Corrected entry: CSV location plus one description per column
# (20 keys listed, matching the stated attribute count).
0476_boston_corrected:
  dataset_description: "Boston Corrected (Binary): Enhanced Boston Housing with binary high/low value classification. 506 samples and 20 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0476_boston_corrected.csv"
  feature_descriptions:
    TOWN: "Town name"
    TOWN_ID: "Town identifier code"
    TRACT: "Census tract identifier"
    LON: "Longitude coordinate"
    LAT: "Latitude coordinate"
    MEDV: "Median value of homes ($1000s)"
    CMEDV: "Corrected median value ($1000s)"
    CRIM: "Crime rate per capita"
    ZN: "Residential land zoned (%)"
    INDUS: "Non-retail business acres (%)"
    CHAS: "Adjacent to Charles River (1=yes; 0=no)"
    NOX: "Nitric oxides concentration (ppm)"
    RM: "Average rooms per dwelling"
    AGE: "Units built prior to 1940 (%)"
    DIS: "Weighted distance to employment centers"
    RAD: "Accessibility to highways index"
    TAX: "Property-tax rate per $10,000"
    PTRATIO: "Pupil-teacher ratio"
    B: "1000(Bk - 0.63)² proportion Black"
    binaryClass: "Home value high/low (1=high; 0=low)"

# Sensory entry: CSV location plus one description per column
# (11 design/layout keys and binaryClass).
0477_sensory:
  dataset_description: "Sensory (Binary): Sensory evaluation experiment data with binary quality outcome."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0477_sensory.csv"
  feature_descriptions:
    Occasion: "Tasting occasion identifier"
    Judges: "Judge identifier"
    Interval: "Time interval between samples"
    Sittings: "Number of sittings per judge"
    Position: "Sample presentation position"
    Squares: "Number of test squares"
    Rows: "Rows in tasting layout"
    Columns: "Columns in tasting layout"
    Halfplot: "Half-plot design flag (1=yes; 0=no)"
    Trellis: "Trellis panel design flag (1=yes; 0=no)"
    Method: "Evaluation method code"
    binaryClass: "Quality pass/fail label"

# AutoMPG entry: CSV location plus one description per column.
# NOTE(review): the UCI Auto MPG data reports `displacement` in cubic inches,
# not cc — confirm the unit against the CSV values before relying on it.
0482_autoMpg:
  dataset_description: "AutoMPG (Binary): Automobile performance dataset with binary fuel efficiency classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0482_autoMpg.csv"
  feature_descriptions:
    cylinders: "Number of cylinders"
    displacement: "Engine displacement (cc)"
    horsepower: "Engine horsepower"
    weight: "Vehicle weight (lbs)"
    acceleration: "0–60 mph acceleration time (s)"
    model: "Model year"
    origin: "Country of origin code"
    binaryClass: "High/low MPG label"

# Analcatdata DMFT entry: CSV location plus one description per column
# (5 keys listed).
0486_analcatdata_dmft:
  dataset_description: "DMFT (Binary): Dental caries study measuring Decayed/Missing/Filled Teeth over time."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0486_analcatdata_dmft.csv"
  feature_descriptions:
    DMFT.Begin: "DMFT score at baseline"
    DMFT.End: "DMFT score at follow-up"
    Gender: "Subject gender (M/F)"
    Ethnic: "Ethnicity code"
    binaryClass: "High/low caries progression label"

# AutoHorse entry: CSV location plus one description per column
# (26 keys listed, matching the stated attribute count).
# NOTE(review): `price` is listed as a feature while no horsepower column
# appears; given the dataset name, binaryClass may be a binarized horsepower
# rather than a price label — confirm against the CSV / dataset source.
0488_autoHorse:
  dataset_description: "AutoHorse (Binary): Extended automobile dataset combining price and performance, with binary price category. ~1,000 samples and 26 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0488_autoHorse.csv"
  feature_descriptions:
    symboling: "Risk factor rating"
    normalized-losses: "Normalized insurance losses"
    make: "Manufacturer name"
    fuel-type: "Fuel type (gas/diesel)"
    aspiration: "Aspiration (std/turbo)"
    num-of-doors: "Number of doors"
    body-style: "Body style"
    drive-wheels: "Drive wheels configuration"
    engine-location: "Engine location (front/rear)"
    wheel-base: "Wheelbase (inches)"
    length: "Length (inches)"
    width: "Width (inches)"
    height: "Height (inches)"
    curb-weight: "Curb weight (lbs)"
    engine-type: "Engine type"
    num-of-cylinders: "Number of cylinders"
    engine-size: "Engine displacement (cc)"
    fuel-system: "Fuel system"
    bore: "Cylinder bore (inches)"
    stroke: "Piston stroke (inches)"
    compression-ratio: "Compression ratio"
    peak-rpm: "Peak RPM"
    city-mpg: "City fuel economy (mpg)"
    highway-mpg: "Highway fuel economy (mpg)"
    price: "Market price (USD)"
    binaryClass: "High/low price label"

# Breast Tumor entry: CSV location plus one description per column.
# NOTE(review): `recurrence` is listed as a feature and binaryClass is also
# described as recurrence; no tumor-size column appears. Presumably
# binaryClass was derived from a different original target — confirm against
# the dataset source.
0491_breastTumor:
  dataset_description: "Breast Tumor (Binary): Breast cancer recurrence dataset with binary recurrence outcome."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0491_breastTumor.csv"
  feature_descriptions:
    age: "Age group of patient"
    menopause: "Menopause status"
    inv-nodes: "Number of positive lymph nodes"
    node-caps: "Node capsule present (yes/no)"
    deg-malig: "Degree of malignancy (1–3)"
    breast: "Breast side (left/right)"
    breast-quad: "Tumor quadrant"
    irradiation: "Irradiation treatment (yes/no)"
    recurrence: "Time to recurrence (months)"
    binaryClass: "Recurrence occurrence (1=yes; 0=no)"

# ada_prior entry: CSV location plus one description per column
# (15 keys listed, matching the stated attribute count; the last key is
# `label` rather than `binaryClass`).
0498_ada_prior:
  dataset_description: "Adult Income (Binary): UCI Adult dataset predicting income >50K with demographic features. ~32,000 samples and 15 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0498_ada_prior.csv"
  feature_descriptions:
    age: "Age in years"
    workclass: "Employment category"
    fnlwgt: "Final weight (census)"
    education: "Education level"
    educationNum: "Years of education"
    maritalStatus: "Marital status"
    occupation: "Occupation category"
    relationship: "Family relationship status"
    race: "Race category"
    sex: "Gender (Male/Female)"
    capitalGain: "Capital gains ($)"
    capitalLoss: "Capital losses ($)"
    hoursPerWeek: "Hours worked per week"
    nativeCountry: "Country of origin"
    label: "Income >50K indicator (1=yes; 0=no)"

# GSS sex survey entry.
# NOTE(review): `path` points at 0498_ada_prior.csv, the same file used by the
# 0498_ada_prior entry, whose keys (age, workclass, fnlwgt, ...) do not match
# the columns described here — confirm the intended CSV.
0498_analcatdata_gsssexsurvey:
  dataset_description: "GSS Sex Survey (Binary): General Social Survey on sexual behavior, classifying AIDS knowledge."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0498_ada_prior.csv"
  feature_descriptions:
    Married: "Marital status (1=married; 0=else)"
    Age: "Respondent age"
    Years_of_education: "Years of formal education"
    Male: "Gender (1=male; 0=female)"
    Religious: "Religious affiliation (1=yes; 0=no)"
    Sex_partners: "Number of sexual partners"
    Income: "Household income bracket"
    Drug_use: "Drug use indicator (1=yes; 0=no)"
    Same_sex_relations: "Same-sex relations indicator (1=yes; 0=no)"
    binaryClass: "AIDS knowledge (1=knows; 0=does not)"

# Boston Housing entry: CSV location plus one description per column
# (13 housing/neighbourhood keys and binaryClass).
0499_boston:
  dataset_description: "Boston Housing (Binary): Classic Boston dataset with binary median value classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0499_boston.csv"
  feature_descriptions:
    CRIM: "Crime rate per capita by town"
    ZN: "Residential land zoned for large lots (%)"
    INDUS: "Proportion non-retail business acres (%)"
    CHAS: "Charles River adjacency (1=yes; 0=no)"
    NOX: "Nitric oxides concentration (ppm)"
    RM: "Average number of rooms"
    AGE: "Age of housing stock (%)"
    DIS: "Distance to employment centers"
    RAD: "Accessibility to highways index"
    TAX: "Property-tax rate per $10,000"
    PTRATIO: "Pupil-teacher ratio"
    B: "1000(Bk - 0.63)² proportion Black"
    LSTAT: "% lower status population"
    binaryClass: "High/low median value label"

# jEdit 4.2-4.3 defect-prediction entry (9 keys listed, matching the stated
# attribute count). The entry key keeps its historical spelling ("jllyt")
# because other code may look it up by name; the CSV it points at is
# 0504_jEdit_4.2_4.3.csv, so the description title is spelled "jEdit".
0504_jllyt_4_2_4_3:
  dataset_description: "jEdit 4.2–4.3: Software metrics dataset for defect prediction. ~100 samples and 9 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0504_jEdit_4.2_4.3.csv"
  feature_descriptions:
    WMC: "Weighted Methods per Class"
    DIT: "Depth of Inheritance Tree"
    NOC: "Number of Children"
    CBO: "Coupling Between Objects"
    RFC: "Response For Class"
    LCOM: "Lack of Cohesion of Methods"
    NPM: "Number of Public Methods"
    LOC: "Lines of Code"
    Bug-count: "Number of reported bugs"
# Pollen entry: CSV location plus one description per column
# (6 keys listed, matching the stated attribute count).
0509_pollen:
  dataset_description: "Pollen: Mineral grain classification dataset with binary target. ~1,000 samples and 6 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0509_pollen.csv"
  feature_descriptions:
    RIDGE: "Number of ridges on grain surface"
    NUB: "Number of nubs"
    CRACK: "Number of cracks"
    WEIGHT: "Grain weight (mg)"
    DENSITY: "Grain density (g/cm³)"
    binaryClass: "Pollen type label (binary)"

# Analcatdata chlamydia entry: CSV location plus one description per column
# (4 keys listed).
0511_analcatdata_chlamydia:
  dataset_description: "Chlamydia (Binary): Epidemiological study of chlamydia case counts with demographic covariates."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0511_analcatdata_chlamydia.csv"
  feature_descriptions:
    Age: "Patient age (years)"
    Gender: "Patient gender (M/F)"
    Race: "Patient race category"
    binaryClass: "Case count class label (binary)"

# NO2 entry: CSV location plus one description per column
# (7 measurement keys and binaryClass).
0520_no2:
  dataset_description: "NO2 (Binary): Urban NO₂ monitoring with meteorological covariates and binary exceedance label."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0520_no2.csv"
  feature_descriptions:
    no2_concentration: "NO₂ concentration (µg/m³)"
    cars_per_hour: "Traffic volume (cars/hour)"
    temperature_at_2m: "Air temperature at 2 m (°C)"
    wind_speed: "Wind speed (m/s)"
    temperature_diff_2m_25m: "Temperature difference between 2 m and 25 m (°C)"
    wind_direction: "Wind direction (degrees)"
    hour_of_day: "Hour of day (0–23)"
    binaryClass: "Exceedance indicator (1=above threshold; 0=below)"

# jEdit 4.0-4.2 defect-prediction entry: CSV location plus one description per
# column (9 keys listed, matching the stated attribute count). `Bug-count` is
# marked as the target in its own description.
0521_jEdit_4_0_4_2:
  dataset_description: "jEdit 4.0–4.2: Software metrics dataset for defect prediction. 106 samples and 9 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0521_jEdit_4.0_4.2.csv"
  feature_descriptions:
    WMC: "Weighted Methods per Class"
    DIT: "Depth of Inheritance Tree"
    NOC: "Number of Children"
    CBO: "Coupling Between Objects"
    RFC: "Response For Class"
    LCOM: "Lack of Cohesion of Methods"
    NPM: "Number of Public Methods"
    LOC: "Lines of Code"
    Bug-count: "Number of reported bugs (target)"

# ICU entry: CSV location plus one description per column. The column codes
# match the Hosmer & Lemeshow ICU study code sheet, so STA, SER, PRE and TYP
# are described with that study's meanings (vital status, admitting service,
# prior ICU admission, admission type).
# NOTE(review): 20 keys are listed while the description states 25
# attributes — confirm the count against the CSV header.
# NOTE(review): in the original study PO2, PH, PCO, BIC and CRE are coded as
# threshold indicators rather than raw measurements — confirm how this CSV
# encodes them before trusting the units below.
0523_ICU:
  dataset_description: "ICU: Intensive care unit patient dataset. ~400 samples and 25 attributes for patient outcome prediction."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0523_ICU.csv"
  feature_descriptions:
    STA: "Vital status (lived/died)"
    AGE: "Patient age (years)"
    SEX: "Gender (M/F)"
    RAC: "Race category"
    SER: "Service at ICU admission (medical/surgical)"
    CAN: "Cancer diagnosis flag"
    CRN: "Chronic renal disease flag"
    INF: "Infection flag"
    CPR: "CPR performed flag"
    SYS: "Systolic blood pressure (mm Hg)"
    HRA: "Heart rate (bpm)"
    PRE: "Previous ICU admission within 6 months flag"
    TYP: "Type of admission (elective/emergency)"
    FRA: "Fracture indicator"
    PO2: "Arterial O₂ partial pressure (mm Hg)"
    PH: "Blood pH"
    PCO: "Arterial CO₂ partial pressure (mm Hg)"
    BIC: "Bicarbonate (mEq/L)"
    CRE: "Serum creatinine (mg/dL)"
    LOC: "Level of consciousness score"

# Cloud entry.
# NOTE(review): `path` points at 0523_ICU.csv, the same file used by the
# 0523_ICU entry, whose keys (STA, AGE, SEX, ...) do not match the columns
# described here — confirm the intended CSV.
# NOTE(review): the SEEDED/TE/TW/NC/SC/NWC column names look like the StatLib
# cloud-seeding experiment, where TE/TW/NC/SC/NWC are rainfall in target and
# control areas; the temperature/cloud-cover descriptions below may be
# wrong — confirm against the dataset source.
0523_cloud:
  dataset_description: "Cloud (Binary): Meteorological cloud type classification dataset with binary target."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0523_ICU.csv"
  feature_descriptions:
    SEEDED: "Seeding indicator (yes/no)"
    TE: "Temperature at eye level (°C)"
    TW: "Wet-bulb temperature (°C)"
    NC: "Cloud cover fraction"
    SC: "Stratus cloud proportion"
    NWC: "Number of weather cells"
    binaryClass: "Cloud type label (binary)"

# Colleges AAUP entry: CSV location plus one description per column
# (16 keys listed: identifiers, salary, compensation and head-count columns,
# then binaryClass).
0526_colleges_aaup:
  dataset_description: "Colleges AAUP: Academic salary and compensation data for AAUP members, ~400 samples and 10+ attributes, binary high/low pay label."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0526_colleges_aaup.csv"
  feature_descriptions:
    FICE: "College FICE code"
    State: "State abbreviation"
    Type: "Institution type"
    Average_salary_full_professors: "Avg salary of full professors (USD)"
    Average_salary_associate_professors: "Avg salary of associate professors (USD)"
    Average_salary_assistant_professors: "Avg salary of assistant professors (USD)"
    Average_salary_all_ranks: "Avg salary across all ranks (USD)"
    Average_compensation_full_professors: "Avg total compensation of full professors (USD)"
    Average_compensation_associate_professors: "Avg total compensation of associate professors (USD)"
    Average_compensation_assistant_professors: "Avg total compensation of assistant professors (USD)"
    Average_compensation_all_ranks: "Avg total compensation across all ranks (USD)"
    Number_of_full_professors: "Count of full professors"
    Number_of_associate_professors: "Count of associate professors"
    Number_of_assistant_professors: "Count of assistant professors"
    Number_of_instructors: "Count of instructors"
    binaryClass: "High/low average salary label"

# Sleuth case2002 entry: CSV location plus one description per column.
# Case study 20.2 of "The Statistical Sleuth" is the birdkeeping and lung
# cancer study; the column codes below (FM, LC, BK, SS, AG, YR) are that
# study's variables, so they are described with its meanings.
# NOTE(review): the study's remaining variable, CD (cigarettes per day), is
# not listed; binaryClass is presumably a binarized version of it — confirm
# against the CSV / dataset source.
0529_sleuth_case2002:
  dataset_description: "Sleuth Case 2002 (Binary): Birdkeeping and lung cancer case-control study from The Statistical Sleuth, with binary outcome."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0529_sleuth_case2002.csv"
  feature_descriptions:
    FM: "Sex of subject (female/male)"
    LC: "Lung cancer status (cancer/no cancer)"
    BK: "Birdkeeping status (bird keeper/not)"
    SS: "Socioeconomic status (high/low)"
    AG: "Age of subject (years)"
    YR: "Years of smoking prior to diagnosis or examination"
    binaryClass: "Binarized target label"

# NKI70 entry: CSV location plus one description per column. 77 keys are
# listed: 6 clinical/follow-up keys (time .. Age), 70 gene-expression keys
# (TSPYL5 .. C20orf46) and `event`.
# NOTE(review): the description says "~5000 gene probes" but only 70
# gene-expression keys are listed; the "70 samples" figure may likewise be a
# confusion with the 70-gene signature — confirm both against the CSV.
# NOTE(review): keys with a numeric suffix (DIAPH3.1, DIAPH3.2, PECI.1,
# IGFBP5.1) look like de-duplicated repeated column names rather than
# transcript variants, and the `_RC` suffix may not mean "reference control"
# — TODO confirm before relying on those descriptions.
0536_nki70:
  dataset_description: "NKI70: Gene expression dataset from NKI breast cancer cohort. 70 samples and ~5000 gene probes, binary outcome."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0536_nki70.arff.csv"
  feature_descriptions:
    time: "Follow-up time after diagnosis (months)"
    Diam: "Tumor diameter (mm)"
    N: "Number of positive lymph nodes"
    ER: "Estrogen receptor status (1 = positive; 0 = negative)"
    Grade: "Histological tumor grade (1–3)"
    Age: "Patient age at diagnosis (years)"
    TSPYL5: "Expression level of TSPYL5 gene"
    Contig63649_RC: "Expression level of contig 63649 reference control probe"
    DIAPH3: "Expression level of DIAPH3 gene"
    NUSAP1: "Expression level of NUSAP1 gene"
    AA555029_RC: "Expression level of AA555029 reference control probe"
    ALDH4A1: "Expression level of ALDH4A1 gene"
    QSCN6L1: "Expression level of QSCN6L1 gene"
    FGF18: "Expression level of FGF18 gene"
    DIAPH3.1: "Expression level of DIAPH3 transcript variant 1"
    Contig32125_RC: "Expression level of contig 32125 reference control probe"
    BBC3: "Expression level of BBC3 (PUMA) gene"
    DIAPH3.2: "Expression level of DIAPH3 transcript variant 2"
    RP5.860F19.3: "Expression level of probe RP5-860F19.3"
    C16orf61: "Expression level of C16orf61 gene"
    SCUBE2: "Expression level of SCUBE2 gene"
    EXT1: "Expression level of EXT1 gene"
    FLT1: "Expression level of FLT1 (VEGFR1) gene"
    GNAZ: "Expression level of GNAZ gene"
    OXCT1: "Expression level of OXCT1 gene"
    MMP9: "Expression level of MMP9 (gelatinase B) gene"
    RUNDC1: "Expression level of RUNDC1 gene"
    Contig35251_RC: "Expression level of contig 35251 reference control probe"
    ECT2: "Expression level of ECT2 gene"
    GMPS: "Expression level of GMPS gene"
    KNTC2: "Expression level of KNTC2 gene"
    WISP1: "Expression level of WISP1 gene"
    CDC42BPA: "Expression level of CDC42BPA gene"
    SERF1A: "Expression level of SERF1A gene"
    AYTL2: "Expression level of AYTL2 gene"
    GSTM3: "Expression level of GSTM3 gene"
    GPR180: "Expression level of GPR180 gene"
    RAB6B: "Expression level of RAB6B gene"
    ZNF533: "Expression level of ZNF533 gene"
    RTN4RL1: "Expression level of RTN4RL1 gene"
    UCHL5: "Expression level of UCHL5 gene"
    PECI: "Expression level of PECI gene"
    MTDH: "Expression level of MTDH gene"
    Contig40831_RC: "Expression level of contig 40831 reference control probe"
    TGFB3: "Expression level of TGFB3 gene"
    MELK: "Expression level of MELK gene"
    COL4A2: "Expression level of COL4A2 gene"
    DTL: "Expression level of DTL gene"
    STK32B: "Expression level of STK32B gene"
    DCK: "Expression level of DCK gene"
    FBXO31: "Expression level of FBXO31 gene"
    GPR126: "Expression level of GPR126 gene"
    SLC2A3: "Expression level of SLC2A3 (GLUT3) gene"
    PECI.1: "Expression level of PECI transcript variant 1"
    ORC6L: "Expression level of ORC6L gene"
    RFC4: "Expression level of RFC4 gene"
    CDCA7: "Expression level of CDCA7 gene"
    LOC643008: "Expression level of LOC643008 predicted gene"
    MS4A7: "Expression level of MS4A7 gene"
    MCM6: "Expression level of MCM6 gene"
    AP2B1: "Expression level of AP2B1 gene"
    C9orf30: "Expression level of C9orf30 gene"
    IGFBP5: "Expression level of IGFBP5 gene"
    HRASLS: "Expression level of HRASLS gene"
    PITRM1: "Expression level of PITRM1 gene"
    IGFBP5.1: "Expression level of IGFBP5 transcript variant 1"
    NMU: "Expression level of NMU (neuromedin U) gene"
    PALM2.AKAP2: "Expression level of PALM2-AKAP2 fusion transcript"
    LGP2: "Expression level of LGP2 gene"
    PRC1: "Expression level of PRC1 gene"
    Contig20217_RC: "Expression level of contig 20217 reference control probe"
    CENPA: "Expression level of CENPA gene"
    EGLN1: "Expression level of EGLN1 gene"
    NM_004702: "Expression level of probe NM_004702 (accession-based)"
    ESM1: "Expression level of ESM1 gene"
    C20orf46: "Expression level of C20orf46 gene"
    event: "Recurrence event indicator (1 = recurrence; 0 = no recurrence)"


# MyIris entry: CSV location plus one description per column
# (5 keys listed, matching the stated attribute count; the last key is
# `class` rather than `binaryClass`).
0540_MyIris:
  dataset_description: "MyIris: Classic Iris flower dataset. 150 samples and 5 attributes for species classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0540_MyIris.csv"
  feature_descriptions:
    sepallength: "Sepal length (cm)"
    sepalwidth: "Sepal width (cm)"
    petallength: "Petal length (cm)"
    petalwidth: "Petal width (cm)"
    class: "Iris species"

# CPU-with-vendor entry (8 keys listed, matching the stated attribute count).
# NOTE(review): `path` points at 0541_plasma_retinol.csv, the same file used
# by the 0541_plasma_retinol entry, whose keys (AGE, SEX, SMOKSTAT, ...) do
# not match the columns described here — confirm the intended CSV.
0541_cpu_with_vendor:
  dataset_description: "CPU with vendor: Machine CPU performance dataset including vendor code. 209 samples and 8 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0541_plasma_retinol.csv"
  feature_descriptions:
    vendor: "CPU vendor code"
    MYCT: "Machine cycle time (ns)"
    MMIN: "Minimum main memory (KB)"
    MMAX: "Maximum main memory (KB)"
    CACH: "Cache size (KB)"
    CHMIN: "Minimum channels"
    CHMAX: "Maximum channels"
    class: "Relative performance category"

# Plasma retinol entry: CSV location plus one description per column
# (13 subject/diet/plasma keys and binaryClass).
0541_plasma_retinol:
  dataset_description: "Plasma Retinol (Binary): Nutritional study on plasma retinol with binary classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0541_plasma_retinol.csv"
  feature_descriptions:
    AGE: "Subject age"
    SEX: "Subject sex (M/F)"
    SMOKSTAT: "Smoking status"
    QUETELET: "BMI (Quetelet index)"
    VITUSE: "Vitamin usage (yes/no)"
    CALORIES: "Daily caloric intake"
    FAT: "Daily fat intake (g)"
    FIBER: "Daily fiber intake (g)"
    ALCOHOL: "Daily alcohol intake (g)"
    CHOLESTEROL: "Serum cholesterol (mg/dL)"
    BETADIET: "Dietary beta-carotene"
    RETDIET: "Dietary retinol"
    BETAPLASMA: "Plasma beta-carotene"
    binaryClass: "High/low retinol status"

# Analcatdata seropositive entry: CSV location plus one description per
# column (4 keys listed).
0546_analcatdata_seropositive:
  dataset_description: "Seropositive (Binary): Serology study classifying test positivity."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0546_analcatdata_seropositive.csv"
  feature_descriptions:
    Age: "Subject age"
    Disease: "Disease code"
    Positive: "Number of positive tests"
    binaryClass: "Seropositive label (1=positive; 0=negative)"

# SocMob entry: CSV location plus one description per column (6 keys listed).
0555_socmob:
  dataset_description: "SocMob (Binary): Intergenerational occupational mobility dataset with binary target."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0555_socmob.csv"
  feature_descriptions:
    fathers_occupation: "Father’s occupation code"
    sons_occupation: "Son’s occupation code"
    family_structure: "Family structure type"
    race: "Race category"
    counts_for_sons_first_occupation: "Count of sons’ first occupation"
    binaryClass: "Mobility class label (binary)"

# Water treatment plant entry: CSV location plus one description per column.
# Column suffixes follow the UCI Water Treatment Plant documentation:
#   -E = input to plant, -P = input to primary settler,
#   -D = input to secondary settler, -S = plant output,
#   RD-*-P / RD-*-S / RD-*-G = performance in the primary settler, in the
#   secondary settler, and globally.
# DBO/DQO are biological/chemical oxygen demand; SSV is volatile suspended
# solids. Units are omitted because the source documentation does not state
# them.
# NOTE(review): the UCI attribute list also contains Q-E and RD-SED-G, which
# are not listed here — confirm against the CSV header.
0560_water_treatment:
  dataset_description: "Water Treatment (Binary): Daily measurements from a waste water treatment plant at successive process stages, with binary safety label."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0560_water-treatment.csv"
  feature_descriptions:
    ZN-E: "Zinc, input to plant"
    PH-E: "pH, input to plant"
    DBO-E: "Biological oxygen demand, input to plant"
    DQO-E: "Chemical oxygen demand, input to plant"
    SS-E: "Suspended solids, input to plant"
    SSV-E: "Volatile suspended solids, input to plant"
    SED-E: "Sediments, input to plant"
    COND-E: "Conductivity, input to plant"
    PH-P: "pH, input to primary settler"
    DBO-P: "Biological oxygen demand, input to primary settler"
    SS-P: "Suspended solids, input to primary settler"
    SSV-P: "Volatile suspended solids, input to primary settler"
    SED-P: "Sediments, input to primary settler"
    COND-P: "Conductivity, input to primary settler"
    PH-D: "pH, input to secondary settler"
    DBO-D: "Biological oxygen demand, input to secondary settler"
    DQO-D: "Chemical oxygen demand, input to secondary settler"
    SS-D: "Suspended solids, input to secondary settler"
    SSV-D: "Volatile suspended solids, input to secondary settler"
    SED-D: "Sediments, input to secondary settler"
    COND-D: "Conductivity, input to secondary settler"
    PH-S: "pH, plant output"
    DBO-S: "Biological oxygen demand, plant output"
    DQO-S: "Chemical oxygen demand, plant output"
    SS-S: "Suspended solids, plant output"
    SSV-S: "Volatile suspended solids, plant output"
    SED-S: "Sediments, plant output"
    COND-S: "Conductivity, plant output"
    RD-DBO-P: "Performance: biological oxygen demand in primary settler"
    RD-SS-P: "Performance: suspended solids in primary settler"
    RD-SED-P: "Performance: sediments in primary settler"
    RD-DBO-S: "Performance: biological oxygen demand in secondary settler"
    RD-DQO-S: "Performance: chemical oxygen demand in secondary settler"
    RD-DBO-G: "Global performance: biological oxygen demand"
    RD-DQO-G: "Global performance: chemical oxygen demand"
    RD-SS-G: "Global performance: suspended solids"
    binaryClass: "Safety classification (1 = safe; 0 = unsafe)"

# Spectrometer (binary) dataset entry: free-text description plus per-feature descriptions.
# NOTE(review): unlike the sibling entries, no `path` key is given here — confirm whether
# the consumer needs one before relying on this entry.
0562_spectrometer:
  dataset_description: "Spectrometer (Binary): Multispectral imaging dataset with hundreds of flux measurements and binary target."
  feature_descriptions:
    # Single-column keys use an ASCII hyphen (as every other hyphenated key in this file does);
    # the previous en dash (U+2013) could not match a real column header.
    ID-type: "Sample identifier"
    Right-Ascension: "Celestial coordinate RA"
    Declination: "Celestial coordinate Dec"
    Scale_Factor: "Instrument scale factor"
    # NOTE(review): the keys below are written as "first–last" ranges (en dash separator), not
    # as individual column names; an exact-name lookup will not match them — confirm how the
    # consumer expands these, and verify the channel counts against the CSV header.
    blue_base_1–blue_base_2: "Blue-band baseline readings"
    red_base_1–red_base_2: "Red-band baseline readings"
    blue-band-flux_1–blue-band-flux_48: "Flux in blue band channels 1–48"
    red-band-flux_1–red-band-flux_48: "Flux in red band channels 1–48"
    binaryClass: "Spectral class label (binary)"

# Teaching-assistant evaluation dataset entry: description, CSV path, per-feature descriptions.
0570_tae:
  dataset_description: "TAE (Binary): Teaching assistant effectiveness dataset with binary pass/fail outcome."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0570_tae.csv"
  # One description string per feature key; `binaryClass` is the label.
  feature_descriptions:
    # Key spelling ("of_not") is kept verbatim; presumably it mirrors the CSV header —
    # confirm before "correcting" it.
    Whether_of_not_the_TA_is_a_native_English_speaker: "TA native English speaker flag"
    Course_instructor: "Instructor identifier"
    Course: "Course identifier"
    Summer_or_regular_semester: "Semester type"
    Class_size: "Number of students"
    binaryClass: "Effectiveness label (pass/fail)"

# Brazil tourism survey dataset entry: description, CSV path, per-feature descriptions.
0572_braziltourism:
  dataset_description: "BrazilTourism (Binary): Survey of travel behavior in Brazil with binary purchase intention label."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0572_braziltourism.csv"
  # One description string per feature key; `binaryClass` is the label.
  feature_descriptions:
    Age: "Respondent age"
    Sex: "Respondent gender"
    Income: "Household income bracket"
    Travel_cost: "Estimated travel cost (local currency)"
    Access_road: "Access to major road indicator"
    Active: "Active traveler indicator"
    Passive: "Passive traveler indicator"
    Logged_income: "Log of income"
    binaryClass: "Purchase intention (1=yes; 0=no)"
# Broadway shows dataset entry: description, CSV path, per-feature descriptions (8 keys).
0575_analcatdata_broadwaymult:
  dataset_description: "Broadway Multivariate: Broadway show performance dataset with weekly attendance and ratings. ~200 samples and 8 attributes. Binary classification of award wins."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0575_analcatdata_broadwaymult.csv"
  # One description string per feature key; `binaryClass` is the label.
  feature_descriptions:
    Show: "Name of the Broadway show"
    Type: "Show type (musical/play)"
    # NOTE(review): "1=new production; 0=original" reads oddly for a revival flag —
    # confirm the encoding against the CSV.
    Revival: "Revival flag (1=new production; 0=original)"
    NYT_rating: "New York Times review rating"
    DN_rating: "Daily News review rating"
    Week_1_attendance: "Attendance in the first week"
    Award: "Tony Award wins count"
    binaryClass: "Award-winning classification (1=won ≥1 award; 0=none)"

# Zoo dataset entry: description, CSV path, per-feature descriptions (16 features + label).
0577_zoo:
  dataset_description: "Zoo (Binary): UCI Zoo dataset with binary animal type classification. 101 samples and 17 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0577_zoo.csv"
  # Every feature except `legs` is described as a 1/0 indicator.
  feature_descriptions:
    hair: "Has hair (1=yes; 0=no)"
    feathers: "Has feathers (1=yes; 0=no)"
    eggs: "Lays eggs (1=yes; 0=no)"
    milk: "Produces milk (1=yes; 0=no)"
    airborne: "Can fly (1=yes; 0=no)"
    aquatic: "Lives in water (1=yes; 0=no)"
    predator: "Is predator (1=yes; 0=no)"
    toothed: "Has teeth (1=yes; 0=no)"
    backbone: "Has backbone (1=yes; 0=no)"
    breathes: "Breathes air (1=yes; 0=no)"
    venomous: "Is venomous (1=yes; 0=no)"
    fins: "Has fins (1=yes; 0=no)"
    legs: "Number of legs"
    tail: "Has tail (1=yes; 0=no)"
    domestic: "Domesticated (1=yes; 0=no)"
    catsize: "Size relative to cat threshold (1=larger; 0=smaller)"
    binaryClass: "Animal class (1=mammal; 0=other)"

# Baseball Hall of Fame dataset entry: description, CSV path, per-feature descriptions.
# NOTE(review): the description says 15 attributes, but 17 keys (16 features + label)
# are listed below — confirm which is right.
0578_analcatdata_halloffame:
  dataset_description: "Hall of Fame Baseball: Player career statistics dataset. ~1,000 samples and 15 attributes. Binary classification of Hall of Fame induction."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0578_analcatdata_halloffame.csv"
  # One description string per feature key; `binaryClass` is the label.
  feature_descriptions:
    Number_seasons: "Total seasons played"
    Games_played: "Total games played"
    At_bats: "Total at-bats"
    Runs: "Total runs scored"
    Hits: "Total hits"
    Doubles: "Total doubles"
    Triples: "Total triples"
    Home_runs: "Total home runs"
    RBIs: "Total runs batted in"
    Walks: "Total bases on balls"
    Strikeouts: "Total strikeouts"
    Batting_average: "Career batting average"
    On_base_pct: "Career on-base percentage"
    Slugging_pct: "Career slugging percentage"
    Fielding_ave: "Career fielding average"
    Position: "Primary fielding position"
    binaryClass: "Hall of Fame induction (1=yes; 0=no)"

# Cars dataset entry: description, CSV path, per-feature descriptions.
# NOTE(review): the description says 8 attributes; 9 keys (8 features + label) are listed.
0579_cars:
  dataset_description: "Cars Performance: Automobile dataset with performance metrics. ~400 samples and 8 attributes. Binary classification of high fuel efficiency."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0579_cars.csv"
  # One description string per feature key; `binaryClass` is the label.
  feature_descriptions:
    name: "Car model name"
    mpg: "Miles per gallon"
    cylinders: "Number of engine cylinders"
    # NOTE(review): units in parentheses are not verifiable from this file — confirm.
    displacement: "Engine displacement (cc)"
    horsepower: "Engine horsepower"
    weight: "Vehicle weight (lbs)"
    acceleration: "0–60 mph acceleration time (s)"
    model.year: "Model year"
    binaryClass: "High/low fuel efficiency (1=mpg above median; 0=below)"

# Daily birth-count dataset entry: description, CSV path, per-feature descriptions (4 keys).
0580_analcatdata_birthday:
  dataset_description: "Birthday Count (Binary): Daily birth counts with binary threshold classification. ~365 samples and 4 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0580_analcatdata_birthday.csv"
  # One description string per feature key; `binaryClass` is the label.
  feature_descriptions:
    Month: "Month of the year (1–12)"
    Day: "Day of the month (1–31)"
    Births: "Number of births"
    binaryClass: "High/low birth count (1=above average; 0=below)"

# Authorship-attribution dataset entry: description, CSV path, per-feature descriptions.
# The features are word-frequency columns keyed by the word itself, plus `BookID` and the label.
0582_analcatdata_authorship:
  dataset_description: "Authorship Attribution: Word frequency dataset for authorship classification. ~1,000 samples and 70+ attributes. Binary classification of author identity."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0582_analcatdata_authorship.csv"
  feature_descriptions:
    a: "Frequency of word 'a'"
    all: "Frequency of word 'all'"
    also: "Frequency of word 'also'"
    an: "Frequency of word 'an'"
    and: "Frequency of word 'and'"
    any: "Frequency of word 'any'"
    are: "Frequency of word 'are'"
    as: "Frequency of word 'as'"
    at: "Frequency of word 'at'"
    be: "Frequency of word 'be'"
    been: "Frequency of word 'been'"
    but: "Frequency of word 'but'"
    by: "Frequency of word 'by'"
    can: "Frequency of word 'can'"
    do: "Frequency of word 'do'"
    down: "Frequency of word 'down'"
    even: "Frequency of word 'even'"
    every: "Frequency of word 'every'"
    for: "Frequency of word 'for'"
    from: "Frequency of word 'from'"
    had: "Frequency of word 'had'"
    has: "Frequency of word 'has'"
    have: "Frequency of word 'have'"
    her: "Frequency of word 'her'"
    his: "Frequency of word 'his'"
    if: "Frequency of word 'if'"
    in: "Frequency of word 'in'"
    into: "Frequency of word 'into'"
    is: "Frequency of word 'is'"
    it: "Frequency of word 'it'"
    its: "Frequency of word 'its'"
    may: "Frequency of word 'may'"
    more: "Frequency of word 'more'"
    must: "Frequency of word 'must'"
    my: "Frequency of word 'my'"
    # Quoted on purpose: a plain `no` resolves to boolean false under YAML 1.1 loaders,
    # which would turn this key into `false` instead of the string "no".
    "no": "Frequency of word 'no'"
    not: "Frequency of word 'not'"
    now: "Frequency of word 'now'"
    of: "Frequency of word 'of'"
    # Quoted on purpose: a plain `on` resolves to boolean true under YAML 1.1 loaders.
    "on": "Frequency of word 'on'"
    one: "Frequency of word 'one'"
    only: "Frequency of word 'only'"
    or: "Frequency of word 'or'"
    our: "Frequency of word 'our'"
    should: "Frequency of word 'should'"
    so: "Frequency of word 'so'"
    some: "Frequency of word 'some'"
    such: "Frequency of word 'such'"
    than: "Frequency of word 'than'"
    that: "Frequency of word 'that'"
    the: "Frequency of word 'the'"
    their: "Frequency of word 'their'"
    then: "Frequency of word 'then'"
    there: "Frequency of word 'there'"
    things: "Frequency of word 'things'"
    this: "Frequency of word 'this'"
    to: "Frequency of word 'to'"
    up: "Frequency of word 'up'"
    upon: "Frequency of word 'upon'"
    was: "Frequency of word 'was'"
    were: "Frequency of word 'were'"
    what: "Frequency of word 'what'"
    when: "Frequency of word 'when'"
    which: "Frequency of word 'which'"
    who: "Frequency of word 'who'"
    will: "Frequency of word 'will'"
    with: "Frequency of word 'with'"
    would: "Frequency of word 'would'"
    your: "Frequency of word 'your'"
    BookID: "Identifier of the book excerpt"
    binaryClass: "Author label (1=Author A; 0=Author B)"

# Hayes-Roth dataset entry: description, CSV path, per-feature descriptions.
# Note the entry key uses an underscore ("hayes_roth") while the CSV filename uses a hyphen.
0585_hayes_roth:
  dataset_description: "Hayes-Roth: Cognitive psychology dataset of subject attributes and problem-solving strategies. ~160 samples and 4 attributes. Binary group classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0585_hayes-roth.csv"
  # One description string per feature key; `binaryClass` is the label.
  feature_descriptions:
    hobby: "Subject hobby code"
    age: "Subject age group"
    educational_level: "Education level code"
    marital_status: "Marital status code"
    binaryClass: "Strategy group (1=novice; 0=expert)"

# Autos dataset entry: description, CSV path, per-feature descriptions.
# NOTE(review): the description says 16 attributes, but 26 keys are listed below — confirm.
0586_autos:
  dataset_description: "Autos (Binary): Automobile dataset with price classification. ~200 samples and 16 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0586_autos.csv"
  # One description string per feature key; `binaryClass` is the label.
  feature_descriptions:
    normalized-losses: "Normalized insurance losses"
    make: "Manufacturer name"
    fuel-type: "Fuel type (gas/diesel)"
    aspiration: "Aspiration type (std/turbo)"
    num-of-doors: "Number of doors"
    body-style: "Body style"
    drive-wheels: "Drive wheels configuration"
    engine-location: "Engine location (front/rear)"
    wheel-base: "Wheelbase (inches)"
    length: "Length (inches)"
    width: "Width (inches)"
    height: "Height (inches)"
    curb-weight: "Curb weight (lbs)"
    engine-type: "Engine type"
    num-of-cylinders: "Number of cylinders"
    # NOTE(review): units in parentheses are not verifiable from this file — confirm.
    engine-size: "Engine displacement (cc)"
    fuel-system: "Fuel system"
    bore: "Cylinder bore (inches)"
    stroke: "Stroke length (inches)"
    compression-ratio: "Engine compression ratio"
    horsepower: "Horsepower"
    peak-rpm: "Peak RPM"
    city-mpg: "City fuel economy"
    highway-mpg: "Highway fuel economy"
    price: "Market price (USD)"
    binaryClass: "High/low price label"

# Contraceptive Method Choice dataset entry: description, CSV path, per-feature descriptions.
0594_cmc:
  dataset_description: "Contraceptive Method Choice (Binary): UCI CMC dataset with binary target for method use. ~1,000 samples and 9 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0594_cmc.csv"
  # One description string per feature key (9 features); `binaryClass` is the label.
  # The description strings contain typographic apostrophes (U+2019), kept as-is.
  feature_descriptions:
    Wifes_age: "Wife’s age in years"
    Wifes_education: "Wife’s education level"
    Husbands_education: "Husband’s education level"
    Number_of_children_ever_born: "Number of children born"
    Wifes_religion: "Wife’s religion code"
    # The trailing `?` is part of the key; it is a legal plain scalar because `?` only acts
    # as an indicator at the start of a key when followed by a space.
    Wifes_now_working?: "Is wife currently working (1=yes; 0=no)"
    Husbands_occupation: "Husband’s occupation code"
    Standard-of-living_index: "Standard of living index"
    Media_exposure: "Media exposure level"
    binaryClass: "Method use (1=uses; 0=does not)"

# Float-glass dataset entry: description, CSV path, per-feature descriptions.
# NOTE(review): the description says 6 attributes, but 10 keys (9 features + label)
# are listed below — confirm.
0602_prnn_fglass:
  dataset_description: "PRNN Float Glass (Binary): Float glass dataset for classification. 214 samples and 6 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0602_prnn_fglass.csv"
  # One description string per feature key; `binaryClass` is the label.
  feature_descriptions:
    RI: "Refractive index"
    Na: "Sodium content"
    Mg: "Magnesium content"
    Al: "Aluminum content"
    Si: "Silicon content"
    K: "Potassium content"
    Ca: "Calcium content"
    Ba: "Barium content"
    Fe: "Iron content"
    binaryClass: "Glass type label (binary)"

# Balance Scale dataset entry: description, CSV path, per-feature descriptions (4 features + label).
0603_balance-scale:
  dataset_description: "Balance Scale (Binary): UCI Balance Scale dataset with binary classification of balance condition. 625 samples and 4 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0603_balance-scale.csv"
  # One description string per feature key; `binaryClass` is the label.
  feature_descriptions:
    left-weight: "Weight on left pan"
    left-distance: "Distance of weight on left pan"
    right-weight: "Weight on right pan"
    right-distance: "Distance of weight on right pan"
    binaryClass: "Scale balance label (1=balanced; 0=unbalanced)"
# Audiology dataset entry: description, CSV path, per-feature descriptions.
# NOTE(review): the description says "60+ attributes"; 51 keys are listed below — confirm.
0604_audiology:
  dataset_description: "Audiology (Binary): Audiological test results and patient history for hearing impairment classification. ~200 samples and 60+ attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0604_audiology.csv"
  # One description string per feature key; `binaryClass` is the label.
  # NOTE(review): the expansions of the abbreviated keys (m_*, s_sn_*, o_ar_*, ...) cannot be
  # checked from this file — verify against the dataset's own documentation.
  feature_descriptions:
    age_gt_60: "Age greater than 60 years flag"
    air: "Air conduction threshold"
    airBoneGap: "Air–bone gap measurement"
    ar_c: "Acoustic reflex at right ear (contralateral)"
    ar_u: "Acoustic reflex at right ear (ipsilateral)"
    bone: "Bone conduction threshold"
    boneAbnormal: "Abnormal bone conduction flag"
    history_buzzing: "History of ear buzzing"
    history_dizziness: "History of dizziness"
    history_fluctuating: "History of fluctuating hearing"
    history_fullness: "History of ear fullness"
    history_heredity: "Family history of hearing loss"
    history_nausea: "History of nausea"
    history_noise: "History of noise exposure"
    history_rec: "History of recent infection"
    history_ringing: "History of tinnitus"
    history_roaring: "History of roaring tinnitus"
    history_vomiting: "History of vomiting"
    late_wave_poor: "Late auditory evoked potential wave poor quality"
    m_at_2k: "Middle ear impedance at 2 kHz"
    m_at_1k: "Middle ear impedance at 1 kHz"
    m_gt_2k: "Metabolic tympanometry at 2 kHz"
    m_gt_1k: "Metabolic tympanometry at 1 kHz"
    m_m_sn_gt_2k: "Tympanogram metric M_SN at >2 kHz"
    m_m_sn_gt_1k: "Tympanogram metric M_SN at >1 kHz"
    m_sn_lt_1k: "Tympanogram metric M_SN at <1 kHz"
    m_sn_lt_2k: "Tympanogram metric M_SN at <2 kHz"
    m_sn_lt_500: "Tympanogram metric M_SN at <500 Hz"
    m_p_sn_gt_2k: "Compliance change P_SN at >2 kHz"
    m_p_sn_gt_1k: "Compliance change P_SN at >1 kHz"
    m_p_sn_gt_500: "Compliance change P_SN at >500 Hz"
    m_sn_mixed: "Mixed-frequency tympanogram metric M_SN"
    mod_sn: "Modulated SN response"
    mod_sn_gt_1k: "Modulated SN response at >1 kHz"
    mod_sn_gt_2k: "Modulated SN response at >2 kHz"
    mod_sn_gt_4k: "Modulated SN response at >4 kHz"
    mod_sn_gt_500: "Modulated SN response at >500 Hz"
    # NOTE(review): `notch_4k` and `notch_at_4k` look like two spellings of the same column —
    # confirm which one exists in the CSV header.
    notch_4k: "Notch at 4 kHz"
    notch_at_4k: "Audiometric notch at 4 kHz"
    o_ar_c: "Otoacoustic emission AR at contralateral"
    o_ar_u: "Otoacoustic emission AR at ipsilateral"
    s_sn_gt_1k: "Transient-evoked otoacoustic emission at >1 kHz"
    s_sn_gt_2k: "TEOAE at >2 kHz"
    s_sn_gt_4k: "TEOAE at >4 kHz"
    speech: "Speech discrimination score"
    static_normal: "Static admittance normal flag"
    tymp: "Tympanogram type"
    viith_nerve_signs: "Facial nerve signs"
    wave_V_delayed: "Brainstem response Wave V delayed"
    waveform_ItoV_prolonged: "ABR I–V interval prolonged"
    binaryClass: "Hearing impairment classification (1=impaired; 0=normal)"

# Primary Tumor dataset entry: description, CSV path, per-feature descriptions.
# Note the entry key uses an underscore ("primary_tumor") while the CSV filename uses a hyphen.
# NOTE(review): the description says 14 attributes, but 18 keys are listed below — confirm.
0605_primary_tumor:
  dataset_description: "Primary Tumor (Binary): Dataset of primary cancer site and metastasis involvement for survival prediction. ~300 samples and 14 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0605_primary-tumor.csv"
  # One description string per feature key; `binaryClass` is the label.
  feature_descriptions:
    age: "Patient age in years"
    sex: "Patient sex (M/F)"
    histologic-type: "Histological tumor type code"
    degree-of-diff: "Degree of differentiation (grade)"
    bone: "Bone metastasis flag"
    bone-marrow: "Bone marrow involvement flag"
    lung: "Lung metastasis flag"
    pleura: "Pleural involvement flag"
    peritoneum: "Peritoneal involvement flag"
    liver: "Liver metastasis flag"
    brain: "Brain metastasis flag"
    skin: "Skin metastasis flag"
    neck: "Neck metastasis flag"
    supraclavicular: "Supraclavicular node involvement flag"
    axillary: "Axillary node involvement flag"
    mediastinum: "Mediastinal node involvement flag"
    abdominal: "Abdominal node involvement flag"
    binaryClass: "Primary tumor site class (binary grouping)"

# Lymphography dataset entry: description, CSV path, per-feature descriptions
# (18 features + label).
0608_lymph:
  dataset_description: "Lymphography (Binary): BNG Lymphography dataset with binary classification of lymph node condition."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0608_lymph.csv"
  # Several keys look truncated (e.g. `block_of_affere`, `exclusion_of_no`); they are kept
  # verbatim — presumably they mirror the CSV header, confirm before editing.
  feature_descriptions:
    lymphatics: "Condition of lymphatic system"
    block_of_affere: "Blockage in afferent lymph vessels"
    bl_of_lymph_c: "Blockage of lymph capillaries"
    bl_of_lymph_s: "Blockage of lymph sinuses"
    by_pass: "Presence of bypass vessels"
    extravasates: "Extravasates presence"
    regeneration_of: "Signs of regeneration"
    early_uptake_in: "Early tracer uptake"
    lym_nodes_dimin: "Diminished lymph nodes"
    lym_nodes_enlar: "Enlarged lymph nodes"
    changes_in_lym: "Structural changes in lymph nodes"
    defect_in_node: "Node defect presence"
    changes_in_node: "Size/shape changes in nodes"
    changes_in_stru: "Changes in node structure"
    special_forms: "Special node forms"
    dislocation_of: "Node dislocation"
    exclusion_of_no: "Node exclusion count"
    no_of_nodes_in: "Number of nodes involved"
    binaryClass: "Diagnosis binary label (1=abnormal; 0=normal)"

# Dermatology dataset entry: description, CSV path, per-feature descriptions
# (34 features + label, matching the 35 attributes stated in the description).
0609_dermatology:
  dataset_description: "Dermatology (Binary): BNG Dermatology dataset for classification of dermatological conditions. 1,000,000 samples and 35 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0609_dermatology.csv"
  # One description string per feature key; `binaryClass` is the label.
  feature_descriptions:
    erythema: "Erythema level"
    scaling: "Scaling severity"
    definite_borders: "Presence of definite lesion borders"
    itching: "Itching present"
    koebner_phenomenon: "Koebner phenomenon present"
    polygonal_papules: "Polygonal papules present"
    follicular_papules: "Follicular papules present"
    oral_mucosal_involvement: "Oral mucosa involvement"
    knee_and_elbow_involvement: "Knee & elbow involvement"
    scalp_involvement: "Scalp involvement"
    family_history: "Family history of skin disease"
    melanin_incontinence: "Melanin incontinence"
    eosinophils_in_the_infiltrate: "Eosinophils in infiltrate"
    PNL_infiltrate: "Polymorphonuclear infiltrate"
    fibrosis_of_the_papillary_dermis: "Papillary dermis fibrosis"
    exocytosis: "Exocytosis present"
    acanthosis: "Acanthosis present"
    hyperkeratosis: "Hyperkeratosis present"
    parakeratosis: "Parakeratosis present"
    clubbing_of_the_rete_ridges: "Clubbing of rete ridges"
    elongation_of_the_rete_ridges: "Elongation of rete ridges"
    thinning_of_the_suprapapillary_epidermis: "Suprapapillary epidermis thinning"
    spongiform_pustule: "Spongiform pustule present"
    # Key spelling ("microabcess") is kept verbatim; presumably it mirrors the CSV header.
    munro_microabcess: "Munro microabscess present"
    focal_hypergranulosis: "Focal hypergranulosis present"
    disappearance_of_the_granular_layer: "Granular layer disappearance"
    vacuolisation_and_damage_of_basal_layer: "Basal layer damage"
    spongiosis: "Spongiosis present"
    saw_tooth_appearance_of_retes: "Saw-tooth rete ridge appearance"
    follicular_horn_plug: "Follicular horn plug present"
    perifollicular_parakeratosis: "Perifollicular parakeratosis present"
    inflammatory_mononuclear_infiltrate: "Inflammatory mononuclear infiltrate"
    band_like_infiltrate: "Band-like infiltrate present"
    Age: "Patient age (years)"
    binaryClass: "Disease class (binary grouping)"

# Flags dataset entry: description, CSV path, per-feature descriptions.
# NOTE(review): the description says 25 attributes, but 29 keys are listed below — confirm.
0611_flags:
  dataset_description: "Flags (Binary): World flag attributes dataset for flag recognition. ~200 samples and 25 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0611_flags.csv"
  # One description string per feature key; `binaryClass` is the label.
  feature_descriptions:
    landmass: "Continent code"
    zone: "Flag zone code"
    area: "Flag area category"
    population: "Population category of country"
    language: "Official language code"
    religion: "Dominant religion code"
    bars: "Number of horizontal bars"
    stripes: "Number of vertical stripes"
    colours: "Number of distinct colors"
    red: "Red color present flag"
    green: "Green color present flag"
    blue: "Blue color present flag"
    gold: "Gold color present flag"
    white: "White color present flag"
    black: "Black color present flag"
    orange: "Orange color present flag"
    mainhue: "Main hue index"
    circles: "Number of circles"
    crosses: "Number of crosses"
    saltires: "Number of saltires"
    quarters: "Number of quarters"
    sunstars: "Number of sun stars"
    crescent: "Crescent present flag"
    triangle: "Triangle present flag"
    icon: "Emblem/icon present flag"
    animate: "Animate symbol present flag"
    text: "Text present flag"
    topleft: "Topleft emblem flag"
    binaryClass: "Flag classification label (binary)"

# Page Blocks dataset entry: description, CSV path, per-feature descriptions
# (10 features + label, matching the 11 attributes stated in the description).
0618_page-blocks:
  dataset_description: "Page Blocks (Binary): UCI Page Blocks dataset with binary page block classification. 5,473 samples and 11 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0618_page-blocks.csv"
  # One description string per feature key; `binaryClass` is the label.
  feature_descriptions:
    height: "Block height"
    # Key spelling ("lenght") is kept verbatim; presumably it mirrors the CSV header —
    # confirm before "correcting" it.
    lenght: "Block length"
    area: "Block area"
    eccen: "Block eccentricity"
    p_black: "Percentage of black pixels"
    p_and: "Percentage of ‘and’ pixels"
    mean_tr: "Mean transition rate"
    blackpix: "Black pixel count"
    blackand: "Black-and-white pixel transitions"
    wb_trans: "White-to-black transitions"
    binaryClass: "Block type class (binary grouping)"

# German General Social Survey dataset entry: description, CSV path, per-feature descriptions.
0620_analcatdata_germangss:
  dataset_description: "German GSS (Binary): German General Social Survey with voting and demographic features, binary outcome."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0620_analcatdata_germangss.csv"
  # One description string per feature key (5 features); `binaryClass` is the label.
  feature_descriptions:
    Political_system: "Political system satisfaction"
    Age: "Respondent age"
    Time_of_survey: "Time of survey (year)"
    Schooling: "Years of schooling"
    Region: "Region code"
    binaryClass: "Binary opinion label"

# Grub Damage dataset entry: description, CSV path, per-feature descriptions.
# Note the entry key uses an underscore ("grub_damage") while the CSV filename uses a hyphen.
0621_grub_damage:
  dataset_description: "Grub Damage (Binary): Soil damage by grubs dataset with environmental covariates."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0621_grub-damage.csv"
  # One description string per feature key (8 features); `binaryClass` is the label.
  feature_descriptions:
    year_zone: "Year-associated damage zone"
    year: "Year of observation"
    strip: "Field strip identifier"
    pdk: "Plant damage key index"
    damage_rankRJT: "Damage rank (RJT scale)"
    damage_rankALL: "Overall damage rank"
    # Was `dry_or_irrr` (tripled "r"); the description reads "Dry or irrigated", i.e. "irr".
    # NOTE(review): confirm the exact spelling against the CSV header.
    dry_or_irr: "Dry or irrigated field flag"
    zone: "Damage zone code"
    binaryClass: "Damage presence (1=yes; 0=no)"

# Sylva Prior dataset entry: description, CSV path, per-feature descriptions.
# Unlike the neighbouring entries, the target key here is `label`, not `binaryClass`.
0629_sylva_prior:
  dataset_description: "Sylva Prior (Binary): Forest soil dataset with topographic and soil-type features, binary grouping."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0629_sylva_prior.csv"
  # The listing has a primary feature group followed by a `dup_`-prefixed group of the same names.
  feature_descriptions:
    Elevation: "Elevation (m)"
    Aspect: "Aspect angle (degrees)"
    Slope: "Slope steepness (%)"
    Horizontal_Distance_To_Hydrology: "Horizontal distance to water features (m)"
    Vertical_Distance_To_Hydrology: "Vertical distance to water features (m)"
    Horizontal_Distance_To_Roadways: "Distance to nearest roadway (m)"
    Hillshade_9am: "Hillshade at 9 AM"
    Hillshade_Noon: "Hillshade at noon"
    Hillshade_3pm: "Hillshade at 3 PM"
    Horizontal_Distance_To_Fire_Points: "Distance to nearest fire point (m)"
    Rawah_Wilderness_Area: "Rawah Wilderness area flag"
    Neota_Wilderness_Area: "Neota Wilderness area flag"
    Comanche_Peak_Wilderness_Area: "Comanche Peak Wilderness area flag"
    Cache_la_Poudre_Wilderness_Area: "Cache la Poudre Wilderness area flag"
    # NOTE(review): this key is a "first–last" range (en dash separator), not a single column
    # name; an exact-name lookup will not match it — confirm how the consumer expands it.
    Soil_Type_1–Soil_Type_40: "Binary flags for soil type categories 1 to 40"
    dup_Elevation: "Duplicate elevation measurement"
    dup_Aspect: "Duplicate aspect measurement"
    dup_Slope: "Duplicate slope measurement"
    # NOTE(review): there is no `dup_Vertical_Distance_To_Hydrology` key — confirm whether
    # that column exists in the CSV.
    dup_Horizontal_Distance_To_Hydrology: "Duplicate hydrology distance"
    dup_Horizontal_Distance_To_Roadways: "Duplicate roadway distance"
    dup_Hillshade_9am: "Duplicate hillshade at 9 AM"
    dup_Hillshade_Noon: "Duplicate hillshade at noon"
    dup_Hillshade_3pm: "Duplicate hillshade at 3 PM"
    dup_Horizontal_Distance_To_Fire_Points: "Duplicate fire point distance"
    dup_Rawah_Wilderness_Area: "Duplicate Rawah wilderness flag"
    dup_Neota_Wilderness_Area: "Duplicate Neota wilderness flag"
    dup_Comanche_Peak_Wilderness_Area: "Duplicate Comanche Peak wilderness flag"
    dup_Cache_la_Poudre_Wilderness_Area: "Duplicate Cache la Poudre wilderness flag"
    # Range key again (see the soil-type note above); this one stops at 39, not 40.
    dup_Soil_Type_1–dup_Soil_Type_39: "Duplicate soil-type flags for categories 1 to 39"
    label: "Forest cover type class (binary grouping)"
# KC1-Top5 dataset entry: description, CSV path, per-feature descriptions.
# Features are class-level metrics followed by min/max/avg/sum aggregates of method-level
# metrics; `DL` is the last key.
# NOTE(review): the description says 74 attributes, but 90 keys are listed below — confirm.
0633_kc1-top5:
  dataset_description: "KC1-Top5: Software code metrics for the KC1 dataset, top-5 percentile of modules. ~1,000 samples and 74 attributes. Predicts defect count."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0633_kc1-top5.csv"
  feature_descriptions:
    PERCENT_PUB_DATA: "Percentage of public data fields"
    ACCESS_TO_PUB_DATA: "Number of accesses to public data fields"
    COUPLING_BETWEEN_OBJECTS: "Coupling count between classes"
    DEPTH: "Inheritance tree depth"
    LACK_OF_COHESION_OF_METHODS: "Cohesion metric"
    NUM_OF_CHILDREN: "Number of immediate subclasses"
    DEP_ON_CHILD: "Number of subclasses dependent on this class"
    FAN_IN: "Number of components calling this module"
    RESPONSE_FOR_CLASS: "Response-for-class metric"
    WEIGHTED_METHODS_PER_CLASS: "Sum of method complexities"
    # NOTE(review): `minLOC_B` / `maxLOC_B` differ from the `avgLOC_BLANK` / `sumLOC_BLANK`
    # naming used further down — confirm the real column names.
    minLOC_B: "Minimum lines of code in a method (before comment stripping)"
    minLOC_CODE_AND_COMMENT: "Minimum LOC including comments"
    minLOC_COMMENTS: "Minimum comment LOC"
    # Casing fixed (was `minCYClomatic_COMPLEXITY`) to agree with the max/avg/sum
    # `CYCLOMATIC_COMPLEXITY` keys in this entry.
    minCYCLOMATIC_COMPLEXITY: "Minimum McCabe complexity"
    minDESIGN_COMPLEXITY: "Minimum design complexity"
    minESSENTIAL_COMPLEXITY: "Minimum essential complexity"
    minLOC_EXECUTABLE: "Minimum executable LOC"
    minHALSTEAD_CONTENT: "Minimum Halstead content"
    minHALSTEAD_DIFFICULTY: "Minimum Halstead difficulty"
    minHALSTEAD_EFFORT: "Minimum Halstead effort"
    minHALSTEAD_ERROR_EST: "Minimum Halstead error estimate"
    minHALSTEAD_LEVEL: "Minimum Halstead level"
    minHALSTEAD_PROG_TIME: "Minimum Halstead programming time"
    minHALSTEAD_VOLUME: "Minimum Halstead volume"
    minNUM_OPERANDS: "Minimum number of operands"
    minNUM_OPERATORS: "Minimum number of operators"
    # NOTE(review): `minUNIQUE_*` lacks the `NUM_` infix used by the max/avg/sum
    # `NUM_UNIQUE_*` keys — confirm the real column names.
    minUNIQUE_OPERANDS: "Minimum number of unique operands"
    minUNIQUE_OPERATORS: "Minimum number of unique operators"
    maxLOC_B: "Maximum LOC in a method (before comments)"
    maxLOC_CODE_AND_COMMENT: "Maximum LOC including comments"
    maxLOC_COMMENTS: "Maximum comment LOC"
    maxCYCLOMATIC_COMPLEXITY: "Maximum McCabe complexity"
    maxDESIGN_COMPLEXITY: "Maximum design complexity"
    maxESSENTIAL_COMPLEXITY: "Maximum essential complexity"
    maxLOC_EXECUTABLE: "Maximum executable LOC"
    maxHALSTEAD_CONTENT: "Maximum Halstead content"
    maxHALSTEAD_DIFFICULTY: "Maximum Halstead difficulty"
    maxHALSTEAD_EFFORT: "Maximum Halstead effort"
    maxHALSTEAD_ERROR_EST: "Maximum Halstead error estimate"
    maxHALSTEAD_LEVEL: "Maximum Halstead level"
    maxHALSTEAD_PROG_TIME: "Maximum Halstead programming time"
    maxHALSTEAD_VOLUME: "Maximum Halstead volume"
    maxNUM_OPERANDS: "Maximum number of operands"
    maxNUM_OPERATORS: "Maximum number of operators"
    maxNUM_UNIQUE_OPERANDS: "Maximum number of unique operands"
    maxNUM_UNIQUE_OPERATORS: "Maximum number of unique operators"
    maxLOC_TOTAL: "Maximum total LOC"
    avgLOC_BLANK: "Average blank LOC"
    avgBRANCH_COUNT: "Average branch count"
    avgLOC_CODE_AND_COMMENT: "Average LOC including comments"
    avgLOC_COMMENTS: "Average comment LOC"
    avgCYCLOMATIC_COMPLEXITY: "Average McCabe complexity"
    avgDESIGN_COMPLEXITY: "Average design complexity"
    avgESSENTIAL_COMPLEXITY: "Average essential complexity"
    avgLOC_EXECUTABLE: "Average executable LOC"
    avgHALSTEAD_CONTENT: "Average Halstead content"
    avgHALSTEAD_DIFFICULTY: "Average Halstead difficulty"
    avgHALSTEAD_EFFORT: "Average Halstead effort"
    avgHALSTEAD_ERROR_EST: "Average Halstead error estimate"
    avgHALSTEAD_LENGTH: "Average Halstead program length"
    avgHALSTEAD_LEVEL: "Average Halstead level"
    avgHALSTEAD_PROG_TIME: "Average Halstead programming time"
    avgHALSTEAD_VOLUME: "Average Halstead volume"
    avgNUM_OPERANDS: "Average number of operands"
    avgNUM_OPERATORS: "Average number of operators"
    avgNUM_UNIQUE_OPERANDS: "Average number of unique operands"
    avgNUM_UNIQUE_OPERATORS: "Average number of unique operators"
    avgLOC_TOTAL: "Average total LOC"
    sumLOC_BLANK: "Sum of blank LOC"
    sumBRANCH_COUNT: "Sum of branch counts"
    sumLOC_CODE_AND_COMMENT: "Sum of LOC including comments"
    sumLOC_COMMENTS: "Sum of comment LOC"
    sumCYCLOMATIC_COMPLEXITY: "Sum of McCabe complexity"
    sumDESIGN_COMPLEXITY: "Sum of design complexity"
    sumESSENTIAL_COMPLEXITY: "Sum of essential complexity"
    sumLOC_EXECUTABLE: "Sum of executable LOC"
    sumHALSTEAD_CONTENT: "Sum of Halstead content"
    sumHALSTEAD_DIFFICULTY: "Sum of Halstead difficulty"
    sumHALSTEAD_EFFORT: "Sum of Halstead effort"
    sumHALSTEAD_ERROR_EST: "Sum of Halstead error estimate"
    sumHALSTEAD_LENGTH: "Sum of Halstead program length"
    sumHALSTEAD_LEVEL: "Sum of Halstead level"
    sumHALSTEAD_PROG_TIME: "Sum of Halstead programming time"
    sumHALSTEAD_VOLUME: "Sum of Halstead volume"
    sumNUM_OPERANDS: "Sum of operands"
    sumNUM_OPERATORS: "Sum of operators"
    sumNUM_UNIQUE_OPERANDS: "Sum of unique operands"
    sumNUM_UNIQUE_OPERATORS: "Sum of unique operators"
    sumLOC_TOTAL: "Sum of total LOC"
    DL: "Defect likelihood estimate"

# Mozilla4 dataset entry: description, CSV path, per-feature descriptions.
# NOTE(review): the description says 35 attributes, but only 6 keys are listed below, and
# there is no separate label key — confirm which column is the target.
0634_mozilla4:
  dataset_description: "Mozilla4: Software module metrics for the Mozilla project version 4. ~1,500 samples and 35 attributes. Predicts defect proneness."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0634_mozilla4.csv"
  # One description string per feature key.
  feature_descriptions:
    id: "Module identifier"
    start: "Start line number"
    end: "End line number"
    event: "Number of logged events"
    size: "Module size (LOC)"
    state: "Module state code"

# PC4 software-defect dataset entry: description, CSV path, per-feature descriptions
# (37 module metrics + the `c` target key).
0637_pc4:
  # Description corrected: "PC" is a NASA Metrics Data Program project code (not
  # "Pacific Coffee"), and the attribute count now matches the 38 keys listed below.
  dataset_description: "PC4 (NASA MDP) Metrics: Software module metrics from the NASA Metrics Data Program, same schema as PC3. ~1,000 samples and 38 attributes. Predict bugs."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0637_pc4.csv"
  feature_descriptions:
    LOC_BLANK: "Blank lines of code"
    BRANCH_COUNT: "Branch count"
    CALL_PAIRS: "Call pair count"
    LOC_CODE_AND_COMMENT: "LOC including comments"
    LOC_COMMENTS: "Lines of comments"
    CONDITION_COUNT: "Conditional statement count"
    CYCLOMATIC_COMPLEXITY: "McCabe complexity"
    CYCLOMATIC_DENSITY: "Complexity per LOC"
    DECISION_COUNT: "Decision point count"
    DECISION_DENSITY: "Decisions per LOC"
    DESIGN_COMPLEXITY: "Design complexity metric"
    DESIGN_DENSITY: "Design per LOC"
    EDGE_COUNT: "Control-flow graph edges"
    ESSENTIAL_COMPLEXITY: "Essential complexity"
    ESSENTIAL_DENSITY: "Essential per LOC"
    LOC_EXECUTABLE: "Executable LOC"
    PARAMETER_COUNT: "Function parameter count"
    HALSTEAD_CONTENT: "Halstead content"
    HALSTEAD_DIFFICULTY: "Halstead difficulty"
    HALSTEAD_EFFORT: "Halstead effort"
    HALSTEAD_ERROR_EST: "Halstead error estimate"
    HALSTEAD_LENGTH: "Halstead length"
    HALSTEAD_LEVEL: "Halstead level"
    HALSTEAD_PROG_TIME: "Halstead programming time"
    HALSTEAD_VOLUME: "Halstead volume"
    MAINTENANCE_SEVERITY: "Maintenance severity estimate"
    MODIFIED_CONDITION_COUNT: "Modified condition count"
    MULTIPLE_CONDITION_COUNT: "Multiple condition count"
    NODE_COUNT: "Control flow nodes"
    NORMALIZED_CYCLOMATIC_COMPLEXITY: "Normalized complexity"
    NUM_OPERANDS: "Number of operands"
    NUM_OPERATORS: "Number of operators"
    NUM_UNIQUE_OPERANDS: "Unique operands count"
    NUM_UNIQUE_OPERATORS: "Unique operators count"
    NUMBER_OF_LINES: "Total lines of code"
    PERCENT_COMMENTS: "Percentage of comment lines"
    LOC_TOTAL: "Total LOC"
    # Target key for this entry (the neighbouring binary datasets use `binaryClass` instead).
    # NOTE(review): described as a count, while the dataset description says "Predict bugs" —
    # confirm whether the column is a count or a defect flag.
    c: "Defect count"
# PC3: one of the NASA Metrics Data Program (MDP) module-level defect datasets.
# NOTE(review): sample/attribute counts follow the public OpenML listing; confirm
# against the cleaned CSV.
0638_pc3:
  dataset_description: "PC3: NASA MDP software defect dataset of module-level static code metrics. ~1,500 samples and 38 attributes. Predict bugs."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0638_pc3.csv"
  feature_descriptions:
    # Line-count, control-flow and complexity measures
    LOC_BLANK: "Blank lines of code"
    BRANCH_COUNT: "Branch count"
    CALL_PAIRS: "Call pair count"
    LOC_CODE_AND_COMMENT: "LOC including comments"
    LOC_COMMENTS: "Lines of comments"
    CONDITION_COUNT: "Conditional statement count"
    CYCLOMATIC_COMPLEXITY: "McCabe complexity"
    CYCLOMATIC_DENSITY: "Complexity per LOC"
    DECISION_COUNT: "Decision point count"
    DECISION_DENSITY: "Decisions per LOC"
    DESIGN_COMPLEXITY: "Design complexity metric"
    DESIGN_DENSITY: "Design per LOC"
    EDGE_COUNT: "Control-flow graph edges"
    ESSENTIAL_COMPLEXITY: "Essential complexity"
    ESSENTIAL_DENSITY: "Essential per LOC"
    LOC_EXECUTABLE: "Executable LOC"
    PARAMETER_COUNT: "Function parameter count"
    # Halstead measures
    HALSTEAD_CONTENT: "Halstead content"
    HALSTEAD_DIFFICULTY: "Halstead difficulty"
    HALSTEAD_EFFORT: "Halstead effort"
    HALSTEAD_ERROR_EST: "Halstead error estimate"
    HALSTEAD_LENGTH: "Halstead length"
    HALSTEAD_LEVEL: "Halstead level"
    HALSTEAD_PROG_TIME: "Halstead programming time"
    HALSTEAD_VOLUME: "Halstead volume"
    # Remaining counts and ratios
    MAINTENANCE_SEVERITY: "Maintenance severity estimate"
    MODIFIED_CONDITION_COUNT: "Modified condition count"
    MULTIPLE_CONDITION_COUNT: "Multiple condition count"
    NODE_COUNT: "Control flow nodes"
    NORMALIZED_CYCLOMATIC_COMPLEXITY: "Normalized complexity"
    NUM_OPERANDS: "Number of operands"
    NUM_OPERATORS: "Number of operators"
    NUM_UNIQUE_OPERANDS: "Unique operands count"
    NUM_UNIQUE_OPERATORS: "Unique operators count"
    NUMBER_OF_LINES: "Total lines of code"
    PERCENT_COMMENTS: "Percentage of comment lines"
    LOC_TOTAL: "Total LOC"
    # NOTE(review): upstream `c` is presumably a TRUE/FALSE defect flag rather than a count; confirm.
    c: "Defect count"
# JM1: NASA module-level defect dataset built from McCabe and Halstead measures.
# The single-letter columns (n, v, l, d, i, e, b, t) are Halstead quantities as
# defined in the PROMISE repository header for JM1, not control-flow counts.
0639_jm1:
  dataset_description: "JM1: NASA software metrics dataset. ~10,000 samples and 22 attributes. Predicts defects."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0639_jm1.csv"
  feature_descriptions:
    # McCabe measures
    loc: "Total lines of code"
    v(g): "Cyclomatic complexity"
    ev(g): "Essential complexity"
    iv(g): "Design complexity"
    # Halstead base and derived measures
    n: "Halstead total operators + operands"
    v: "Halstead volume"
    l: "Halstead program length"
    d: "Halstead difficulty"
    i: "Halstead intelligence"
    e: "Halstead effort"
    b: "Halstead error estimate"
    t: "Halstead time estimator"
    # Line counts.
    # NOTE(review): the upstream header spells these columns with a lowercase "l"
    # (lOCode, lOComment, lOBlank); confirm the keys below match the CSV header.
    IOCode: "Line count of code"
    IOComment: "Count of lines of comments"
    IOBlank: "Count of blank lines"
    IOCodeAndComment: "Count of lines containing both code and comments"
    # Operator / operand counts
    uniq_Op: "Unique operators"
    uniq_Opnd: "Unique operands"
    total_Op: "Total operators"
    total_Opnd: "Total operands"
    branchCount: "Branch count of the flow graph"
    defects: "Whether the module has one or more reported defects (target)"
# MC1: one of the NASA Metrics Data Program (MDP) module-level defect datasets.
# NOTE(review): sample/attribute counts follow the public OpenML listing, which
# has more columns than are described below; confirm against the cleaned CSV.
0641_mc1:
  dataset_description: "MC1: NASA MDP software defect dataset of module-level static code metrics. ~9,500 samples and 39 attributes. Predict fault proneness."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0641_mc1.csv"
  feature_descriptions:
    # Line-count, control-flow and complexity measures
    LOC_BLANK: "Blank LOC"
    BRANCH_COUNT: "Branch count"
    CALL_PAIRS: "Call pair count"
    LOC_CODE_AND_COMMENT: "LOC + comments"
    LOC_COMMENTS: "Comment LOC"
    CONDITION_COUNT: "Conditional statements"
    CYCLOMATIC_COMPLEXITY: "McCabe complexity"
    CYCLOMATIC_DENSITY: "Complexity density"
    DECISION_COUNT: "Decision statements"
    DECISION_DENSITY: "Decision density"
    DESIGN_COMPLEXITY: "Design complexity"
    DESIGN_DENSITY: "Design density"
    EDGE_COUNT: "CFG edge count"
    ESSENTIAL_COMPLEXITY: "Essential complexity"
    ESSENTIAL_DENSITY: "Essential complexity density"
    LOC_EXECUTABLE: "Executable LOC"
    PARAMETER_COUNT: "Parameter count"
    GLOBAL_DATA_COMPLEXITY: "Global data complexity"
    GLOBAL_DATA_DENSITY: "Global data density"
    # Halstead measures
    HALSTEAD_CONTENT: "Halstead content"
    HALSTEAD_DIFFICULTY: "Halstead difficulty"
    HALSTEAD_ERROR_EST: "Halstead error estimate"
    HALSTEAD_LENGTH: "Halstead length"
    HALSTEAD_LEVEL: "Halstead level"
    HALSTEAD_PROG_TIME: "Halstead programming time"
    HALSTEAD_VOLUME: "Halstead volume"
    # Remaining counts and ratios
    MAINTENANCE_SEVERITY: "Maintenance severity"
    MULTIPLE_CONDITION_COUNT: "Multiple condition count"
    NODE_COUNT: "CFG node count"
    NORMALIZED_CYCLOMATIC_COMPLEXITY: "Normalized complexity"
    NUM_OPERANDS: "Operand count"
    NUM_OPERATORS: "Operator count"
    NUM_UNIQUE_OPERANDS: "Unique operands"
    NUM_UNIQUE_OPERATORS: "Unique operators"
    NUMBER_OF_LINES: "Total LOC"
    PERCENT_COMMENTS: "Comment percentage"
    LOC_TOTAL: "Total LOC"
    # NOTE(review): upstream `c` is presumably a TRUE/FALSE defect flag rather than a count; confirm.
    c: "Defect count"
# MC2: one of the NASA Metrics Data Program (MDP) module-level defect datasets.
# NOTE(review): sample/attribute counts follow the public OpenML listing, which
# has more columns than are described below; confirm against the cleaned CSV.
0640_mc2:
  dataset_description: "MC2: NASA MDP software defect dataset of module-level static code metrics. ~160 samples and 40 attributes. Predict fault proneness."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0640_mc2.csv"
  feature_descriptions:
    # Line-count, control-flow and complexity measures
    LOC_BLANK: "Blank LOC"
    BRANCH_COUNT: "Branch count"
    CALL_PAIRS: "Call pair count"
    LOC_CODE_AND_COMMENT: "LOC + comments"
    LOC_COMMENTS: "Comment LOC"
    CONDITION_COUNT: "Conditional statements"
    CYCLOMATIC_COMPLEXITY: "McCabe complexity"
    CYCLOMATIC_DENSITY: "Complexity density"
    DECISION_COUNT: "Decision statements"
    DECISION_DENSITY: "Decision density"
    DESIGN_COMPLEXITY: "Design complexity"
    DESIGN_DENSITY: "Design density"
    EDGE_COUNT: "CFG edge count"
    ESSENTIAL_COMPLEXITY: "Essential complexity"
    ESSENTIAL_DENSITY: "Essential complexity density"
    LOC_EXECUTABLE: "Executable LOC"
    PARAMETER_COUNT: "Parameter count"
    GLOBAL_DATA_COMPLEXITY: "Global data complexity"
    GLOBAL_DATA_DENSITY: "Global data density"
    # Halstead measures
    HALSTEAD_CONTENT: "Halstead content"
    HALSTEAD_DIFFICULTY: "Halstead difficulty"
    HALSTEAD_ERROR_EST: "Halstead error estimate"
    HALSTEAD_LENGTH: "Halstead length"
    HALSTEAD_LEVEL: "Halstead level"
    HALSTEAD_PROG_TIME: "Halstead programming time"
    HALSTEAD_VOLUME: "Halstead volume"
    # Remaining counts and ratios
    MAINTENANCE_SEVERITY: "Maintenance severity"
    MULTIPLE_CONDITION_COUNT: "Multiple condition count"
    NODE_COUNT: "CFG node count"
    NORMALIZED_CYCLOMATIC_COMPLEXITY: "Normalized complexity"
    NUM_OPERANDS: "Operand count"
    NUM_OPERATORS: "Operator count"
    NUM_UNIQUE_OPERANDS: "Unique operands"
    NUM_UNIQUE_OPERATORS: "Unique operators"
    NUMBER_OF_LINES: "Total LOC"
    PERCENT_COMMENTS: "Comment percentage"
    LOC_TOTAL: "Total LOC"
    # NOTE(review): upstream `c` is presumably a TRUE/FALSE defect flag rather than a count; confirm.
    c: "Defect count"

# AR4: embedded-software defect dataset from the PROMISE repository (Softlab).
# NOTE(review): sample/attribute counts follow the public OpenML listing, which
# has more columns than are described below; confirm against the cleaned CSV.
0643_ar4:
  dataset_description: "AR4: Embedded-software defect dataset from a Turkish white-goods manufacturer (PROMISE/Softlab). ~100 samples and 30 attributes. Predict code quality issues."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0643_ar4.csv"
  feature_descriptions:
    # Line counts
    total_loc: "Total LOC"
    blank_loc: "Blank LOC"
    comment_loc: "Comment LOC"
    code_and_comment_loc: "LOC + comments"
    executable_loc: "Executable LOC"
    # Operator / operand counts and Halstead measures
    unique_operands: "Unique operands count"
    unique_operators: "Unique operators count"
    total_operands: "Total operands"
    total_operators: "Total operators"
    halstead_vocabulary: "Halstead vocabulary size"
    halstead_length: "Halstead length"
    halstead_volume: "Halstead volume"
    halstead_difficulty: "Halstead difficulty"
    halstead_effort: "Halstead effort"
    halstead_error: "Halstead error estimate"
    # NOTE(review): the sibling entry 0645_ar6 names this column `halstead_time`
    # and also lists `halstead_level`; confirm this key against the CSV header.
    halstead_program_time: "Halstead program time"
    # Control-flow and complexity measures
    branch_count: "Branch count"
    decision_count: "Decision count"
    call_pairs: "Call pair count"
    condition_count: "Conditional statements count"
    multiple_condition_count: "Multiple condition count"
    cyclomatic_complexity: "McCabe complexity"
    cyclomatic_density: "Cyclomatic complexity density"
    normalized_cyclomatic_complexity: "Normalized complexity"
    formal_parameters: "Number of parameters"
    defects: "Defect count"
# AR1: embedded-software defect dataset from the PROMISE repository (Softlab).
# NOTE(review): sample/attribute counts follow the public OpenML listing, which
# has more columns than are described below; confirm against the cleaned CSV.
0642_ar1:
  dataset_description: "AR1: Embedded-software defect dataset from a Turkish white-goods manufacturer (PROMISE/Softlab). ~100 samples and 30 attributes. Predict code quality issues."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0642_ar1.csv"
  feature_descriptions:
    # Line counts
    total_loc: "Total LOC"
    blank_loc: "Blank LOC"
    comment_loc: "Comment LOC"
    code_and_comment_loc: "LOC + comments"
    executable_loc: "Executable LOC"
    # Operator / operand counts and Halstead measures
    unique_operands: "Unique operands count"
    unique_operators: "Unique operators count"
    total_operands: "Total operands"
    total_operators: "Total operators"
    halstead_vocabulary: "Halstead vocabulary size"
    halstead_length: "Halstead length"
    halstead_volume: "Halstead volume"
    halstead_difficulty: "Halstead difficulty"
    halstead_effort: "Halstead effort"
    halstead_error: "Halstead error estimate"
    # NOTE(review): the sibling entry 0645_ar6 names this column `halstead_time`
    # and also lists `halstead_level`; confirm this key against the CSV header.
    halstead_program_time: "Halstead program time"
    # Control-flow and complexity measures
    branch_count: "Branch count"
    decision_count: "Decision count"
    call_pairs: "Call pair count"
    condition_count: "Conditional statements count"
    multiple_condition_count: "Multiple condition count"
    cyclomatic_complexity: "McCabe complexity"
    cyclomatic_density: "Cyclomatic complexity density"
    normalized_cyclomatic_complexity: "Normalized complexity"
    formal_parameters: "Number of parameters"
    defects: "Defect count"
# KC2: NASA module-level defect dataset built from McCabe and Halstead measures.
# The single-letter columns (n, v, l, d, i, e, b, t) are Halstead quantities as
# defined in the PROMISE repository header for KC2, not control-flow counts.
# NOTE(review): sample count follows the public OpenML listing; confirm against
# the cleaned CSV.
0644_kc2:
  dataset_description: "KC2: Software code metrics for the KC2 dataset. ~500 samples and 22 attributes. Predicts defect proneness."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0644_kc2.csv"
  feature_descriptions:
    # McCabe measures
    loc: "Total lines of code"
    v(g): "Cyclomatic complexity"
    ev(g): "Essential complexity"
    iv(g): "Design complexity"
    # Halstead base and derived measures
    n: "Halstead total operators + operands"
    v: "Halstead volume"
    l: "Halstead program length"
    d: "Halstead difficulty"
    i: "Halstead intelligence"
    e: "Halstead effort"
    b: "Halstead error estimate"
    t: "Halstead time estimator"
    # Line counts.
    # NOTE(review): the upstream header spells these columns with a lowercase "l"
    # (lOCode, lOComment, lOBlank, lOCodeAndComment); confirm the keys below
    # match the CSV header.
    IOCode: "Line count of code"
    IOComment: "Count of lines of comments"
    IOBlank: "Count of blank lines"
    IOCodeAndComment: "Count of lines containing both code and comments"
    # Operator / operand counts
    uniq_Op: "Unique operators"
    uniq_Opnd: "Unique operands"
    total_Op: "Total operators"
    total_Opnd: "Total operands"
    branchCount: "Branch count of the flow graph"
    problems: "Whether the module has one or more reported problems (target)"
# AR6: embedded-software defect dataset from the PROMISE repository (Softlab).
# NOTE(review): sample/attribute counts follow the public OpenML listing, which
# has more columns than are described below; confirm against the cleaned CSV.
0645_ar6:
  dataset_description: "AR6: Embedded-software defect dataset from a Turkish white-goods manufacturer (PROMISE/Softlab). ~100 samples and 30 attributes. Predicts defect count."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0645_ar6.csv"
  feature_descriptions:
    # Line counts
    total_loc: "Total lines of code"
    blank_loc: "Blank lines of code"
    comment_loc: "Lines of comments"
    code_and_comment_loc: "LOC including comments"
    executable_loc: "Executable lines of code"
    # Operator / operand counts and Halstead measures
    unique_operands: "Number of unique operands"
    unique_operators: "Number of unique operators"
    total_operands: "Total operands count"
    total_operators: "Total operators count"
    halstead_vocabulary: "Halstead vocabulary size"
    halstead_length: "Halstead program length"
    halstead_volume: "Halstead volume"
    halstead_level: "Halstead level"
    halstead_difficulty: "Halstead difficulty"
    halstead_effort: "Halstead effort"
    halstead_error: "Halstead error estimate"
    halstead_time: "Halstead programming time"
    # Control-flow and complexity measures
    branch_count: "Number of branches"
    decision_count: "Number of decision points"
    call_pairs: "Number of call pairs"
    condition_count: "Number of conditional statements"
    # Was described as "Modified condition count", which is a different metric.
    multiple_condition_count: "Multiple condition count"
    cyclomatic_complexity: "McCabe cyclomatic complexity"
    cyclomatic_density: "Cyclomatic complexity per LOC"
    normalized_cyclomatic_complexity: "Normalized McCabe complexity"
    formal_parameters: "Number of parameters"
    defects: "Actual defect count"

# KC3 entry: free-text summary, CSV location, and a per-column glossary.
0646_kc3:
  dataset_description: 'KC3: Software metrics dataset for the KC3 project. ~1,000 samples and 30+ attributes. Predicts defect count.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0646_kc3.csv'
  feature_descriptions:
    # Line-count, control-flow and complexity columns
    LOC_BLANK: 'Blank lines of code'
    BRANCH_COUNT: 'Branch count'
    CALL_PAIRS: 'Call pair count'
    LOC_CODE_AND_COMMENT: 'LOC including comments'
    LOC_COMMENTS: 'Comment LOC'
    CONDITION_COUNT: 'Number of conditionals'
    CYCLOMATIC_COMPLEXITY: 'McCabe complexity'
    CYCLOMATIC_DENSITY: 'Complexity density'
    DECISION_COUNT: 'Decision statements count'
    DECISION_DENSITY: 'Decision density'
    DESIGN_COMPLEXITY: 'Design complexity'
    DESIGN_DENSITY: 'Design density'
    EDGE_COUNT: 'Control-flow graph edges'
    ESSENTIAL_COMPLEXITY: 'Essential complexity'
    ESSENTIAL_DENSITY: 'Essential density'
    LOC_EXECUTABLE: 'Executable LOC'
    PARAMETER_COUNT: 'Parameters count'
    GLOBAL_DATA_COMPLEXITY: 'Global data complexity'
    GLOBAL_DATA_DENSITY: 'Global data density'
    # Halstead columns
    HALSTEAD_CONTENT: 'Halstead content'
    HALSTEAD_DIFFICULTY: 'Halstead difficulty'
    HALSTEAD_EFFORT: 'Halstead effort'
    HALSTEAD_ERROR_EST: 'Halstead error estimate'
    HALSTEAD_LENGTH: 'Halstead length'
    HALSTEAD_LEVEL: 'Halstead level'
    HALSTEAD_PROG_TIME: 'Halstead programming time'
    HALSTEAD_VOLUME: 'Halstead volume'
    # Remaining counts and ratios
    MAINTENANCE_SEVERITY: 'Maintenance severity estimate'
    MODIFIED_CONDITION_COUNT: 'Modified condition count'
    MULTIPLE_CONDITION_COUNT: 'Multiple condition count'
    NODE_COUNT: 'Control-flow graph nodes'
    NORMALIZED_CYCLOMATIC_COMPLEXITY: 'Normalized complexity'
    NUM_OPERANDS: 'Operand count'
    NUM_OPERATORS: 'Operator count'
    NUM_UNIQUE_OPERANDS: 'Unique operands'
    NUM_UNIQUE_OPERATORS: 'Unique operators'
    NUMBER_OF_LINES: 'Total LOC'
    PERCENT_COMMENTS: 'Comment percentage'
    LOC_TOTAL: 'Total LOC (including blanks, comments, code)'
    c: 'Defect count'

# KC1 (binary variant) entry: free-text summary, CSV location, and a per-column
# glossary. Most columns come in min*/max*/avg*/sum* families over the same
# set of base metrics.
0647_kc1-binary:
  dataset_description: 'KC1: Software metrics dataset for KC1 project. ~1,000 samples and 74 attributes. Predicts defects (binary version).'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0647_kc1-binary.csv'
  feature_descriptions:
    # Class-structure and coupling columns
    PERCENT_PUB_DATA: 'Percentage of public data fields'
    ACCESS_TO_PUB_DATA: 'Number of accesses to public data fields'
    COUPLING_BETWEEN_OBJECTS: 'Coupling count between classes'
    DEPTH: 'Inheritance tree depth'
    LACK_OF_COHESION_OF_METHODS: 'Lack of cohesion of methods metric'
    NUM_OF_CHILDREN: 'Number of immediate subclasses'
    DEP_ON_CHILD: 'Dependencies on child classes'
    FAN_IN: 'Number of components calling this module'
    RESPONSE_FOR_CLASS: 'Response-for-class metric'
    WEIGHTED_METHODS_PER_CLASS: 'Weighted Methods per Class metric'
    # min* family
    minLOC_BLANK: 'Minimum blank lines of code'
    minBRANCH_COUNT: 'Minimum branch count'
    minLOC_CODE_AND_COMMENT: 'Minimum LOC including comments'
    minLOC_COMMENTS: 'Minimum comment LOC'
    minCYCLOMATIC_COMPLEXITY: 'Minimum cyclomatic complexity'
    minDESIGN_COMPLEXITY: 'Minimum design complexity'
    minESSENTIAL_COMPLEXITY: 'Minimum essential complexity'
    minLOC_EXECUTABLE: 'Minimum executable LOC'
    minHALSTEAD_CONTENT: 'Minimum Halstead content'
    minHALSTEAD_DIFFICULTY: 'Minimum Halstead difficulty'
    minHALSTEAD_EFFORT: 'Minimum Halstead effort'
    minHALSTEAD_ERROR_EST: 'Minimum Halstead error estimate'
    minHALSTEAD_LENGTH: 'Minimum Halstead program length'
    minHALSTEAD_LEVEL: 'Minimum Halstead level'
    minHALSTEAD_PROG_TIME: 'Minimum Halstead programming time'
    minHALSTEAD_VOLUME: 'Minimum Halstead volume'
    minNUM_OPERANDS: 'Minimum number of operands'
    minNUM_OPERATORS: 'Minimum number of operators'
    minNUM_UNIQUE_OPERANDS: 'Minimum unique operands count'
    minNUM_UNIQUE_OPERATORS: 'Minimum unique operators count'
    minLOC_TOTAL: 'Minimum total lines of code'
    # max* family
    maxLOC_BLANK: 'Maximum blank lines of code'
    maxBRANCH_COUNT: 'Maximum branch count'
    maxLOC_CODE_AND_COMMENT: 'Maximum LOC including comments'
    maxLOC_COMMENTS: 'Maximum comment LOC'
    maxCYCLOMATIC_COMPLEXITY: 'Maximum cyclomatic complexity'
    maxDESIGN_COMPLEXITY: 'Maximum design complexity'
    maxESSENTIAL_COMPLEXITY: 'Maximum essential complexity'
    maxLOC_EXECUTABLE: 'Maximum executable LOC'
    maxHALSTEAD_CONTENT: 'Maximum Halstead content'
    maxHALSTEAD_DIFFICULTY: 'Maximum Halstead difficulty'
    maxHALSTEAD_EFFORT: 'Maximum Halstead effort'
    maxHALSTEAD_ERROR_EST: 'Maximum Halstead error estimate'
    maxHALSTEAD_LENGTH: 'Maximum Halstead program length'
    maxHALSTEAD_LEVEL: 'Maximum Halstead level'
    maxHALSTEAD_PROG_TIME: 'Maximum Halstead programming time'
    maxHALSTEAD_VOLUME: 'Maximum Halstead volume'
    maxNUM_OPERANDS: 'Maximum number of operands'
    maxNUM_OPERATORS: 'Maximum number of operators'
    maxNUM_UNIQUE_OPERANDS: 'Maximum unique operands count'
    maxNUM_UNIQUE_OPERATORS: 'Maximum unique operators count'
    maxLOC_TOTAL: 'Maximum total lines of code'
    # avg* family
    avgLOC_BLANK: 'Average blank lines of code'
    avgBRANCH_COUNT: 'Average branch count'
    avgLOC_CODE_AND_COMMENT: 'Average LOC including comments'
    avgLOC_COMMENTS: 'Average comment LOC'
    avgCYCLOMATIC_COMPLEXITY: 'Average cyclomatic complexity'
    avgDESIGN_COMPLEXITY: 'Average design complexity'
    avgESSENTIAL_COMPLEXITY: 'Average essential complexity'
    avgLOC_EXECUTABLE: 'Average executable LOC'
    avgHALSTEAD_CONTENT: 'Average Halstead content'
    avgHALSTEAD_DIFFICULTY: 'Average Halstead difficulty'
    avgHALSTEAD_EFFORT: 'Average Halstead effort'
    avgHALSTEAD_ERROR_EST: 'Average Halstead error estimate'
    avgHALSTEAD_LENGTH: 'Average Halstead program length'
    avgHALSTEAD_LEVEL: 'Average Halstead level'
    avgHALSTEAD_PROG_TIME: 'Average Halstead programming time'
    avgHALSTEAD_VOLUME: 'Average Halstead volume'
    avgNUM_OPERANDS: 'Average number of operands'
    avgNUM_OPERATORS: 'Average number of operators'
    avgNUM_UNIQUE_OPERANDS: 'Average unique operands count'
    avgNUM_UNIQUE_OPERATORS: 'Average unique operators count'
    avgLOC_TOTAL: 'Average total lines of code'
    # sum* family
    sumLOC_BLANK: 'Sum of blank lines of code'
    sumBRANCH_COUNT: 'Sum of branch counts'
    sumLOC_CODE_AND_COMMENT: 'Sum of LOC including comments'
    sumLOC_COMMENTS: 'Sum of comment LOC'
    sumCYCLOMATIC_COMPLEXITY: 'Sum of cyclomatic complexity'
    sumDESIGN_COMPLEXITY: 'Sum of design complexity'
    sumESSENTIAL_COMPLEXITY: 'Sum of essential complexity'
    sumLOC_EXECUTABLE: 'Sum of executable LOC'
    sumHALSTEAD_CONTENT: 'Sum of Halstead content'
    sumHALSTEAD_DIFFICULTY: 'Sum of Halstead difficulty'
    sumHALSTEAD_EFFORT: 'Sum of Halstead effort'
    sumHALSTEAD_ERROR_EST: 'Sum of Halstead error estimate'
    sumHALSTEAD_LENGTH: 'Sum of Halstead program length'
    sumHALSTEAD_LEVEL: 'Sum of Halstead level'
    sumHALSTEAD_PROG_TIME: 'Sum of Halstead programming time'
    sumHALSTEAD_VOLUME: 'Sum of Halstead volume'
    sumNUM_OPERANDS: 'Sum of operands'
    sumNUM_OPERATORS: 'Sum of operators'
    sumNUM_UNIQUE_OPERANDS: 'Sum of unique operands'
    sumNUM_UNIQUE_OPERATORS: 'Sum of unique operators'
    sumLOC_TOTAL: 'Sum of total lines of code'
    # Final column
    DL: 'Defect likelihood estimate'

# KC1 entry: free-text summary, CSV location, and a per-column glossary.
# Most columns come in min*/max*/avg*/sum* families over the same set of
# base metrics.
0648_kc1:
  dataset_description: 'KC1: Software metrics dataset: ~1,000 samples and 74 attributes. Predicts defects (count or binary).'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0648_kc1.csv'
  feature_descriptions:
    # Class-structure and coupling columns
    PERCENT_PUB_DATA: 'Percentage of public data fields'
    ACCESS_TO_PUB_DATA: 'Number of accesses to public data fields'
    COUPLING_BETWEEN_OBJECTS: 'Coupling count between classes'
    DEPTH: 'Inheritance tree depth'
    LACK_OF_COHESION_OF_METHODS: 'Lack of cohesion of methods metric'
    NUM_OF_CHILDREN: 'Number of immediate subclasses'
    DEP_ON_CHILD: 'Dependencies on child classes'
    FAN_IN: 'Number of components calling this module'
    RESPONSE_FOR_CLASS: 'Response-for-class metric'
    WEIGHTED_METHODS_PER_CLASS: 'Weighted Methods per Class metric'
    # min* family
    minLOC_BLANK: 'Minimum blank lines of code'
    minBRANCH_COUNT: 'Minimum branch count'
    minLOC_CODE_AND_COMMENT: 'Minimum LOC including comments'
    minLOC_COMMENTS: 'Minimum comment LOC'
    minCYCLOMATIC_COMPLEXITY: 'Minimum cyclomatic complexity'
    minDESIGN_COMPLEXITY: 'Minimum design complexity'
    minESSENTIAL_COMPLEXITY: 'Minimum essential complexity'
    minLOC_EXECUTABLE: 'Minimum executable LOC'
    minHALSTEAD_CONTENT: 'Minimum Halstead content'
    minHALSTEAD_DIFFICULTY: 'Minimum Halstead difficulty'
    minHALSTEAD_EFFORT: 'Minimum Halstead effort'
    minHALSTEAD_ERROR_EST: 'Minimum Halstead error estimate'
    minHALSTEAD_LENGTH: 'Minimum Halstead program length'
    minHALSTEAD_LEVEL: 'Minimum Halstead level'
    minHALSTEAD_PROG_TIME: 'Minimum Halstead programming time'
    minHALSTEAD_VOLUME: 'Minimum Halstead volume'
    minNUM_OPERANDS: 'Minimum number of operands'
    minNUM_OPERATORS: 'Minimum number of operators'
    minNUM_UNIQUE_OPERANDS: 'Minimum unique operands count'
    minNUM_UNIQUE_OPERATORS: 'Minimum unique operators count'
    minLOC_TOTAL: 'Minimum total lines of code'
    # max* family
    maxLOC_BLANK: 'Maximum blank lines of code'
    maxBRANCH_COUNT: 'Maximum branch count'
    maxLOC_CODE_AND_COMMENT: 'Maximum LOC including comments'
    maxLOC_COMMENTS: 'Maximum comment LOC'
    maxCYCLOMATIC_COMPLEXITY: 'Maximum cyclomatic complexity'
    maxDESIGN_COMPLEXITY: 'Maximum design complexity'
    maxESSENTIAL_COMPLEXITY: 'Maximum essential complexity'
    maxLOC_EXECUTABLE: 'Maximum executable LOC'
    maxHALSTEAD_CONTENT: 'Maximum Halstead content'
    maxHALSTEAD_DIFFICULTY: 'Maximum Halstead difficulty'
    maxHALSTEAD_EFFORT: 'Maximum Halstead effort'
    maxHALSTEAD_ERROR_EST: 'Maximum Halstead error estimate'
    maxHALSTEAD_LENGTH: 'Maximum Halstead program length'
    maxHALSTEAD_LEVEL: 'Maximum Halstead level'
    maxHALSTEAD_PROG_TIME: 'Maximum Halstead programming time'
    maxHALSTEAD_VOLUME: 'Maximum Halstead volume'
    maxNUM_OPERANDS: 'Maximum number of operands'
    maxNUM_OPERATORS: 'Maximum number of operators'
    maxNUM_UNIQUE_OPERANDS: 'Maximum unique operands count'
    maxNUM_UNIQUE_OPERATORS: 'Maximum unique operators count'
    maxLOC_TOTAL: 'Maximum total lines of code'
    # avg* family
    avgLOC_BLANK: 'Average blank lines of code'
    avgBRANCH_COUNT: 'Average branch count'
    avgLOC_CODE_AND_COMMENT: 'Average LOC including comments'
    avgLOC_COMMENTS: 'Average comment LOC'
    avgCYCLOMATIC_COMPLEXITY: 'Average cyclomatic complexity'
    avgDESIGN_COMPLEXITY: 'Average design complexity'
    avgESSENTIAL_COMPLEXITY: 'Average essential complexity'
    avgLOC_EXECUTABLE: 'Average executable LOC'
    avgHALSTEAD_CONTENT: 'Average Halstead content'
    avgHALSTEAD_DIFFICULTY: 'Average Halstead difficulty'
    avgHALSTEAD_EFFORT: 'Average Halstead effort'
    avgHALSTEAD_ERROR_EST: 'Average Halstead error estimate'
    avgHALSTEAD_LENGTH: 'Average Halstead program length'
    avgHALSTEAD_LEVEL: 'Average Halstead level'
    avgHALSTEAD_PROG_TIME: 'Average Halstead programming time'
    avgHALSTEAD_VOLUME: 'Average Halstead volume'
    avgNUM_OPERANDS: 'Average number of operands'
    avgNUM_OPERATORS: 'Average number of operators'
    avgNUM_UNIQUE_OPERANDS: 'Average unique operands count'
    avgNUM_UNIQUE_OPERATORS: 'Average unique operators count'
    avgLOC_TOTAL: 'Average total lines of code'
    # sum* family
    sumLOC_BLANK: 'Sum of blank lines of code'
    sumBRANCH_COUNT: 'Sum of branch counts'
    sumLOC_CODE_AND_COMMENT: 'Sum of LOC including comments'
    sumLOC_COMMENTS: 'Sum of comment LOC'
    sumCYCLOMATIC_COMPLEXITY: 'Sum of cyclomatic complexity'
    sumDESIGN_COMPLEXITY: 'Sum of design complexity'
    sumESSENTIAL_COMPLEXITY: 'Sum of essential complexity'
    sumLOC_EXECUTABLE: 'Sum of executable LOC'
    sumHALSTEAD_CONTENT: 'Sum of Halstead content'
    sumHALSTEAD_DIFFICULTY: 'Sum of Halstead difficulty'
    sumHALSTEAD_EFFORT: 'Sum of Halstead effort'
    sumHALSTEAD_ERROR_EST: 'Sum of Halstead error estimate'
    sumHALSTEAD_LENGTH: 'Sum of Halstead program length'
    sumHALSTEAD_LEVEL: 'Sum of Halstead level'
    sumHALSTEAD_PROG_TIME: 'Sum of Halstead programming time'
    sumHALSTEAD_VOLUME: 'Sum of Halstead volume'
    sumNUM_OPERANDS: 'Sum of operands'
    sumNUM_OPERATORS: 'Sum of operators'
    sumNUM_UNIQUE_OPERANDS: 'Sum of unique operands'
    sumNUM_UNIQUE_OPERATORS: 'Sum of unique operators'
    sumLOC_TOTAL: 'Sum of total lines of code'
    # Final column
    DL: 'Defect likelihood estimate'

# PC2 entry: free-text summary, CSV location, and a per-column glossary.
0650_pc2:
  dataset_description: 'PC1/PC2: NASA software metrics datasets. ~1,000–2,000 samples and 27 attributes.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0650_pc2.csv'
  feature_descriptions:
    # Line-count, control-flow and complexity columns
    BRANCH_COUNT: 'Branch count'
    CALL_PAIRS: 'Call pair count'
    LOC_CODE_AND_COMMENT: 'LOC including comments'
    LOC_COMMENTS: 'Comment LOC'
    CONDITION_COUNT: 'Condition statement count'
    CYCLOMATIC_COMPLEXITY: 'McCabe complexity'
    CYCLOMATIC_DENSITY: 'Complexity density'
    DECISION_COUNT: 'Decision point count'
    DECISION_DENSITY: 'Decision density'
    DESIGN_COMPLEXITY: 'Design complexity'
    DESIGN_DENSITY: 'Design density'
    EDGE_COUNT: 'CFG edges count'
    ESSENTIAL_COMPLEXITY: 'Essential complexity'
    ESSENTIAL_DENSITY: 'Essential density'
    LOC_EXECUTABLE: 'Executable LOC'
    PARAMETER_COUNT: 'Parameter count'
    # Halstead columns
    HALSTEAD_CONTENT: 'Halstead content'
    HALSTEAD_DIFFICULTY: 'Halstead difficulty'
    HALSTEAD_EFFORT: 'Halstead effort'
    HALSTEAD_ERROR_EST: 'Halstead error estimate'
    HALSTEAD_LENGTH: 'Halstead length'
    HALSTEAD_LEVEL: 'Halstead level'
    HALSTEAD_PROG_TIME: 'Halstead programming time'
    HALSTEAD_VOLUME: 'Halstead volume'
    # Remaining counts and ratios
    MAINTENANCE_SEVERITY: 'Maintenance severity'
    MULTIPLE_CONDITION_COUNT: 'Multiple condition count'
    NODE_COUNT: 'CFG node count'
    NORMALIZED_CYCLOMATIC_COMPLEXITY: 'Normalized complexity'
    NUM_OPERANDS: 'Operand count'
    NUM_OPERATORS: 'Operator count'
    NUM_UNIQUE_OPERANDS: 'Unique operands'
    NUM_UNIQUE_OPERATORS: 'Unique operators'
    NUMBER_OF_LINES: 'Total LOC'
    PERCENT_COMMENTS: 'Comment percentage'
    LOC_TOTAL: 'Total LOC'
    c: 'Defect count'
# PC1 entry: free-text summary, CSV location, and a per-column glossary.
0649_pc1:
  dataset_description: 'PC1/PC2: NASA software metrics datasets. ~1,000–2,000 samples and 27 attributes.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0649_pc1.csv'
  feature_descriptions:
    # Line-count, control-flow and complexity columns
    BRANCH_COUNT: 'Branch count'
    CALL_PAIRS: 'Call pair count'
    LOC_CODE_AND_COMMENT: 'LOC including comments'
    LOC_COMMENTS: 'Comment LOC'
    CONDITION_COUNT: 'Condition statement count'
    CYCLOMATIC_COMPLEXITY: 'McCabe complexity'
    CYCLOMATIC_DENSITY: 'Complexity density'
    DECISION_COUNT: 'Decision point count'
    DECISION_DENSITY: 'Decision density'
    DESIGN_COMPLEXITY: 'Design complexity'
    DESIGN_DENSITY: 'Design density'
    EDGE_COUNT: 'CFG edges count'
    ESSENTIAL_COMPLEXITY: 'Essential complexity'
    ESSENTIAL_DENSITY: 'Essential density'
    LOC_EXECUTABLE: 'Executable LOC'
    PARAMETER_COUNT: 'Parameter count'
    # Halstead columns
    HALSTEAD_CONTENT: 'Halstead content'
    HALSTEAD_DIFFICULTY: 'Halstead difficulty'
    HALSTEAD_EFFORT: 'Halstead effort'
    HALSTEAD_ERROR_EST: 'Halstead error estimate'
    HALSTEAD_LENGTH: 'Halstead length'
    HALSTEAD_LEVEL: 'Halstead level'
    HALSTEAD_PROG_TIME: 'Halstead programming time'
    HALSTEAD_VOLUME: 'Halstead volume'
    # Remaining counts and ratios
    MAINTENANCE_SEVERITY: 'Maintenance severity'
    MULTIPLE_CONDITION_COUNT: 'Multiple condition count'
    NODE_COUNT: 'CFG node count'
    NORMALIZED_CYCLOMATIC_COMPLEXITY: 'Normalized complexity'
    NUM_OPERANDS: 'Operand count'
    NUM_OPERATORS: 'Operator count'
    NUM_UNIQUE_OPERANDS: 'Unique operands'
    NUM_UNIQUE_OPERATORS: 'Unique operators'
    NUMBER_OF_LINES: 'Total LOC'
    PERCENT_COMMENTS: 'Comment percentage'
    LOC_TOTAL: 'Total LOC'
    c: 'Defect count'

# KC1-Numeric entry: free-text summary, CSV location, and a per-column glossary.
# NOTE(review): this key carries a ".csv" suffix, unlike entries such as
# 0648_kc1; confirm the loader expects it.
0651_kc1-numeric.csv:
  dataset_description: 'KC1-Numeric: Numeric-only variant of the KC1 code metrics dataset for software defect prediction.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0651_kc1-numeric.csv'
  feature_descriptions:
    # Data-access, control-flow and complexity columns
    PERCENT_PUB_DATA: 'Percentage of public data fields'
    ACCESS_TO_PUB_DATA: 'Number of accesses to public data fields'
    BRANCHES: 'Number of branches in control flow'
    CALL_PAIRS: 'Number of call pairs (caller-callee)'
    LOC_COMMENTS: 'Lines of comments'
    CONDITION_COUNT: 'Number of conditional statements'
    # Double-quoted because the text contains an apostrophe.
    CYCLOMATIC_COMPLEXITY: "McCabe's cyclomatic complexity"
    DECISION_COUNT: 'Number of decision points'
    EDGE_COUNT: 'Number of edges in control flow graph'
    ESSENTIAL_COMPLEXITY: 'Essential complexity metric'
    LOC_EXECUTABLE: 'Lines of executable code'
    PARAMETER_COUNT: 'Number of parameters to functions'
    # Halstead columns
    HALSTEAD_CONTENT: 'Halstead content metric'
    HALSTEAD_DIFFICULTY: 'Halstead difficulty metric'
    HALSTEAD_EFFORT: 'Halstead effort metric'
    HALSTEAD_ERROR_EST: 'Halstead error estimate'
    HALSTEAD_LENGTH: 'Halstead length metric'
    HALSTEAD_LEVEL: 'Halstead level metric'
    HALSTEAD_PROG_TIME: 'Halstead programming time estimate'
    HALSTEAD_VOLUME: 'Halstead volume metric'
    # Remaining counts and ratios
    MAINTENANCE_SEVERITY: 'Maintenance severity indicator'
    MODIFIED_CONDITION_COUNT: 'Modified condition count'
    MULTIPLE_CONDITION_COUNT: 'Multiple condition count'
    NODE_COUNT: 'Number of nodes in control flow graph'
    NORMALIZED_CYCLOMATIC_COMPLEXITY: 'Normalized cyclomatic complexity'
    NUM_OPERANDS: 'Number of operands'
    NUM_OPERATORS: 'Number of operators'
    NUM_UNIQUE_OPERANDS: 'Number of unique operands'
    NUM_UNIQUE_OPERATORS: 'Number of unique operators'
    NUMBER_OF_LINES: 'Total number of lines'
    PATHOLOGICAL_COMPLEXITY: 'Pathological complexity metric'
    LOC_TOTAL: 'Total lines of code'
    DL: 'Defect likelihood estimate'

# MW1: one of the NASA Metrics Data Program (MDP) module-level defect datasets.
# NOTE(review): the key reads "w1" while the CSV is 0652_mw1.csv, and it carries
# a ".csv" suffix unlike entries such as 0649_pc1. The key is left unchanged
# because consumers may look it up by this exact name; confirm and rename if not.
0652_w1.csv:
  dataset_description: "MW1: NASA MDP software metrics dataset of module-level static code measures. Used to predict defect count."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0652_mw1.csv"
  feature_descriptions:
    # Control-flow and complexity measures
    BRANCH_COUNT: "Branch count"
    CALL_PAIRS: "Call pair count"
    CONDITION_COUNT: "Number of conditional statements"
    CYCLOMATIC_COMPLEXITY: "McCabe's cyclomatic complexity"
    DECISION_COUNT: "Number of decision points"
    DESIGN_COMPLEXITY: "Design complexity metric"
    EDGE_COUNT: "Number of edges in control flow graph"
    ESSENTIAL_COMPLEXITY: "Essential complexity metric"
    # Line counts
    LOC_BLANK: "Blank lines count"
    LOC_CODE_AND_COMMENT: "Lines containing both code and comments"
    LOC_COMMENTS: "Lines of comments"
    LOC_EXECUTABLE: "Lines of executable code"
    NUMBER_OF_LINES: "Total number of lines"
    # Halstead measures
    HALSTEAD_CONTENT: "Halstead content metric"
    HALSTEAD_DIFFICULTY: "Halstead difficulty metric"
    HALSTEAD_EFFORT: "Halstead effort metric"
    HALSTEAD_ERROR_EST: "Halstead error estimate"
    HALSTEAD_LENGTH: "Halstead length metric"
    HALSTEAD_LEVEL: "Halstead level metric"
    HALSTEAD_PROG_TIME: "Halstead programming time estimate"
    HALSTEAD_VOLUME: "Halstead volume metric"
    # Remaining counts and ratios
    NODE_COUNT: "Number of nodes in control flow graph"
    NORMALIZED_CYCLOMATIC_COMPLEXITY: "Normalized cyclomatic complexity"
    NUM_OPERANDS: "Number of operands"
    NUM_OPERATORS: "Number of operators"
    NUM_UNIQUE_OPERANDS: "Number of unique operands"
    NUM_UNIQUE_OPERATORS: "Number of unique operators"
    PARAMETER_COUNT: "Number of parameters to functions"
    GLOBAL_DATA_COMPLEXITY: "Global data complexity metric"
    GLOBAL_DATA_DENSITY: "Global data density metric"
    MAINTENANCE_SEVERITY: "Maintenance severity indicator"
    MODIFIED_CONDITION_COUNT: "Modified condition count"
    MULTIPLE_CONDITION_COUNT: "Multiple condition count"
    PATHOLOGICAL_COMPLEXITY: "Pathological complexity metric"
    PERCENT_COMMENTS: "Percentage of comments"
    LOC_TOTAL: "Total LOC"
    # NOTE(review): upstream `c` is presumably a TRUE/FALSE defect flag rather than a count; confirm.
    c: "Defect count"

# DataTrieve entry: free-text description, CSV location, and one description
# per listed feature (9 keys, matching the "9 attributes" in the description).
0654_datatrieve.csv:
  dataset_description: "DataTrieve: Code metrics for DataTrieve system. ~300 samples and 9 attributes. Predicts fault count."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0654_datatrieve.csv"
  feature_descriptions:
    LOC6_0: "LOC in category 6_0"
    LOC6_1: "LOC in category 6_1"
    Added_LOC: "Added LOC since last version"
    Del_LOC: "Deleted LOC since last version"
    Diff_Block: "Number of diff blocks"
    Mod_Rate: "Modification rate"
    Mod_Know: "Modifier knowledge metric"
    ReusedLOC: "Reused LOC from other modules"
    Faulty6_1: "Number of faults in category 6_1"

# Egyptian skull measurements entry.
# NOTE(review): the description says "6 attributes" but five features are
# listed below — confirm against the CSV header.
0656_EgyptianSkulls.csv:
  dataset_description: "EgyptianSkulls: Archaeological dataset of skull measurements. ~150 samples and 6 attributes. Regression on skull metrics."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0656_EgyptianSkulls.csv"
  feature_descriptions:
    MB: "Maximum breadth"
    BH: "Basibregmatic height"
    BL: "Basialveolar length"
    # NH is the nasal height measurement, not a nasion–inion distance.
    NH: "Nasal height"
    # Year dates the skull itself (historical epoch), not the dig.
    Year: "Approximate year of skull formation (negative values = BC)"
# PopularKids survey entry: description, CSV location, per-feature descriptions.
# NOTE(review): 11 features are listed while the description says "10 attributes".
0657_PopularKids:
  dataset_description: "PopularKids: Survey of adolescent popularity with demographic and interest features. ~200 samples and 10 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0657_PopularKids.csv"
  feature_descriptions:
    Gender: "Respondent gender"
    Grade: "School grade level"
    Age: "Respondent age"
    Race: "Respondent race"
    Urban/Rural: "Urban (1) or rural (0) school"
    School: "School identifier"
    Grades: "Self-reported academic grades"
    Sports: "Interest in sports (1=yes; 0=no)"
    Looks: "Self-rated attractiveness"
    Money: "Self-rated socioeconomic status"
    Goals: "Personal goal orientation"

# SPECTF heart-imaging entry.
# NOTE(review): `path` is identical to the 0657_PopularKids entry's path, which
# looks like a copy-paste slip. The correct SPECTF CSV name is not visible
# here, so the path is left untouched — confirm and correct.
0657_SPECTF:
  dataset_description: "SPECTF: Cardiac SPECT imaging dataset for heart defect diagnosis. 267 samples and 44 continuous region-of-interest features plus diagnosis."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0657_PopularKids.csv"
  feature_descriptions:
    # Range keys are shorthand for the per-region columns (F1R, F2R, ...); they
    # are not literal column names. The R/S suffix is rest/stress.
    F1R–F22R: "Count in region of interest 1–22, measured at rest"
    F1S–F22S: "Count in region of interest 1–22, measured under stress"
    OVERALL_DIAGNOSIS: "Diagnosis label (normal/abnormal)"

# KDDCup99 network-intrusion entry: description, CSV location, per-feature text.
# NOTE(review): 41 feature keys plus `label` are listed, while the description
# says "42 attributes plus attack label" — confirm against the CSV header.
0658_KDDCup99:
  dataset_description: "KDDCup99: Network intrusion detection dataset. ~4,900,000 samples and 42 attributes plus attack label."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0658_KDDCup99.csv"
  feature_descriptions:
    duration: "Connection duration (seconds)"
    protocol_type: "Protocol type (tcp/udp/icmp)"
    service: "Network service on destination"
    flag: "Connection status flag"
    src_bytes: "Bytes from source to destination"
    dst_bytes: "Bytes from destination to source"
    land: "Land attack flag"
    wrong_fragment: "Number of wrong fragments"
    urgent: "Urgent packets count"
    hot: "Hot indicators count"
    num_failed_logins: "Number of failed login attempts"
    logged_in: "User login status (1=yes; 0=no)"
    num_compromised: "Number of compromised conditions"
    root_shell: "Root shell obtained flag"
    su_attempted: "Superuser attempted flag"
    num_root: "Number of root accesses"
    num_file_creations: "Number of file creation operations"
    num_shells: "Number of shell prompts"
    num_access_files: "Number of operations on access control files"
    num_outbound_cmds: "Number of outbound commands"
    is_host_login: "Is host login flag"
    is_guest_login: "Is guest login flag"
    count: "Number of connections to same host in past two seconds"
    srv_count: "Number of connections to same service"
    serror_rate: "SYN error rate"
    srv_serror_rate: "SYN error rate for same service"
    rerror_rate: "REJ error rate"
    srv_rerror_rate: "REJ error rate for same service"
    same_srv_rate: "Same service rate"
    diff_srv_rate: "Different service rate"
    srv_diff_host_rate: "Different host service rate"
    dst_host_count: "Connections to same destination host"
    dst_host_srv_count: "Connections to same service on destination host"
    dst_host_same_srv_rate: "Same service rate on destination host"
    dst_host_diff_srv_rate: "Different service rate on destination host"
    dst_host_same_src_port_rate: "Same source port rate on destination host"
    dst_host_srv_diff_host_rate: "Different host rate on same service"
    dst_host_serror_rate: "SYN error rate on destination host"
    dst_host_srv_serror_rate: "SYN error rate for same service on destination host"
    dst_host_rerror_rate: "REJ error rate on destination host"
    dst_host_srv_rerror_rate: "REJ error rate for same service on destination host"
    label: "Attack type label"

# Teaching-assistant evaluation entry.
# NOTE(review): 6 features are listed while the description says "4 attributes".
0659_teachingAssistant:
  dataset_description: "TeachingAssistant: Effectiveness of teaching assistants in classes. ~150 samples and 4 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0659_teachingAssistant.csv"
  feature_descriptions:
    EnglishSpeaker: "TA native English speaker flag"
    courseInstructor: "Instructor identifier"
    course: "Course identifier"
    summer: "Summer term flag (1=yes; 0=no)"
    classSize: "Class size"
    class: "TA effectiveness rating"

# Adult census income entry; 15 features listed, matching the description.
# The key uses an underscore (adult_census) while the CSV name uses a hyphen.
0661_adult_census:
  dataset_description: "Adult Census: U.S. census data for income prediction. ~48,000 samples and 15 demographic attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0661_adult-census.csv"
  feature_descriptions:
    age: "Age in years"
    workclass: "Employment category"
    fnlwgt: "Final sample weight"
    education: "Education level"
    education-num: "Years of education"
    marital-status: "Marital status"
    occupation: "Occupation category"
    relationship: "Relationship status"
    race: "Race category"
    sex: "Gender"
    capital-gain: "Capital gains"
    capital-loss: "Capital losses"
    hours-per-week: "Work hours per week"
    native-country: "Country of origin"
    class: "Income class label (>50K / <=50K)"

# Badges2 entry: character-level text features plus a class label.
# NOTE(review): 11 features are listed while the description says "12".
0663_badges2:
  dataset_description: "Badges2: Textual badge frequency features for authorship attribution. ~1,000 samples and 12 character-based attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0663_badges2.csv"
  feature_descriptions:
    length: "Document length (characters)"
    even_odd: "Ratio of even to odd length tokens"
    first_char_vowel: "First character is vowel flag"
    second_char_vowel: "Second character is vowel flag"
    vowels: "Total vowel count"
    consonants: "Total consonant count"
    vowel_consonant_ratio: "Vowel/consonant ratio"
    spaces: "Space count"
    dots: "Dot count"
    words: "Word count"
    class: "Author label"

# Parkinson's telemonitoring entry: description, CSV location, feature text.
0664_parkinsons_telem:
  dataset_description: "Parkinsons-Telem: Telemonitoring voice measures for Parkinson’s disease. ~5,875 samples and 6+ attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0664_parkinsons-telemonitoring.csv"
  feature_descriptions:
    subject: "Subject identifier"
    age: "Subject age"
    sex: "Subject sex (M/F)"
    test_time: "Time of test (seconds)"
    motor_UPDRS: "Motor UPDRS score"
    total_UPDRS: "Total UPDRS score"
    # NOTE(review): "Jitter..." and "Shimmer..." look like placeholders for
    # families of columns rather than literal column names — confirm and expand.
    Jitter...: "Various jitter measures"
    Shimmer...: "Various shimmer measures"
    NHR: "Noise-to-harmonics ratio"
    HNR: "Harmonics-to-noise ratio"
    RPDE: "Recurrence period density entropy"
    DFA: "Detrended fluctuation analysis"
    PPE: "Pitch period entropy"

# PC1_req requirement-metrics entry.
# NOTE(review): `path` is identical to the 0664_parkinsons_telem entry's path
# (a Parkinson's telemonitoring CSV), which does not match this dataset's
# description or features — likely a copy-paste slip; confirm the real file.
0664_pc1_req:
  dataset_description: "PC1_req: Requirement documents metrics for defect tracking. ~500 samples and 9 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0664_parkinsons-telemonitoring.csv"
  feature_descriptions:
    ACTION: "Action verb count"
    CONDITIONAL: "Conditional statement count"
    CONTINUANCE: "Continuance phrase count"
    IMPERATIVE: "Imperative sentence count"
    OPTION: "Option phrase count"
    RISK_LEVEL: "Risk-level mentions count"
    SOURCE: "Source attribution count"
    WEAK_PHRASE: "Weak phrase count"
    DEFECT: "Defect mentions count"

# Primary Tumor (BNG) entry: 17 listed attributes plus the `class` label.
0666_BNG_primary_tumor:
  dataset_description: "Primary Tumor (BNG): Scaled UCI Primary Tumor dataset for multivariate classification. 339 samples and 17 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0666_BNG(primary-tumor).csv"
  feature_descriptions:
    age: "Patient age"
    sex: "Gender"
    histologic-type: "Tumor histology"
    degree-of-diff: "Tumor differentiation degree"
    bone: "Bone involvement"
    bone-marrow: "Bone marrow involvement"
    lung: "Lung involvement"
    pleura: "Pleural involvement"
    peritoneum: "Peritoneal involvement"
    liver: "Liver involvement"
    brain: "Brain involvement"
    skin: "Skin involvement"
    neck: "Neck involvement"
    supraclavicular: "Supraclavicular involvement"
    axillary: "Axillary involvement"
    mediastinum: "Mediastinal involvement"
    abdominal: "Abdominal involvement"
    class: "Tumor site label"

# Solar-Flare (BNG) entry.
# NOTE(review): 13 features are listed while the description says "11 attributes".
0668_BNG_solar_flare:
  dataset_description: "Solar-Flare (BNG): Solar flare prediction dataset from UCI with 11 attributes. ~100 samples."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0668_BNG(solar-flare).csv"
  feature_descriptions:
    class: "Flare occurrence class"
    largest_spot_size: "Largest sunspot size"
    spot_distribution: "Spot distribution measure"
    Activity: "Solar activity index"
    Evolution: "Evolutionary phase code"
    Previous_24_hour_flare_activity_code: "Flare activity code in the past 24h"
    Historically-complex: "Historical complexity flag"
    Did_region_become_historically_complex: "Complexity evolution flag"
    Area: "Sunspot region area"
    Area_of_the_largest_spot: "Area of largest spot"
    C-class_flares_production_by_this_region: "C-class flare count"
    M-class_flares_production_by_this_region: "M-class flare count"
    X-class_flares_production_by_this_region: "X-class flare count"

# Adult (BNG) entry; 15 features listed, matching the description.
# Unlike 0661_adult_census, three keys here have no hyphen
# (capitalgain, capitalloss, hoursperweek).
0671_BNG_adult:
  dataset_description: "Adult (BNG): Scaled UCI Adult Census dataset for income classification. 48,842 samples and 15 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0671_BNG(adult).csv"
  feature_descriptions:
    age: "Age in years"
    workclass: "Workclass category"
    fnlwgt: "Final weight"
    education: "Education level"
    education-num: "Years of education"
    marital-status: "Marital status"
    occupation: "Occupation category"
    relationship: "Relationship status"
    race: "Race category"
    sex: "Gender"
    capitalgain: "Capital gains"
    capitalloss: "Capital losses"
    hoursperweek: "Hours worked per week"
    native-country: "Country of origin"
    class: "Income class label"

# Baseball (BNG) entry.
# NOTE(review): 17 features are listed while the description says "15".
0673_BNG_baseball:
  dataset_description: "Baseball (BNG): Scaled UCI Baseball Hall-of-Fame dataset. ~1,000 samples and 15 game performance attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0673_BNG(baseball).csv"
  feature_descriptions:
    Number_seasons: "Seasons played"
    Games_played: "Games played"
    At_bats: "At bats"
    Runs: "Runs scored"
    Hits: "Hits"
    Doubles: "Doubles"
    Triples: "Triples"
    Home_runs: "Home runs"
    RBIs: "Runs batted in"
    Walks: "Walks"
    Strikeouts: "Strikeouts"
    Batting_average: "Batting average"
    On_base_pct: "On-base percentage"
    Slugging_pct: "Slugging percentage"
    Fielding_ave: "Fielding average"
    Position: "Primary position"
    Hall_of_Fame: "Hall of Fame induction flag"

# Wine (BNG) entry; 14 features listed (13 measurements plus `class`).
0674_BNG_wine:
  dataset_description: "Wine (BNG): Scaled UCI Wine dataset for cultivar classification. 178 samples and 14 chemical analysis attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0674_BNG(wine).csv"
  feature_descriptions:
    Alcohol: "Alcohol content (%)"
    Malic_acid: "Malic acid content (g/L)"
    Ash: "Ash content (g/L)"
    Alcalinity_of_ash: "Ash alcalinity (meq/L)"
    Magnesium: "Magnesium content (mg/L)"
    Total_phenols: "Total phenols (g/L)"
    Flavanoids: "Flavanoid content (g/L)"
    Nonflavanoid_phenols: "Nonflavanoid phenol content (g/L)"
    Proanthocyanins: "Proanthocyanins (g/L)"
    Color_intensity: "Color intensity"
    Hue: "Hue"
    OD280/OD315_of_diluted_wines: "OD280/OD315 ratio"
    Proline: "Proline content (mg/L)"
    class: "Cultivar label"

# Eucalyptus (BNG) entry.
# NOTE(review): `path` points at 0675_TurkiyeStudentEvaluation.csv, the same
# file used by the 0675_TurkiyeStudentEvaluation entry; it does not match this
# dataset's features — likely a copy-paste slip, confirm the real file.
# NOTE(review): 20 features are listed while the description says "15".
0675_BNG_eucalyptus:
  dataset_description: "Eucalyptus (BNG): Leaf measurement dataset for Eucalyptus species. ~80 samples and 15 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0675_TurkiyeStudentEvaluation.csv"
  feature_descriptions:
    Abbrev: "Species abbreviation"
    Rep: "Repetition number"
    Locality: "Collection locality"
    Map_Ref: "Map reference"
    Latitude: "Latitude coordinate"
    Altitude: "Altitude (m)"
    Rainfall: "Annual rainfall (mm)"
    Frosts: "Number of frosts"
    Year: "Collection year"
    Sp: "Species code"
    PMCno: "Pollen morphological class number"
    DBH: "Diameter at breast height (cm)"
    Ht: "Tree height (m)"
    Surv: "Survival status"
    Vig: "Vigor rating"
    Ins_res: "Insect resistance rating"
    Stem_Fm: "Stem form code"
    Crown_Fm: "Crown form code"
    Brnch_Fm: "Branching form code"
    Utility: "Utility value"

# Turkiye student-evaluation survey entry: description, CSV location, and
# per-feature descriptions.
0675_TurkiyeStudentEvaluation:
  dataset_description: "TurkiyeStudentEvaluation: University teaching evaluation survey. ~500 samples and 29 questionnaire attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0675_TurkiyeStudentEvaluation.csv"
  feature_descriptions:
    instr: "Instructor identifier"
    class: "Course code"
    # `nb.repeat` is a single column (the 0761_Students entry for the same
    # survey lists it the same way); it was previously split into the two
    # non-existent features `nb` and `repeat`.
    nb.repeat: "Number of times the student has repeated the course"
    attendance: "Attendance rate"
    difficulty: "Perceived difficulty"
    # Range key: shorthand for the 28 question columns, not a literal column name.
    Q1–Q28: "Responses to questions 1 through 28 on evaluation survey"

# Wisconsin Breast Cancer (BNG) entry.
# NOTE(review): 34 features are listed while the description says "31".
0676_BNG_wisconsin:
  dataset_description: "Wisconsin Breast Cancer (BNG): Scaled UCI Breast Cancer Wisconsin dataset for tumor classification. 569 samples and 31 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0676_BNG(wisconsin).csv"
  feature_descriptions:
    lymph_node_status: "Lymph node status"
    radius_mean: "Mean radius"
    radius_se: "Radius standard error"
    radius_worst: "Worst radius"
    texture_mean: "Mean texture"
    texture_se: "Texture standard error"
    texture_worst: "Worst texture"
    perimeter_mean: "Mean perimeter"
    perimeter_se: "Perimeter standard error"
    perimeter_worst: "Worst perimeter"
    area_mean: "Mean area"
    area_se: "Area standard error"
    area_worst: "Worst area"
    smoothness_mean: "Mean smoothness"
    smoothness_se: "Smoothness standard error"
    smoothness_worst: "Worst smoothness"
    compactness_mean: "Mean compactness"
    compactness_se: "Compactness standard error"
    compactness_worst: "Worst compactness"
    concavity_mean: "Mean concavity"
    concavity_se: "Concavity standard error"
    concavity_worst: "Worst concavity"
    concave_points_mean: "Mean concave points"
    concave_points_se: "Concave points standard error"
    concave_points_worst: "Worst concave points"
    symmetry_mean: "Mean symmetry"
    symmetry_se: "Symmetry standard error"
    symmetry_worst: "Worst symmetry"
    fractal_dimension_mean: "Mean fractal dimension"
    fractal_dimension_se: "Fractal dimension standard error"
    fractal_dimension_worst: "Worst fractal dimension"
    tumor_size: "Tumor size (mm)"
    time: "Time to recurrence"
    class: "Diagnosis label (benign/malignant)"

# COMET_MC_SAMPLE entry: four listed measurements plus the `label` column.
0677_COMET_MC_SAMPLE:
  dataset_description: "COMET_MC_SAMPLE: Particle detector dataset with time-series energy readings. ~10,000 samples and 4 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0677_COMET_MC_SAMPLE.csv"
  feature_descriptions:
    event_id: "Event identifier"
    wire_id: "Wire channel identifier"
    energy_deposit: "Energy deposit measurement"
    relative_time: "Relative time of measurement"
    label: "Event class label"

# Auto Price (BNG) entry; 16 features listed, matching the description.
# `normalized_losses` uses an underscore here, whereas 0681_BNG_autoHorse
# spells it `normalized-losses`.
0678_BNG_auto_price:
  dataset_description: "Auto Price (BNG): Scaled UCI Auto MPG regression as price prediction. ~398 samples and 16 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0678_BNG(auto_price).csv"
  feature_descriptions:
    symboling: "Insurance risk rating"
    normalized_losses: "Normalized losses"
    wheel-base: "Wheelbase (inches)"
    length: "Car length (inches)"
    width: "Car width (inches)"
    height: "Car height (inches)"
    curb-weight: "Curb weight (lbs)"
    engine-size: "Engine displacement (cc)"
    bore: "Cylinder bore (inches)"
    stroke: "Piston stroke (inches)"
    compression-ratio: "Compression ratio"
    horsepower: "Horsepower"
    peak-rpm: "Peak RPM"
    city-mpg: "City MPG"
    highway-mpg: "Highway MPG"
    price: "Market price (USD)"
# AutoHorse (BNG) entry: 26 listed attributes plus the `class` label.
# Other entries in this file state that they share this schema.
0681_BNG_autoHorse:
  dataset_description: "AutoHorse (BNG): Extended automobile dataset combining price and performance with class label. ~1,000 samples and 26 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0681_BNG(autoHorse).csv"
  feature_descriptions:
    symboling: "Risk factor rating"
    normalized-losses: "Normalized losses"
    make: "Manufacturer name"
    fuel-type: "Fuel type"
    aspiration: "Aspiration type"
    num-of-doors: "Number of doors"
    body-style: "Body style"
    drive-wheels: "Drive wheels configuration"
    engine-location: "Engine location"
    wheel-base: "Wheelbase (inches)"
    length: "Length (inches)"
    width: "Width (inches)"
    height: "Height (inches)"
    curb-weight: "Curb weight (lbs)"
    engine-type: "Engine type"
    num-of-cylinders: "Number of cylinders"
    engine-size: "Engine displacement (cc)"
    fuel-system: "Fuel system"
    bore: "Cylinder bore (inches)"
    stroke: "Stroke (inches)"
    compression-ratio: "Compression ratio"
    horsepower: "Horsepower"
    peak-rpm: "Peak RPM"
    city-mpg: "City fuel economy"
    highway-mpg: "Highway fuel economy"
    price: "Market price (USD)"
    class: "Price class label"

# Low Birthweight (BNG) entry; 10 features listed, matching the description.
0682_BNG_lowbwt:
  dataset_description: "Low Birthweight (BNG): BNG-scaled neonatal dataset for low birth weight classification. 189 samples and 10 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0682_BNG(lowbwt).csv"
  feature_descriptions:
    LOW: "Low birth weight flag"
    AGE: "Mother’s age"
    LWT: "Mother’s weight"
    RACE: "Mother’s race category"
    SMOKE: "Smoking flag"
    PTL: "Previous preterm labors"
    HT: "Hypertension history"
    UI: "Uterine irritability"
    FTV: "First trimester visits"
    class: "Low birth weight class"

# AutoPrice (BNG) entry.
# NOTE(review): the description says "16 attributes" while the AutoHorse schema
# reproduced below has 27 entries — confirm against the CSV header.
0684_BNG_autoPrice:
  dataset_description: "AutoPrice (BNG): BNG-scaled automobile price dataset. ~400 samples and 16 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0684_BNG(autoPrice).csv"
  # Same schema as 0681_BNG_autoHorse, written out in full. This used to be the
  # alias `*0681_BNG_autoHorse.feature_descriptions`, but no matching `&` anchor
  # is visible on that entry, and an alias without an anchor makes YAML loaders
  # reject the whole file.
  feature_descriptions:
    symboling: "Risk factor rating"
    normalized-losses: "Normalized losses"
    make: "Manufacturer name"
    fuel-type: "Fuel type"
    aspiration: "Aspiration type"
    num-of-doors: "Number of doors"
    body-style: "Body style"
    drive-wheels: "Drive wheels configuration"
    engine-location: "Engine location"
    wheel-base: "Wheelbase (inches)"
    length: "Length (inches)"
    width: "Width (inches)"
    height: "Height (inches)"
    curb-weight: "Curb weight (lbs)"
    engine-type: "Engine type"
    num-of-cylinders: "Number of cylinders"
    engine-size: "Engine displacement (cc)"
    fuel-system: "Fuel system"
    bore: "Cylinder bore (inches)"
    stroke: "Stroke (inches)"
    compression-ratio: "Compression ratio"
    horsepower: "Horsepower"
    peak-rpm: "Peak RPM"
    city-mpg: "City fuel economy"
    highway-mpg: "Highway fuel economy"
    price: "Market price (USD)"
    class: "Price class label"

# Pharynx (BNG) entry: 10 listed attributes plus the `class` label.
0685_BNG_pharynx:
  dataset_description: "Pharynx (BNG): BNG-scaled pharyngeal cancer dataset for survival classification. 90 samples and 10 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0685_BNG(pharynx).csv"
  feature_descriptions:
    Inst: "Institution code"
    sex: "Patient sex"
    Treatment: "Treatment code"
    Grade: "Tumor grade"
    Age: "Patient age"
    Condition: "Preoperative condition"
    Site: "Tumor site"
    T: "Tumor stage"
    N: "Lymph node involvement"
    Status: "Postoperative status"
    class: "Survival class label"

# EchoMonths (BNG) entry.
# NOTE(review): 10 features are listed while the description says "7 attributes".
0688_BNG_echoMonths:
  dataset_description: "EchoMonths (BNG): Cardiac echocardiography time-series dataset. ~150 samples and 7 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0688_BNG(echoMonths).csv"
  feature_descriptions:
    still_alive: "Survival flag"
    age: "Patient age"
    pericardial: "Pericardial effusion measure"
    fractional: "Fractional shortening"
    epss: "End-systolic septal separation"
    lvdd: "Left ventricular end-diastolic dimension"
    wall_score: "Wall motion score"
    wall_index: "Wall motion index"
    alive_at_1: "Alive at 1 year flag"
    class: "Survival class"

# Breast Tumor (BNG) entry; 10 features listed, matching the description.
0690_BNG_breastTumor:
  dataset_description: "Breast Tumor (BNG): BNG-scaled breast cancer recurrence dataset. 286 samples and 10 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0690_BNG(breastTumor).csv"
  feature_descriptions:
    age: "Patient age group"
    menopause: "Menopause status"
    inv-nodes: "Positive lymph nodes"
    node-caps: "Node capsule presence"
    deg-malig: "Degree of malignancy"
    breast: "Breast side"
    breast-quad: "Tumor quadrant"
    irradiation: "Irradiation treatment"
    recurrence: "Recurrence time"
    class: "Recurrence class label"

# Wine Quality (BNG) entry; 12 features listed, matching the description.
# Feature names use dots as word separators (e.g. fixed.acidity).
0693_BNG_wine_quality:
  dataset_description: "Wine Quality (BNG): BNG-scaled wine chemical analysis dataset. 1,599 samples and 12 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0693_BNG(wine_quality).csv"
  feature_descriptions:
    fixed.acidity: "Fixed acidity (g/dm³)"
    volatile.acidity: "Volatile acidity (g/dm³)"
    citric.acid: "Citric acid (g/dm³)"
    residual.sugar: "Residual sugar (g/dm³)"
    chlorides: "Chloride content (g/dm³)"
    free.sulfur.dioxide: "Free SO₂ (mg/dm³)"
    total.sulfur.dioxide: "Total SO₂ (mg/dm³)"
    density: "Density (g/cm³)"
    pH: "pH value"
    sulphates: "Sulphate content (g/dm³)"
    alcohol: "Alcohol content (%)"
    quality: "Quality score (0–10)"

# Agrawal1 synthetic benchmark entry.
# NOTE(review): 10 features are listed while the description says "8 attributes".
0710_Agrawal1:
  dataset_description: "Agrawal1: Synthetic benchmark for data mining. 1,000,000 samples and 8 attributes. Classification task."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0710_Agrawal1.csv"
  feature_descriptions:
    salary: "Employee salary"
    commission: "Commission rate"
    age: "Age in years"
    elevel: "Education level"
    car: "Car owner flag"
    zipcode: "Zip code area"
    hvalue: "House value"
    hyears: "House age"
    loan: "Loan amount"
    class: "Binary class label"

# Stagger1 entry. STAGGER is a synthetic concept-learning benchmark whose
# instances are objects described by size, color and shape; it is not a
# word-sense-disambiguation corpus, so the descriptions were corrected.
# NOTE(review): the earlier text called the dataset "Small"; the row count is
# not visible here, so no size claim is made — confirm against the CSV.
0711_Stagger1:
  dataset_description: "Stagger (1–3): Synthetic STAGGER concept benchmark datasets; each instance is an object described by size, color and shape. 4 attributes each."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0711_Stagger1.csv"
  feature_descriptions:
    size: "Object size"
    color: "Object color"
    shape: "Object shape"
    class: "Target class label"
# Stagger2 entry. STAGGER is a synthetic concept-learning benchmark whose
# instances are objects described by size, color and shape; it is not a
# word-sense-disambiguation corpus, so the descriptions were corrected.
# NOTE(review): the earlier text called the dataset "Small"; the row count is
# not visible here, so no size claim is made — confirm against the CSV.
0712_Stagger2:
  dataset_description: "Stagger (1–3): Synthetic STAGGER concept benchmark datasets; each instance is an object described by size, color and shape. 4 attributes each."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0712_Stagger2.csv"
  feature_descriptions:
    size: "Object size"
    color: "Object color"
    shape: "Object shape"
    class: "Target class label"
# Stagger3 entry. STAGGER is a synthetic concept-learning benchmark whose
# instances are objects described by size, color and shape; it is not a
# word-sense-disambiguation corpus, so the descriptions were corrected.
# NOTE(review): the earlier text called the dataset "Small"; the row count is
# not visible here, so no size claim is made — confirm against the CSV.
0713_Stagger3:
  dataset_description: "Stagger (1–3): Synthetic STAGGER concept benchmark datasets; each instance is an object described by size, color and shape. 4 attributes each."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0713_Stagger3.csv"
  feature_descriptions:
    size: "Object size"
    color: "Object color"
    shape: "Object shape"
    class: "Target class label"
# LungCancer_Shedden entry: clinical fields plus gene-expression probe columns.
0716_lungcancer_shedden:
  dataset_description: "LungCancer_Shedden: Microarray gene expression dataset for lung cancer survival. 256 samples and 5,000+ probes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0716_lungcancer_shedden.csv"
  feature_descriptions:
    OS_event: "Overall survival event flag"
    histology: "Histological subtype"
    age: "Patient age"
    sex: "Patient sex"
    # Range key: shorthand for the probe columns, not a literal column name.
    g_202387_at–g_206926_s_at: "Expression values for various probes"
    # The two control probes were previously joined on one line as
    # `A:  & B: "..."`, which is not valid YAML; each now has its own key.
    AFFX_HUMGAPDH_M33197_5_at: "Control probe expressions"
    AFFX_HUMGAPDH_M33197_M_at: "Control probe expressions"
    OS_years: "Overall survival time (years)"

# Shared schema for the anneal.ORIG BNG files ("multiple variants").
# The key line previously read `06726_BNG_anneal_ORIG:  (multiple variants):`,
# which is not valid YAML; the parenthetical now lives in this comment.
# NOTE(review): no `path` is given for this entry, and the "06726" prefix does
# not follow the four-digit numbering of neighbouring keys — confirm both.
06726_BNG_anneal_ORIG:
  dataset_description: "Anneal (BNG): Steel annealing dataset, original BNG versions (varying sampling). 798 samples and 38 attributes."
  feature_descriptions:
    family: "Steel family code"
    product-type: "Product type"
    steel: "Steel grade"
    carbon: "Carbon content"
    hardness: "Hardness rating"
    temper-rolling: "Temper rolling code"
    condition: "Annealing condition"
    formability: "Formability level"
    strength: "Tensile strength"
    non-ageing: "Non-ageing alloy flag"
    surface-finish: "Surface finish code"
    surface-quality: "Surface quality code"
    enamelability: "Enamelability rating"
    # These features used to share one comma-joined key, which could never
    # match an individual feature name; each now has its own key with the same
    # shared description.
    bc: "Various material and process attributes"
    bf: "Various material and process attributes"
    bt: "Various material and process attributes"
    bw%2Fme: "Various material and process attributes"
    bl: "Various material and process attributes"
    m: "Various material and process attributes"
    chrom: "Various material and process attributes"
    phos: "Various material and process attributes"
    cbond: "Various material and process attributes"
    marvi: "Various material and process attributes"
    exptl: "Various material and process attributes"
    ferro: "Various material and process attributes"
    corr: "Various material and process attributes"
    # Was spelled "dfblue%2F..." in the joined key; aligned with the spelling
    # used by the 0730–0734_BNG_anneal_ORIG entry.
    blue%2Fbright%2Fvarn%2Fclean: "Various material and process attributes"
    lustre: "Various material and process attributes"
    jurofm: "Various material and process attributes"
    s: "Various material and process attributes"
    p: "Various material and process attributes"
    shape: "Various material and process attributes"
    thick: "Various material and process attributes"
    width: "Various material and process attributes"
    len: "Various material and process attributes"
    oil: "Various material and process attributes"
    bore: "Various material and process attributes"
    packing: "Various material and process attributes"
    class: "Steel defect class"

# Anneal ORIG (BNG) entry covering a range of files (the key contains an
# en dash, "0730–0734"), while `path` names only the 0730 file.
# 38 attributes plus the `class` label are listed.
0730–0734_BNG_anneal_ORIG:
  dataset_description: "Anneal ORIG (BNG): Steel annealing data from BNG with various sampling sizes. All share the same 38-attribute schema. Predicts steel defect class."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0730_BNG(anneal.ORIG,5000,5).csv"
  feature_descriptions:
    family: "Steel family code"
    product-type: "Product type code"
    steel: "Steel grade identifier"
    carbon: "Carbon content (%)"
    hardness: "Hardness rating"
    temper-rolling: "Temper rolling code"
    condition: "Annealing condition"
    formability: "Formability rating"
    strength: "Tensile strength"
    non-ageing: "Non-ageing alloy flag"
    surface-finish: "Surface finish code"
    surface-quality: "Surface quality code"
    enamelability: "Enamelability rating"
    bc: "Process attribute bc"
    bf: "Process attribute bf"
    bt: "Process attribute bt"
    # "%2F" is a URL-encoded "/" (the description below reads "bw/me").
    bw%2Fme: "Process attribute bw/me"
    bl: "Process attribute bl"
    m: "Process attribute m"
    chrom: "Chromium content"
    phos: "Phosphorus content"
    cbond: "Carbon bonding metric"
    marvi: "Martensite variant code"
    exptl: "Experimental factor"
    ferro: "Ferrite percentage"
    corr: "Corrosion rate"
    blue%2Fbright%2Fvarn%2Fclean: "Surface appearance code"
    lustre: "Lustre rating"
    jurofm: "Jug of material factor"
    s: "Shape code"
    p: "Pack code"
    shape: "Shape descriptor"
    thick: "Thickness (mm)"
    width: "Width (mm)"
    len: "Length (mm)"
    oil: "Oil content (%)"
    bore: "Bore diameter (mm)"
    packing: "Packing code"
    class: "Steel defect class label"

# Cleveland Heart (nominal) entry.
# NOTE(review): `path` points at 0730_BNG(anneal.ORIG,5000,5).csv, the same
# file used by the 0730–0734_BNG_anneal_ORIG entry; it does not match this
# dataset's features — likely a copy-paste slip, confirm the real file.
# NOTE(review): 8 features are listed while the description says "14".
0730_cleveland_nominal:
  dataset_description: "Cleveland Heart Nominal: Cleveland Heart Disease dataset with nominal coding. 303 samples and 14 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0730_BNG(anneal.ORIG,5000,5).csv"
  feature_descriptions:
    sex: "Patient sex"
    cp: "Chest pain type"
    fbs: "Fasting blood sugar flag"
    restecg: "Resting ECG result"
    exang: "Exercise-induced angina flag"
    slope: "Slope of peak exercise ST segment"
    thal: "Thalassemia type"
    class: "Disease presence label"

# Collins entry.
# NOTE(review): 20 features are listed while the description says "18".
0736_collins:
  dataset_description: "Collins: Narrative event data for story comprehension tasks. ~300 samples and 18 attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0736_collins.csv"
  feature_descriptions:
    FirstPerson: "First person perspective flag"
    InnerThinking: "Inner thinking event count"
    ThinkPositive: "Positive thought event count"
    ThinkNegative: "Negative thought event count"
    ThinkAhead: "Future thought event count"
    ThinkBack: "Past thought event count"
    Reasoning: "Reasoning event count"
    Share_SocTies: "Social sharing event count"
    Direct_Activity: "Direct activity event count"
    Interacting: "Interaction event count"
    Notifying: "Notification event count"
    LinearGuidance: "Linear guidance event count"
    WordPicture: "Word-picture event count"
    SpaceInterval: "Spatial interval event count"
    Motion: "Motion event count"
    PastEvents: "Past events event count"
    TimeInterval: "Time interval event count"
    ShiftingEvents: "Shifting events event count"
    Text_Coverage: "Text coverage metric"
    Corp.Genre: "Corpus genre label"

# Bike ride entry: description, CSV location, and per-feature descriptions.
# Feature names carry their unit in parentheses.
0738_Bike:
  dataset_description: "Bike: Unsupervised bicycle ride data for clustering. Contains power, speed, terrain, and environmental features."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0738_Bike.csv"
  feature_descriptions:
    Power(watts): "Power output in watts"
    Distance(metres): "Distance traveled in metres"
    Heartrate(BPM): "Heart rate in beats per minute"
    Speed(m/s): "Speed in metres per second"
    WindSpeed(m/s): "Wind speed in metres per second"
    Cadence(revs/s): "Pedal cadence in revolutions per second"
    Elevation(meters): "Elevation in metres"
    # Previously written as `Hill:  slope(%): "..."`, which is not valid YAML
    # (two key separators on one line). Restored as a single quoted key.
    # NOTE(review): confirm the exact column spelling against the CSV header.
    "Hill slope(%)": "Hill slope percentage"
    Temperature(C): "Ambient temperature in °C"
    Penalty: "Penalty metric to minimize"
    Cluster: "Cluster assignment from unsupervised learning"

# Letter Recognition (BNG) entry covering a range of files (the key contains
# an en dash, "0744–0752"), while `path` names only the 0744 file.
# 16 attributes plus the `class` label are listed.
0744–0752_BNG_letter:
  dataset_description: "Letter Recognition (BNG): BNG-scaled variants of the UCI Letter dataset with varying sample sizes and class counts. 16 attributes each."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0744_BNG(letter,1000,1).csv"
  feature_descriptions:
    x-box: "Horizontal position of box"
    y-box: "Vertical position of box"
    width: "Width of box"
    high: "Height of box"
    onpix: "Number of on-pixels"
    x-bar: "Mean x position of on-pixels"
    y-bar: "Mean y position of on-pixels"
    x2bar: "Second order moment about x-bar"
    y2bar: "Second order moment about y-bar"
    xybar: "Cross-correlation moment"
    x2ybr: "Second order cross moment"
    xy2br: "Alternate cross moment"
    x-ege: "Edge count in x-direction"
    xegvy: "Variance of x-edge"
    y-ege: "Edge count in y-direction"
    yegvx: "Variance of y-edge"
    class: "Letter class label (A–Z)"

# All subsequent 'anneal.ORIG' variants, 'letter' variants, and other BNG-prefixed repeats share these schemas; no further unique feature descriptions are needed.

# Audiology (BNG) entry covering a range of files (the key contains an
# en dash, "0753–0758"), while `path` names only the 0753 file.
# NOTE(review): keys ending in `*` (m_p_sn_gt_*, mod_s_*, s_sn_gt_*) look like
# wildcards for families of columns rather than literal names — confirm how
# the consumer matches them.
0753–0758_BNG_audiology:
  dataset_description: "Audiology (BNG): BNG-scaled audiological test dataset with various sampling sizes. All share the same ~60-feature schema for hearing impairment classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0753_BNG(audiology,1000,1).csv"
  feature_descriptions:
    age_gt_60: "Age > 60 years flag"
    air: "Air conduction threshold"
    airBoneGap: "Air–bone gap (dB)"
    ar_c: "Acoustic reflex contralateral"
    ar_u: "Acoustic reflex ipsilateral"
    bone: "Bone conduction threshold"
    boneAbnormal: "Abnormal bone conduction flag"
    history_buzzing: "History of buzzing"
    history_dizziness: "History of dizziness"
    history_fluctuating: "History of fluctuating hearing"
    history_fullness: "History of ear fullness"
    history_heredity: "Family history of hearing loss"
    history_nausea: "History of nausea"
    history_noise: "Noise exposure history"
    history_recruitment: "Recruitment history"
    history_ringing: "History of tinnitus ringing"
    history_roaring: "History of roaring tinnitus"
    history_vomiting: "History of vomiting"
    late_wave_poor: "Poor late wave V"
    m_at_2k: "Middle ear at 2 kHz"
    m_cond_lt_1k: "Middle ear conduction <1 kHz"
    m_gt_1k: "Middle ear >1 kHz"
    m_gt_2k: "Middle ear >2 kHz"
    m_sn_gt_1k: "SN ratio >1 kHz"
    m_sn_gt_2k: "SN ratio >2 kHz"
    m_sn_gt_3k: "SN ratio >3 kHz"
    m_sn_gt_4k: "SN ratio >4 kHz"
    m_sn_gt_500: "SN ratio >500 Hz"
    m_sn_gt_6k: "SN ratio >6 kHz"
    m_sn_lt_1k: "SN ratio <1 kHz"
    m_sn_lt_2k: "SN ratio <2 kHz"
    m_sn_lt_3k: "SN ratio <3 kHz"
    m_sn_lt_4k: "SN ratio <4 kHz"
    m_sn_lt_500: "SN ratio <500 Hz"
    m_p_sn_gt_*: "Compliance P_SN at various frequencies"
    m_sn_mixed: "Mixed SN ratio"
    mod_s_*: "Modulated SN at various frequencies"
    # notch_4k and notch_at_4k are both listed with near-identical descriptions.
    notch_4k: "Notch at 4 kHz"
    notch_at_4k: "Audiometric notch at 4 kHz"
    o_ar_c: "Otoacoustic emission contralateral"
    o_ar_u: "Otoacoustic emission ipsilateral"
    s_sn_gt_*: "TEOAE at various frequencies"
    speech: "Speech discrimination score"
    static_normal: "Static admittance normal flag"
    tymp: "Tympanogram type"
    viith_nerve_signs: "VIIth nerve signs"
    wave_V_delayed: "Wave V delayed flag"
    waveform_ItoV_prolonged: "ABR I–V interval prolonged"
    class: "Hearing impairment class"

# Autos (BNG) entry for the 1000-instance sample.
0759_BNG_autos:
  dataset_description: "Autos (BNG): BNG-scaled automobile dataset with binary price classification. Identical schema across sample sizes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0759_BNG(autos,1000,1).csv"
  # Spelled out in full. The previous value,
  # `*0681_BNG_autoHorse.feature_descriptions`, is a YAML alias and only loads
  # if an anchor with that exact name is defined earlier in the file.
  # NOTE(review): columns are copied from the other autos entries in this
  # file; confirm against the CSV header.
  feature_descriptions:
    normalized-losses: "Average loss payment per insured vehicle year, normalized for the specific dataset"
    make: "Car manufacturer brand"
    fuel-type: "Type of fuel (gas, diesel, etc.)"
    aspiration: "Engine aspiration type (std, turbo)"
    num-of-doors: "Number of doors on the vehicle"
    body-style: "Body configuration (sedan, hatchback, etc.)"
    drive-wheels: "Type of drive wheels (fwd, rwd, 4wd)"
    engine-location: "Location of the engine (front, rear)"
    wheel-base: "Distance between centers of front and rear wheels (inches)"
    length: "Overall length of vehicle (inches)"
    width: "Overall width of vehicle (inches)"
    height: "Overall height of vehicle (inches)"
    curb-weight: "Weight of vehicle without occupants or baggage (lbs)"
    engine-type: "Type of engine design"
    num-of-cylinders: "Number of cylinders in the engine"
    engine-size: "Engine displacement (cc)"
    fuel-system: "Fuel delivery system type"
    bore: "Cylinder bore diameter (inches)"
    stroke: "Piston stroke length (inches)"
    compression-ratio: "Ratio of cylinder volume (compression)"
    horsepower: "Engine power output (hp)"
    peak-rpm: "Engine speed at peak horsepower (rpm)"
    city-mpg: "Fuel consumption in city driving (mpg)"
    highway-mpg: "Fuel consumption in highway driving (mpg)"
    price: "Market price of the vehicle"
    symboling: "Insurance risk rating (-3 to +3)"

# Teaching-evaluation survey entry.
0761_Students:
  dataset_description: "TurkiyeStudentEvaluation: University teaching evaluation survey with 29 questionnaire items and class label."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0761_Students.csv"
  feature_descriptions:
    instr: "Instructor ID"
    # `class` was listed twice ("Course code" and "Evaluation class label").
    # Mapping keys must be unique and most parsers silently keep only the
    # last one, so both descriptions are merged into a single entry.
    class: "Course code (evaluation class label)"
    nb.repeat: "Times course repeated"
    attendance: "Attendance rate"
    difficulty: "Perceived difficulty"
    # NOTE(review): range-style key; presumably stands for columns Q1
    # through Q28 rather than one literal column name.
    Q1–Q28: "Survey responses 1–28"

# Autos (BNG) entry for the 5000-instance sample.
0762_BNG_autos:
  dataset_description: "Autos (BNG): Same as BNG_autos above, different sampling."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0762_BNG(autos,5000,1).csv"
  # Spelled out in full. The previous value,
  # `*0759_BNG_autos.feature_descriptions`, is a YAML alias, and the
  # `0759_BNG_autos` entry defines no anchor with that name.
  # NOTE(review): columns are copied from the other autos entries in this
  # file; confirm against the CSV header.
  feature_descriptions:
    normalized-losses: "Average loss payment per insured vehicle year, normalized for the specific dataset"
    make: "Car manufacturer brand"
    fuel-type: "Type of fuel (gas, diesel, etc.)"
    aspiration: "Engine aspiration type (std, turbo)"
    num-of-doors: "Number of doors on the vehicle"
    body-style: "Body configuration (sedan, hatchback, etc.)"
    drive-wheels: "Type of drive wheels (fwd, rwd, 4wd)"
    engine-location: "Location of the engine (front, rear)"
    wheel-base: "Distance between centers of front and rear wheels (inches)"
    length: "Overall length of vehicle (inches)"
    width: "Overall width of vehicle (inches)"
    height: "Overall height of vehicle (inches)"
    curb-weight: "Weight of vehicle without occupants or baggage (lbs)"
    engine-type: "Type of engine design"
    num-of-cylinders: "Number of cylinders in the engine"
    engine-size: "Engine displacement (cc)"
    fuel-system: "Fuel delivery system type"
    bore: "Cylinder bore diameter (inches)"
    stroke: "Piston stroke length (inches)"
    compression-ratio: "Ratio of cylinder volume (compression)"
    horsepower: "Engine power output (hp)"
    peak-rpm: "Engine speed at peak horsepower (rpm)"
    city-mpg: "Fuel consumption in city driving (mpg)"
    highway-mpg: "Fuel consumption in highway driving (mpg)"
    price: "Market price of the vehicle"
    symboling: "Insurance risk rating (-3 to +3)"
# Autos (BNG) entry for the 5000-instance sample, ratio 5; column
# descriptions are written out in full.
0763_BNG(autos,5000,5).csv:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0763_BNG(autos,5000,5).csv"
  dataset_description: "Automobile dataset (BNG scaled) with 5000 instances and compression ratio 5. Contains vehicle specifications and attributes for price prediction."
  feature_descriptions:
    # Insurance and identity
    normalized-losses: "Average loss payment per insured vehicle year, normalized for the specific dataset"
    make: "Car manufacturer brand"
    # Configuration
    fuel-type: "Type of fuel (gas, diesel, etc.)"
    aspiration: "Engine aspiration type (std, turbo)"
    num-of-doors: "Number of doors on the vehicle"
    body-style: "Body configuration (sedan, hatchback, etc.)"
    drive-wheels: "Type of drive wheels (fwd, rwd, 4wd)"
    engine-location: "Location of the engine (front, rear)"
    # Dimensions and weight
    wheel-base: "Distance between centers of front and rear wheels (inches)"
    length: "Overall length of vehicle (inches)"
    width: "Overall width of vehicle (inches)"
    height: "Overall height of vehicle (inches)"
    curb-weight: "Weight of vehicle without occupants or baggage (lbs)"
    # Engine
    engine-type: "Type of engine design"
    num-of-cylinders: "Number of cylinders in the engine"
    engine-size: "Engine displacement (cc)"
    fuel-system: "Fuel delivery system type"
    bore: "Cylinder bore diameter (inches)"
    stroke: "Piston stroke length (inches)"
    compression-ratio: "Ratio of cylinder volume (compression)"
    horsepower: "Engine power output (hp)"
    peak-rpm: "Engine speed at peak horsepower (rpm)"
    # Economy, price and risk rating
    city-mpg: "Fuel consumption in city driving (mpg)"
    highway-mpg: "Fuel consumption in highway driving (mpg)"
    price: "Market price of the vehicle"
    symboling: "Insurance risk rating (-3 to +3)"

# Autos (BNG) entry for the 5000-instance sample, ratio 10. Its feature map
# carries the `auto_features` anchor that later autos entries alias, so it
# must stay ahead of them in the file.
0764_BNG(autos,5000,10).csv:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0764_BNG(autos,5000,10).csv"
  dataset_description: "Automobile dataset (BNG scaled) with 5000 instances and compression ratio 10. Same schema as 0763 with different compression."
  feature_descriptions: &auto_features
    # Insurance and identity
    normalized-losses: "Average loss payment per insured vehicle year, normalized for the specific dataset"
    make: "Car manufacturer brand"
    # Configuration
    fuel-type: "Type of fuel (gas, diesel, etc.)"
    aspiration: "Engine aspiration type (std, turbo)"
    num-of-doors: "Number of doors on the vehicle"
    body-style: "Body configuration (sedan, hatchback, etc.)"
    drive-wheels: "Type of drive wheels (fwd, rwd, 4wd)"
    engine-location: "Location of the engine (front, rear)"
    # Dimensions and weight
    wheel-base: "Distance between centers of front and rear wheels (inches)"
    length: "Overall length of vehicle (inches)"
    width: "Overall width of vehicle (inches)"
    height: "Overall height of vehicle (inches)"
    curb-weight: "Weight of vehicle without occupants or baggage (lbs)"
    # Engine
    engine-type: "Type of engine design"
    num-of-cylinders: "Number of cylinders in the engine"
    engine-size: "Engine displacement (cc)"
    fuel-system: "Fuel delivery system type"
    bore: "Cylinder bore diameter (inches)"
    stroke: "Piston stroke length (inches)"
    compression-ratio: "Ratio of cylinder volume (compression)"
    horsepower: "Engine power output (hp)"
    peak-rpm: "Engine speed at peak horsepower (rpm)"
    # Economy, price and risk rating
    city-mpg: "Fuel consumption in city driving (mpg)"
    highway-mpg: "Fuel consumption in highway driving (mpg)"
    price: "Market price of the vehicle"
    symboling: "Insurance risk rating (-3 to +3)"

# Duplicate definition of `0764_DiabeticMellitus.csv` removed here: mapping
# keys must be unique, and the same entry is defined again later in this file.

# Autos (BNG), 10000-instance sample, ratio 1; reuses the column descriptions
# anchored as `auto_features`.
0765_BNG(autos,10000,1).csv:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0765_BNG(autos,10000,1).csv"
  dataset_description: "Automobile dataset (BNG scaled) with 10000 instances and compression ratio 1. Expanded version of the auto dataset."
  feature_descriptions: *auto_features

# Autos (BNG), 10000-instance sample, ratio 5; reuses the column descriptions
# anchored as `auto_features`.
0766_BNG(autos,10000,5).csv:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0766_BNG(autos,10000,5).csv"
  dataset_description: "Automobile dataset (BNG scaled) with 10000 instances and compression ratio 5."
  feature_descriptions: *auto_features

# Autos (BNG), 10000-instance sample, ratio 10; reuses the column descriptions
# anchored as `auto_features`.
0767_BNG(autos,10000,10).csv:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0767_BNG(autos,10000,10).csv"
  dataset_description: "Automobile dataset (BNG scaled) with 10000 instances and compression ratio 10."
  feature_descriptions: *auto_features

# Lymphography (BNG), 1000-instance sample, ratio 1. Its feature map carries
# the `lymph_features` anchor that the following lymph entries alias, so it
# must stay ahead of them in the file.
0768_BNG(lymph,1000,1).csv:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0768_BNG(lymph,1000,1).csv"
  dataset_description: "Lymphography dataset (BNG scaled) with 1000 instances and compression ratio 1. Contains features for lymphatic disease classification."
  feature_descriptions: &lymph_features
    # Several keys and descriptions below end mid-phrase (e.g. "Regeneration
    # of"); they are kept exactly as written.
    lymphatics: "Condition of lymphatic vessels"
    block_of_affere: "Block of afferent vessels"
    bl_of_lymph_c: "Block of lymph c"
    bl_of_lymph_s: "Block of lymph s"
    by_pass: "By pass present"
    extravasates: "Extravasates"
    regeneration_of: "Regeneration of"
    early_uptake_in: "Early uptake in"
    lym_nodes_dimin: "Lymph nodes diminished"
    lym_nodes_enlar: "Lymph nodes enlarged"
    changes_in_lym: "Changes in lymph"
    defect_in_node: "Defect in node"
    changes_in_node: "Changes in node"
    changes_in_stru: "Changes in structure"
    special_forms: "Special forms"
    dislocation_of: "Dislocation of"
    exclusion_of_no: "Exclusion of no"
    no_of_nodes_in: "Number of nodes in"
    # Target column
    class: "Diagnostic class"

# Lymphography (BNG), 1000-instance sample, ratio 5; reuses the column
# descriptions anchored as `lymph_features`.
0769_BNG(lymph,1000,5).csv:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0769_BNG(lymph,1000,5).csv"
  dataset_description: "Lymphography dataset (BNG scaled) with 1000 instances and compression ratio 5."
  feature_descriptions: *lymph_features

# Lymphography (BNG), 1000-instance sample, ratio 10; reuses the column
# descriptions anchored as `lymph_features`.
0770_BNG(lymph,1000,10).csv:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0770_BNG(lymph,1000,10).csv"
  dataset_description: "Lymphography dataset (BNG scaled) with 1000 instances and compression ratio 10."
  feature_descriptions: *lymph_features

# Lymphography (BNG), 5000-instance sample, ratio 1; reuses the column
# descriptions anchored as `lymph_features`.
0771_BNG(lymph,5000,1).csv:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0771_BNG(lymph,5000,1).csv"
  dataset_description: "Lymphography dataset (BNG scaled) with 5000 instances and compression ratio 1."
  feature_descriptions: *lymph_features

# Lymphography (BNG), 5000-instance sample, ratio 5; reuses the column
# descriptions anchored as `lymph_features`.
0772_BNG(lymph,5000,5).csv:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0772_BNG(lymph,5000,5).csv"
  dataset_description: "Lymphography dataset (BNG scaled) with 5000 instances and compression ratio 5."
  feature_descriptions: *lymph_features

# Lymphography (BNG), 5000-instance sample, ratio 10; reuses the column
# descriptions anchored as `lymph_features`.
0773_BNG(lymph,5000,10).csv:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0773_BNG(lymph,5000,10).csv"
  dataset_description: "Lymphography dataset (BNG scaled) with 5000 instances and compression ratio 10."
  feature_descriptions: *lymph_features

# Supply-chain (scm20d) entry.
0773_scm20d.csv:
  dataset_description: "Supply Chain Management dataset with demand and inventory metrics."
  # The path previously pointed at the lymph CSV
  # (0773_BNG(lymph,5000,10).csv), which belongs to a different entry.
  # NOTE(review): file name taken from this entry's key; confirm it exists
  # on disk.
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0773_scm20d.csv"
  feature_descriptions:
    storageUnit: "Storage unit identifier"
    storageCost: "Cost of storage"
    interestRate: "Interest rate"
    compidx0lt2: "Component index 0 less than 2"
    compidx4lt2: "Component index 4 less than 2"
    compidx6lt2: "Component index 6 less than 2"
    compidx8lt2: "Component index 8 less than 2"
    compidx10lt2: "Component index 10 less than 2"
    compidx12lt2: "Component index 12 less than 2"
    compidx14lt2: "Component index 14 less than 2"
    compidx0lt6: "Component index 0 less than 6"
    compidx4lt6: "Component index 4 less than 6"
    compidx6lt6: "Component index 6 less than 6"
    compidx8lt6: "Component index 8 less than 6"
    compidx10lt6: "Component index 10 less than 6"
    compidx12lt6: "Component index 12 less than 6"
    compidx14lt6: "Component index 14 less than 6"
    # Was a second `compidx8lt10` key (duplicate keys are invalid). Every
    # other series starts at index 0, so this slot is index 0.
    compidx0lt10: "Component index 0 less than 10"
    compidx4lt10: "Component index 4 less than 10"
    compidx6lt10: "Component index 6 less than 10"
    compidx8lt10: "Component index 8 less than 10"
    compidx10lt10: "Component index 10 less than 10"
    compidx12lt10: "Component index 12 less than 10"
    compidx14lt10: "Component index 14 less than 10"
    compidx0lt20: "Component index 0 less than 20"
    compidx4lt20: "Component index 4 less than 20"
    compidx6lt20: "Component index 6 less than 20"
    compidx8lt20: "Component index 8 less than 20"
    compidx10lt20: "Component index 10 less than 20"
    compidx0lt30: "Component index 0 less than 30"
    compidx4lt30: "Component index 4 less than 30"
    compidx6lt30: "Component index 6 less than 30"
    compidx8lt30: "Component index 8 less than 30"
    compidx10lt30: "Component index 10 less than 30"
    compidx12lt30: "Component index 12 less than 30"
    compidx14lt30: "Component index 14 less than 30"
    sku10: "Stock keeping unit 10"
    sku11: "Stock keeping unit 11"
    sku12: "Stock keeping unit 12"
    sku13: "Stock keeping unit 13"
    sku14: "Stock keeping unit 14"
    sku15: "Stock keeping unit 15"
    sku16: "Stock keeping unit 16"
    sku17: "Stock keeping unit 17"
    sku18: "Stock keeping unit 18"
    sku19: "Stock keeping unit 19"
    demandseg1l2: "Demand segment 1 level 2"
    demandseg1l4: "Demand segment 1 level 4"
    demandseg1l6: "Demand segment 1 level 6"
    demandseg2: "Demand segment 2"
    demandseg2l1: "Demand segment 2 level 1"
    demandseg2l2: "Demand segment 2 level 2"
    demandseg2l4: "Demand segment 2 level 4"
    demandseg2l8: "Demand segment 2 level 8"
    demandseg3: "Demand segment 3"
    demandseg3l1: "Demand segment 3 level 1"
    demandseg3l2: "Demand segment 3 level 2"
    demandseg3l4: "Demand segment 3 level 4"
    demandseg3l8: "Demand segment 3 level 8"
    LBL: "Label indicator"
    MTLp2A: "Material type level p2A"
    MTLp3A: "Material type level p3A"
    MTLp4A: "Material type level p4A"
    MTLp5A: "Material type level p5A"
    MTLp6A: "Material type level p6A"
    MTLp7A: "Material type level p7A"
    MTLp8A: "Material type level p8A"
    MTLp9A: "Material type level p9A"
    MTLp10A: "Material type level p10A"
    MTLp11A: "Material type level p11A"
    MTLp12A: "Material type level p12A"
    MTLp13A: "Material type level p13A"
    MTLp14A: "Material type level p14A"
    MTLp15A: "Material type level p15A"
    MTLp16A: "Material type level p16A"

# Lymphography (BNG), 10000-instance sample, ratio 1; reuses the column
# descriptions anchored as `lymph_features`.
0774_BNG(lymph,10000,1).csv:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0774_BNG(lymph,10000,1).csv"
  dataset_description: "Lymphography dataset (BNG scaled) with 10000 instances and compression ratio 1."
  feature_descriptions: *lymph_features
# Duplicate definition of `0763_BNG(autos,5000,5).csv` removed here: mapping
# keys must be unique, and an identical entry is defined earlier in this file.

# Duplicate definition of `0764_BNG(autos,5000,10).csv` removed here: mapping
# keys must be unique, and the earlier entry with the same content (which
# also carries the `auto_features` anchor) is the one to keep.

# Diabetic Mellitus clinical entry.
0764_DiabeticMellitus.csv:
  dataset_description: "Diabetic Mellitus clinical dataset with comprehensive diagnostic indicators and patient attributes."
  # The path previously pointed at the autos CSV
  # (0764_BNG(autos,5000,10).csv), which belongs to a different entry.
  # NOTE(review): file name taken from this entry's key; confirm it exists
  # on disk.
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0764_DiabeticMellitus.csv"
  # NOTE(review): the expansions of the three-letter column codes below are
  # unverified against a data dictionary.
  feature_descriptions:
    AGE: "Patient age"
    GLU: "Glucose level"
    DBP: "Diastolic blood pressure"
    BMI: "Body mass index"
    WGT: "Weight"
    OCP: "Oral contraceptive use"
    SEX: "Gender"
    DIT: "Diet indicator"
    HST: "Medical history"
    RSB: "Risk behavior"
    LOE: "Level of education"
    DCD: "Diabetes-related complication"
    EXT: "Exercise level"
    FRU: "Fruit consumption"
    WLG: "Weight loss goal"
    FLS: "Fasting lipid screen"
    BRV: "Blood relative with diabetes"
    INT: "Intervention indicator"
    SHC: "Self-health care"
    TLF: "Time since last food intake"
    RIG: "Regular insulin dosage"
    RIV: "Regular IV insulin"
    SWT: "Sweetener use"
    SHK: "Shakiness indicator"
    VDS: "Vision disturbance"
    WKN: "Weakness indicator"
    HNG: "Hunger level"
    DZN: "Dizziness"
    NMV: "Numbness/tingling"
    HDC: "Headache"
    FEB: "Fever"
    IRT_2: "Insulin reaction type 2"
    NSA: "Nausea"
    CES: "Central nervous system symptoms"
    SLS: "Sleep symptoms"
    DLB: "Deliberate insulin misuse"
    DRN_2: "Drinking indicator"
    CFS: "Confusion"
    CRT: "Critical values"
    ATT: "Attention deficit"
    TLC: "Total cholesterol"
    CTS: "Cataracts"
    FTG: "Fatigue"
    DRN: "Drinking frequency"
    SFT: "Soft tissue infection"
    MLD: "Mild symptoms"
    SLT: "Salt intake"
    DRB: "Drug abuse"
    SFN: "Sensory function"
    SOB: "Shortness of breath"
    HBP: "High blood pressure"
    CDC: "Cardiovascular disease"
    LOA: "Level of activity"
    NSA_3: "Nausea level 3"
    VNT: "Vomiting"
    DIS: "Disease indicator"
    FTG_3: "Fatigue level 3"
    SCE: "Socioeconomic indicator"
    GSV: "Glucose screening value"
    BMR: "Basal metabolic rate"
    DSV: "Disease severity"
    DRV: "Driving status"
    EVP: "Evaporation rate"
    FRD: "Fraud indicator"
    SAD: "Sadness/depression"
    SOB_2: "Shortness of breath severity"
    PCJ: "Projected complication indicator"
    FAM: "Family history"
    SWG: "Swelling"
    LIN: "Linear indicator"
    RIH: "Regular insulin at home"
    EXP: "Experience level"
    NRV_2: "Nervous indicator"
    HIT: "History of trauma"
    PPT: "Precipitating event"
    TRM: "Trauma indicator"
    FTG_4: "Fatigue level 4"
    WTL_2: "Weight loss indicator"
    PRE: "Pregnancy"
    ETN: "Ethnicity"
    CIT: "Citizenship"
    FTG_5: "Fatigue level 5"
    SBF: "Substance abuse"
    WTG: "Weight gain"
    CSS: "CSF sample indicator"
    LHV: "Living at home"
    PLH: "Platelet count high"
    TYPE: "Diabetes type"

# Duplicate definition of `0765_BNG(autos,10000,1).csv` removed here: mapping
# keys must be unique, and the entry is already defined earlier in this file
# with the same column descriptions (via the `auto_features` alias).

# Duplicate definition of `0766_BNG(autos,10000,5).csv` removed here: mapping
# keys must be unique, and the entry is already defined earlier in this file
# with the same column descriptions (via the `auto_features` alias).

# Duplicate definition of `0767_BNG(autos,10000,10).csv` removed here: mapping
# keys must be unique, and the entry is already defined earlier in this file
# with the same column descriptions (via the `auto_features` alias).

# Duplicate definition of `0768_BNG(lymph,1000,1).csv` removed here: mapping
# keys must be unique, and the earlier entry with the same content (which
# also carries the `lymph_features` anchor) is the one to keep.

# Duplicate definition of `0769_BNG(lymph,1000,5).csv` removed here: mapping
# keys must be unique, and the entry is already defined earlier in this file
# with the same column descriptions (via the `lymph_features` alias).

# Duplicate definition of `0770_BNG(lymph,1000,10).csv` removed here: mapping
# keys must be unique, and the entry is already defined earlier in this file
# with the same column descriptions (via the `lymph_features` alias).

# Duplicate definition of `0771_BNG(lymph,5000,1).csv` removed here: mapping
# keys must be unique, and the entry is already defined earlier in this file
# with the same column descriptions (via the `lymph_features` alias).

# Duplicate definition of `0772_BNG(lymph,5000,5).csv` removed here: mapping
# keys must be unique, and the entry is already defined earlier in this file
# with the same column descriptions (via the `lymph_features` alias).

# Catalog entry for 0773_BNG(lymph,5000,10).csv: description, CSV location,
# and one short description per column.
0773_BNG(lymph,5000,10).csv:
  dataset_description: 'Lymphography dataset (BNG scaled) with 5000 instances and compression ratio 10.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0773_BNG(lymph,5000,10).csv'
  feature_descriptions:
    lymphatics: 'Condition of lymphatic vessels'
    block_of_affere: 'Block of afferent vessels'
    bl_of_lymph_c: 'Block of lymph c'
    bl_of_lymph_s: 'Block of lymph s'
    by_pass: 'By pass present'
    extravasates: 'Extravasates'
    regeneration_of: 'Regeneration of'
    early_uptake_in: 'Early uptake in'
    lym_nodes_dimin: 'Lymph nodes diminished'
    lym_nodes_enlar: 'Lymph nodes enlarged'
    changes_in_lym: 'Changes in lymph'
    defect_in_node: 'Defect in node'
    changes_in_node: 'Changes in node'
    changes_in_stru: 'Changes in structure'
    special_forms: 'Special forms'
    dislocation_of: 'Dislocation of'
    exclusion_of_no: 'Exclusion of no'
    no_of_nodes_in: 'Number of nodes in'
    class: 'Diagnostic class'

# Catalog entry for 0773_scm20d.csv: description, CSV location, and one
# short description per column.
0773_scm20d.csv:
  dataset_description: "Supply Chain Management dataset with demand and inventory metrics."
  # NOTE(review): path previously pointed at 0773_BNG(lymph,5000,10).csv (the
  # preceding entry's file); corrected to match this entry's key, as every
  # other entry does. Confirm the file exists at this location.
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0773_scm20d.csv"
  feature_descriptions:
    storageUnit: "Storage unit identifier"
    storageCost: "Cost of storage"
    interestRate: "Interest rate"
    compidx0lt2: "Component index 0 less than 2"
    compidx4lt2: "Component index 4 less than 2"
    compidx6lt2: "Component index 6 less than 2"
    compidx8lt2: "Component index 8 less than 2"
    compidx10lt2: "Component index 10 less than 2"
    compidx12lt2: "Component index 12 less than 2"
    compidx14lt2: "Component index 14 less than 2"
    compidx0lt6: "Component index 0 less than 6"
    compidx4lt6: "Component index 4 less than 6"
    compidx6lt6: "Component index 6 less than 6"
    compidx8lt6: "Component index 8 less than 6"
    compidx10lt6: "Component index 10 less than 6"
    compidx12lt6: "Component index 12 less than 6"
    compidx14lt6: "Component index 14 less than 6"
    # NOTE(review): this slot held a duplicate `compidx8lt10` key (invalid
    # YAML; most parsers silently keep only the last one). Renamed to index 0
    # to follow the 0,4,6,8,... pattern of the other series -- confirm against
    # the CSV header.
    compidx0lt10: "Component index 0 less than 10"
    compidx4lt10: "Component index 4 less than 10"
    compidx6lt10: "Component index 6 less than 10"
    compidx8lt10: "Component index 8 less than 10"
    compidx10lt10: "Component index 10 less than 10"
    compidx12lt10: "Component index 12 less than 10"
    compidx14lt10: "Component index 14 less than 10"
    compidx0lt20: "Component index 0 less than 20"
    compidx4lt20: "Component index 4 less than 20"
    compidx6lt20: "Component index 6 less than 20"
    compidx8lt20: "Component index 8 less than 20"
    compidx10lt20: "Component index 10 less than 20"
    compidx0lt30: "Component index 0 less than 30"
    compidx4lt30: "Component index 4 less than 30"
    compidx6lt30: "Component index 6 less than 30"
    compidx8lt30: "Component index 8 less than 30"
    compidx10lt30: "Component index 10 less than 30"
    compidx12lt30: "Component index 12 less than 30"
    compidx14lt30: "Component index 14 less than 30"
    sku10: "Stock keeping unit 10"
    sku11: "Stock keeping unit 11"
    sku12: "Stock keeping unit 12"
    sku13: "Stock keeping unit 13"
    sku14: "Stock keeping unit 14"
    sku15: "Stock keeping unit 15"
    sku16: "Stock keeping unit 16"
    sku17: "Stock keeping unit 17"
    sku18: "Stock keeping unit 18"
    sku19: "Stock keeping unit 19"
    demandseg1l2: "Demand segment 1 level 2"
    demandseg1l4: "Demand segment 1 level 4"
    demandseg1l6: "Demand segment 1 level 6"
    demandseg2: "Demand segment 2"
    demandseg2l1: "Demand segment 2 level 1"
    demandseg2l2: "Demand segment 2 level 2"
    demandseg2l4: "Demand segment 2 level 4"
    demandseg2l8: "Demand segment 2 level 8"
    demandseg3: "Demand segment 3"
    demandseg3l1: "Demand segment 3 level 1"
    demandseg3l2: "Demand segment 3 level 2"
    demandseg3l4: "Demand segment 3 level 4"
    demandseg3l8: "Demand segment 3 level 8"
    LBL: "Label indicator"
    MTLp2A: "Material type level p2A"
    MTLp3A: "Material type level p3A"
    MTLp4A: "Material type level p4A"
    MTLp5A: "Material type level p5A"
    MTLp6A: "Material type level p6A"
    MTLp7A: "Material type level p7A"
    MTLp8A: "Material type level p8A"
    MTLp9A: "Material type level p9A"
    MTLp10A: "Material type level p10A"
    MTLp11A: "Material type level p11A"
    MTLp12A: "Material type level p12A"
    MTLp13A: "Material type level p13A"
    MTLp14A: "Material type level p14A"
    MTLp15A: "Material type level p15A"
    MTLp16A: "Material type level p16A"

# Catalog entry for 0774_BNG(lymph,10000,1).csv: description, CSV location,
# and one short description per column.
0774_BNG(lymph,10000,1).csv:
  dataset_description: "Lymphography dataset (BNG scaled) with 10000 instances and compression ratio 1."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0774_BNG(lymph,10000,1).csv"
  feature_descriptions:
    lymphatics: "Condition of lymphatic vessels"
    block_of_affere: "Block of afferent vessels"
    bl_of_lymph_c: "Block of lymph c"
    bl_of_lymph_s: "Block of lymph s"
    by_pass: "By pass present"
    extravasates: "Extravasates"
    regeneration_of: "Regeneration of"
    early_uptake_in: "Early uptake in"
    lym_nodes_dimin: "Lymph nodes diminished"
    lym_nodes_enlar: "Lymph nodes enlarged"
    changes_in_lym: "Changes in lymph"
    defect_in_node: "Defect in node"
    changes_in_node: "Changes in node"
    changes_in_stru: "Changes in structure"
    special_forms: "Special forms"
    dislocation_of: "Dislocation of"
    exclusion_of_no: "Exclusion of no"
    no_of_nodes_in: "Number of nodes in"
    class: "Diagnostic class"

# Catalog entry for 0775_BNG(lymph,10000,5).csv: description, CSV location,
# and one short description per column.
0775_BNG(lymph,10000,5).csv:
  dataset_description: "Lymphography dataset (BNG scaled) with 10000 instances and compression ratio 5."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0775_BNG(lymph,10000,5).csv"
  feature_descriptions:
    lymphatics: "Condition of lymphatic vessels"
    block_of_affere: "Block of afferent vessels"
    bl_of_lymph_c: "Block of lymph c"
    bl_of_lymph_s: "Block of lymph s"
    by_pass: "By pass present"
    extravasates: "Extravasates"
    regeneration_of: "Regeneration of"
    early_uptake_in: "Early uptake in"
    lym_nodes_dimin: "Lymph nodes diminished"
    lym_nodes_enlar: "Lymph nodes enlarged"
    changes_in_lym: "Changes in lymph"
    defect_in_node: "Defect in node"
    changes_in_node: "Changes in node"
    changes_in_stru: "Changes in structure"
    special_forms: "Special forms"
    dislocation_of: "Dislocation of"
    exclusion_of_no: "Exclusion of no"
    no_of_nodes_in: "Number of nodes in"
    class: "Diagnostic class"

# Catalog entry for 0776_BNG(lymph,10000,10).csv: description, CSV location,
# and one short description per column.
0776_BNG(lymph,10000,10).csv:
  dataset_description: 'Lymphography dataset (BNG scaled) with 10000 instances and compression ratio 10.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0776_BNG(lymph,10000,10).csv'
  feature_descriptions:
    lymphatics: 'Condition of lymphatic vessels'
    block_of_affere: 'Block of afferent vessels'
    bl_of_lymph_c: 'Block of lymph c'
    bl_of_lymph_s: 'Block of lymph s'
    by_pass: 'By pass present'
    extravasates: 'Extravasates'
    regeneration_of: 'Regeneration of'
    early_uptake_in: 'Early uptake in'
    lym_nodes_dimin: 'Lymph nodes diminished'
    lym_nodes_enlar: 'Lymph nodes enlarged'
    changes_in_lym: 'Changes in lymph'
    defect_in_node: 'Defect in node'
    changes_in_node: 'Changes in node'
    changes_in_stru: 'Changes in structure'
    special_forms: 'Special forms'
    dislocation_of: 'Dislocation of'
    exclusion_of_no: 'Exclusion of no'
    no_of_nodes_in: 'Number of nodes in'
    class: 'Diagnostic class'

# Catalog entry for 0777_lungcancer_GSE31210.csv: description, CSV location,
# and one short description per column.
0777_lungcancer_GSE31210.csv:
  dataset_description: "Lung cancer gene expression dataset with gene markers and clinical features."
  # NOTE(review): path previously pointed at 0777_wq.csv (the water-quality
  # entry's file); corrected to match this entry's key, as every other entry
  # does. Confirm the file exists at this location.
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0777_lungcancer_GSE31210.csv"
  feature_descriptions:
    OS_years: "Overall survival in years"
    histology: "Histological classification"
    age: "Patient age"
    sex: "Patient gender"
    g_202387_at: "Gene expression value for specific gene marker"
    g_211475_s_at: "Gene expression value for specific gene marker"
    g_204531_s_at: "Gene expression value for specific gene marker"
    g_211851_x_at: "Gene expression value for specific gene marker"
    g_203967_at: "Gene expression value for specific gene marker"
    g_203968_s_at: "Gene expression value for specific gene marker"
    g_201938_at: "Gene expression value for specific gene marker"
    g_202454_s_at: "Gene expression value for specific gene marker"
    g_215638_at: "Gene expression value for specific gene marker"
    g_214088_s_at: "Gene expression value for specific gene marker"
    g_216010_x_at: "Gene expression value for specific gene marker"
    g_206920_s_at: "Gene expression value for specific gene marker"
    g_220987_s_at: "Gene expression value for specific gene marker"
    g_212724_at: "Gene expression value for specific gene marker"
    g_204979_s_at: "Gene expression value for specific gene marker"
    g_AFFX-HUMGAPDH-M33197_5_at: "Control probe for GAPDH gene"
    g_AFFX-HUMGAPDH-M33197_M_at: "Control probe for GAPDH gene"
    OS_event: "Overall survival event indicator"

# Catalog entry for 0777_wq.csv: description, CSV location, and one short
# description per column.
0777_wq.csv:
  dataset_description: 'Water quality dataset with physical and chemical parameters.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0777_wq.csv'
  feature_descriptions:
    std_temp: 'Standard temperature'
    std_pH: 'Standard pH level'
    conduct: 'Conductivity'
    o2: 'Dissolved oxygen'
    o2sat: 'Oxygen saturation'
    co2: 'Carbon dioxide'
    hardness: 'Water hardness'
    no2: 'Nitrite'
    no3: 'Nitrate'
    nh4: 'Ammonium'
    po4: 'Phosphate'
    cl: 'Chloride'
    sio2: 'Silicon dioxide'
    kimno4: 'Potassium permanganate'
    k2cr2o7: 'Potassium dichromate'
    bod: 'Biological oxygen demand'
    x25400: 'Measurement at 25400'
    x29600: 'Measurement at 29600'
    x30400: 'Measurement at 30400'
    x33400: 'Measurement at 33400'
    x17300: 'Measurement at 17300'
    x19400: 'Measurement at 19400'
    x34500: 'Measurement at 34500'
    x38100: 'Measurement at 38100'
    x49700: 'Measurement at 49700'
    x50390: 'Measurement at 50390'
    x55800: 'Measurement at 55800'
    x57500: 'Measurement at 57500'
    x59300: 'Measurement at 59300'
    x37800: 'Measurement at 37800'

# Catalog entry for 0792_conference_attendance.csv: description, CSV location,
# and one short description per column.
0792_conference_attendance.csv:
  dataset_description: 'Conference attendance dataset with participant characteristics and attendance information.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0792_conference_attendance.csv'
  feature_descriptions:
    uni: 'University affiliation'
    tshirtSize: 'T-shirt size preference'
    favSubject: 'Favorite subject'
    vegan: 'Vegan dietary preference flag'
    participation: 'Participation level'
    regDateCat: 'Registration date category'
    isPresent: 'Attendance flag'

# Catalog entry for 0794_CD4.csv: description, CSV location, and one short
# description per column (CD4 measurements, treatment flags, P*/RP* markers).
0794_CD4.csv:
  dataset_description: 'CD4 cell count dataset for immunological study with baseline and follow-up measurements.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0794_CD4.csv'
  feature_descriptions:
    BaselineCD4: 'Baseline CD4 count'
    Week_from_base_currentcd4: 'Weeks from baseline to current CD4 measurement'
    CurrentCD4: 'Current CD4 count'
    Week_from_base_line: 'Weeks from baseline'
    AZT: 'AZT treatment flag'
    3TC: '3TC treatment flag'
    ABC: 'ABC treatment flag'
    EFV: 'EFV treatment flag'
    P12: 'P12 marker'
    P13: 'P13 marker'
    P15: 'P15 marker'
    P19: 'P19 marker'
    P20: 'P20 marker'
    P35: 'P35 marker'
    P36: 'P36 marker'
    P37: 'P37 marker'
    P41: 'P41 marker'
    P57: 'P57 marker'
    P62: 'P62 marker'
    P63: 'P63 marker'
    P64: 'P64 marker'
    P72: 'P72 marker'
    P77: 'P77 marker'
    P93: 'P93 marker'
    RP80: 'RP80 marker'
    RP83: 'RP83 marker'
    RP122: 'RP122 marker'
    RP135: 'RP135 marker'
    RP165: 'RP165 marker'
    RP166: 'RP166 marker'
    RP167: 'RP167 marker'
    RP173: 'RP173 marker'
    RP174: 'RP174 marker'
    RP176: 'RP176 marker'
    RP196: 'RP196 marker'
    RP200: 'RP200 marker'
    RP204: 'RP204 marker'
    RP205: 'RP205 marker'
    RP207: 'RP207 marker'
    RP210: 'RP210 marker'
    RP211: 'RP211 marker'
    RP214: 'RP214 marker'
    RP215: 'RP215 marker'
    RP218: 'RP218 marker'
    RP219: 'RP219 marker'
    RP223: 'RP223 marker'
    RP224: 'RP224 marker'
    RP227: 'RP227 marker'
    RP228: 'RP228 marker'
    RP230: 'RP230 marker'
    RP231: 'RP231 marker'
    RP232: 'RP232 marker'
    RP233: 'RP233 marker'
    RP235: 'RP235 marker'
    RP236: 'RP236 marker'
    RP237: 'RP237 marker'
    RP238: 'RP238 marker'
    Future_CD4: 'Predicted future CD4 count'

# Catalog entry for 0796_image.csv: description, CSV location, and one short
# description per column (Feature1..Feature135 followed by five scene columns).
0796_image.csv:
  dataset_description: 'Image features dataset with extracted visual characteristics for image classification.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0796_image.csv'
  feature_descriptions:
    Feature1: 'Image feature 1'
    Feature2: 'Image feature 2'
    Feature3: 'Image feature 3'
    Feature4: 'Image feature 4'
    Feature5: 'Image feature 5'
    Feature6: 'Image feature 6'
    Feature7: 'Image feature 7'
    Feature8: 'Image feature 8'
    Feature9: 'Image feature 9'
    Feature10: 'Image feature 10'
    Feature11: 'Image feature 11'
    Feature12: 'Image feature 12'
    Feature13: 'Image feature 13'
    Feature14: 'Image feature 14'
    Feature15: 'Image feature 15'
    Feature16: 'Image feature 16'
    Feature17: 'Image feature 17'
    Feature18: 'Image feature 18'
    Feature19: 'Image feature 19'
    Feature20: 'Image feature 20'
    Feature21: 'Image feature 21'
    Feature22: 'Image feature 22'
    Feature23: 'Image feature 23'
    Feature24: 'Image feature 24'
    Feature25: 'Image feature 25'
    Feature26: 'Image feature 26'
    Feature27: 'Image feature 27'
    Feature28: 'Image feature 28'
    Feature29: 'Image feature 29'
    Feature30: 'Image feature 30'
    Feature31: 'Image feature 31'
    Feature32: 'Image feature 32'
    Feature33: 'Image feature 33'
    Feature34: 'Image feature 34'
    Feature35: 'Image feature 35'
    Feature36: 'Image feature 36'
    Feature37: 'Image feature 37'
    Feature38: 'Image feature 38'
    Feature39: 'Image feature 39'
    Feature40: 'Image feature 40'
    Feature41: 'Image feature 41'
    Feature42: 'Image feature 42'
    Feature43: 'Image feature 43'
    Feature44: 'Image feature 44'
    Feature45: 'Image feature 45'
    Feature46: 'Image feature 46'
    Feature47: 'Image feature 47'
    Feature48: 'Image feature 48'
    Feature49: 'Image feature 49'
    Feature50: 'Image feature 50'
    Feature51: 'Image feature 51'
    Feature52: 'Image feature 52'
    Feature53: 'Image feature 53'
    Feature54: 'Image feature 54'
    Feature55: 'Image feature 55'
    Feature56: 'Image feature 56'
    Feature57: 'Image feature 57'
    Feature58: 'Image feature 58'
    Feature59: 'Image feature 59'
    Feature60: 'Image feature 60'
    Feature61: 'Image feature 61'
    Feature62: 'Image feature 62'
    Feature63: 'Image feature 63'
    Feature64: 'Image feature 64'
    Feature65: 'Image feature 65'
    Feature66: 'Image feature 66'
    Feature67: 'Image feature 67'
    Feature68: 'Image feature 68'
    Feature69: 'Image feature 69'
    Feature70: 'Image feature 70'
    Feature71: 'Image feature 71'
    Feature72: 'Image feature 72'
    Feature73: 'Image feature 73'
    Feature74: 'Image feature 74'
    Feature75: 'Image feature 75'
    Feature76: 'Image feature 76'
    Feature77: 'Image feature 77'
    Feature78: 'Image feature 78'
    Feature79: 'Image feature 79'
    Feature80: 'Image feature 80'
    Feature81: 'Image feature 81'
    Feature82: 'Image feature 82'
    Feature83: 'Image feature 83'
    Feature84: 'Image feature 84'
    Feature85: 'Image feature 85'
    Feature86: 'Image feature 86'
    Feature87: 'Image feature 87'
    Feature88: 'Image feature 88'
    Feature89: 'Image feature 89'
    Feature90: 'Image feature 90'
    Feature91: 'Image feature 91'
    Feature92: 'Image feature 92'
    Feature93: 'Image feature 93'
    Feature94: 'Image feature 94'
    Feature95: 'Image feature 95'
    Feature96: 'Image feature 96'
    Feature97: 'Image feature 97'
    Feature98: 'Image feature 98'
    Feature99: 'Image feature 99'
    Feature100: 'Image feature 100'
    Feature101: 'Image feature 101'
    Feature102: 'Image feature 102'
    Feature103: 'Image feature 103'
    Feature104: 'Image feature 104'
    Feature105: 'Image feature 105'
    Feature106: 'Image feature 106'
    Feature107: 'Image feature 107'
    Feature108: 'Image feature 108'
    Feature109: 'Image feature 109'
    Feature110: 'Image feature 110'
    Feature111: 'Image feature 111'
    Feature112: 'Image feature 112'
    Feature113: 'Image feature 113'
    Feature114: 'Image feature 114'
    Feature115: 'Image feature 115'
    Feature116: 'Image feature 116'
    Feature117: 'Image feature 117'
    Feature118: 'Image feature 118'
    Feature119: 'Image feature 119'
    Feature120: 'Image feature 120'
    Feature121: 'Image feature 121'
    Feature122: 'Image feature 122'
    Feature123: 'Image feature 123'
    Feature124: 'Image feature 124'
    Feature125: 'Image feature 125'
    Feature126: 'Image feature 126'
    Feature127: 'Image feature 127'
    Feature128: 'Image feature 128'
    Feature129: 'Image feature 129'
    Feature130: 'Image feature 130'
    Feature131: 'Image feature 131'
    Feature132: 'Image feature 132'
    Feature133: 'Image feature 133'
    Feature134: 'Image feature 134'
    Feature135: 'Image feature 135'
    desert: 'Desert scene classification'
    mountains: 'Mountain scene classification'
    sea: 'Sea scene classification'
    sunset: 'Sunset scene classification'
    trees: 'Tree scene classification'

# Catalog entry for 0803_youtube.csv: description, CSV location, and one
# short description per column.
0803_youtube.csv:
  dataset_description: 'YouTube video audio features dataset for voice and sound analysis.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0803_youtube.csv'
  feature_descriptions:
    mean.pitch: 'Mean pitch'
    sd.pitch: 'Standard deviation of pitch'
    mean.conf.pitch: 'Mean confidence in pitch measurement'
    sd.conf.pitch: 'Standard deviation of confidence in pitch measurement'
    mean.spec.entropy: 'Mean spectral entropy'
    sd.spec.entropy: 'Standard deviation of spectral entropy'
    mean.val.speak: 'Mean speaking value'
    sd.val.speak: 'Standard deviation of speaking value'
    mean.loc.apeak: 'Mean location of amplitude peak'
    sd.loc.apeak: 'Standard deviation of location of amplitude peak'
    mean.num.apeak: 'Mean number of amplitude peaks'
    sd.num.apeak: 'Standard deviation of number of amplitude peaks'
    mean.energy: 'Mean energy'
    sd.energy: 'Standard deviation of energy'
    mean.d.energy: 'Mean derivative of energy'
    sd.d.energy: 'Standard deviation of derivative of energy'
    avg.voiced.seg: 'Average voiced segment length'
    avg.len.seg: 'Average segment length'
    time.speaking: 'Time spent speaking'
    voice.rate: 'Voice rate'
    num.turns: 'Number of speaking turns'
    hogv.entropy: 'Histogram of gradients voice entropy'
    hogv.median: 'Histogram of gradients voice median'
    hogv.cogR: 'Histogram of gradients voice center of gravity radius'
    hogv.cogC: 'Histogram of gradients voice center of gravity center'
    gender: 'Speaker gender'
    Extr: 'Extroversion score'
    Agr: 'Agreeableness score'
    Cons: 'Conscientiousness score'
    Emot: 'Emotional stability score'
    Open: 'Openness score'

# Catalog entry for 0810_CPMP-2015-regression.csv: description, CSV location,
# and one short description per column.
0810_CPMP-2015-regression.csv:
  dataset_description: 'CPMP 2015 regression dataset with algorithm performance metrics on various tasks.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0810_CPMP-2015-regression.csv'
  feature_descriptions:
    instance_id: 'Instance identifier'
    repetition: 'Repetition number'
    stacks: 'Number of stacks'
    tiers: 'Number of tiers'
    stack.tier.ratio: 'Stack to tier ratio'
    container.density: 'Container density'
    empty.stack.pct: 'Empty stack percentage'
    overstowing.stack.pct: 'Overstowing stack percentage'
    overstowing.2cont.stack.pct: 'Overstowing 2-container stack percentage'
    group.same.min: 'Group same minimum'
    group.same.max: 'Group same maximum'
    group.same.mean: 'Group same mean'
    group.same.stdev: 'Group same standard deviation'
    top.good.min: 'Top good minimum'
    top.good.max: 'Top good maximum'
    top.good.mean: 'Top good mean'
    top.good.stdev: 'Top good standard deviation'
    overstowage.pct: 'Overstowage percentage'
    brlb: 'Branch and bound lower bound'
    tert.density: 'Tertiary density'
    tier.weighted.groups: 'Tier weighted groups'
    avg.ll.top.left.lg.group: 'Average LL top left LG group'
    cont.empty.grt.estack: 'Container empty greater than empty stack'
    pct.bottom.pct.on.top: 'Percentage bottom percentage on top'
    algorithm: 'Algorithm name'
    runtime: 'Runtime in seconds'

# Catalog entry for 0811_CPMP-2015-classification.csv: description, CSV
# location, and one short description per column.
0811_CPMP-2015-classification.csv:
  dataset_description: 'CPMP 2015 classification dataset with algorithm performance metrics on classification tasks.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0811_CPMP-2015-classification.csv'
  feature_descriptions:
    instance_id: 'Instance identifier'
    repetition: 'Repetition number'
    stacks: 'Number of stacks'
    tiers: 'Number of tiers'
    stack.tier.ratio: 'Stack to tier ratio'
    container.density: 'Container density'
    empty.stack.pct: 'Empty stack percentage'
    overstowing.stack.pct: 'Overstowing stack percentage'
    overstowing.2cont.stack.pct: 'Overstowing 2-container stack percentage'
    group.same.min: 'Group same minimum'
    group.same.max: 'Group same maximum'
    group.same.mean: 'Group same mean'
    group.same.stdev: 'Group same standard deviation'
    top.good.min: 'Top good minimum'
    top.good.max: 'Top good maximum'
    top.good.mean: 'Top good mean'
    top.good.stdev: 'Top good standard deviation'
    overstowage.pct: 'Overstowage percentage'
    brlb: 'Branch and bound lower bound'
    tert.density: 'Tertiary density'
    tier.weighted.groups: 'Tier weighted groups'
    avg.ll.top.left.lg.group: 'Average LL top left LG group'
    cont.empty.grt.estack: 'Container empty greater than empty stack'
    pct.bottom.pct.on.top: 'Percentage bottom percentage on top'
    runTime: 'Runtime in seconds'
    tier.density: 'Tier density'
    status: 'Execution status'
    algorithm: 'Algorithm name'

# Catalog entry for 0815_CPMP-2015-runtime-classification.csv: description,
# CSV location, and one short description per column.
0815_CPMP-2015-runtime-classification.csv:
  dataset_description: 'CPMP 2015 runtime classification dataset focusing on algorithm execution time for classification tasks.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0815_CPMP-2015-runtime-classification.csv'
  feature_descriptions:
    stacks: 'Number of stacks'
    tiers: 'Number of tiers'
    stack.tier.ratio: 'Stack to tier ratio'
    container.density: 'Container density'
    empty.stack.pct: 'Empty stack percentage'
    overstowing.stack.pct: 'Overstowing stack percentage'
    overstowing.2cont.stack.pct: 'Overstowing 2-container stack percentage'
    group.same.min: 'Group same minimum'
    group.same.max: 'Group same maximum'
    group.same.mean: 'Group same mean'
    group.same.stdev: 'Group same standard deviation'
    top.good.min: 'Top good minimum'
    top.good.max: 'Top good maximum'
    top.good.mean: 'Top good mean'
    top.good.stdev: 'Top good standard deviation'
    overstowage.pct: 'Overstowage percentage'
    brlb: 'Branch and bound lower bound'
    tert.density: 'Tertiary density'
    tier.weighted.groups: 'Tier weighted groups'
    avg.ll.top.left.lg.group: 'Average LL top left LG group'
    cont.empty.grt.estack: 'Container empty greater than empty stack'
    pct.bottom.pct.on.top: 'Percentage bottom percentage on top'
    algorithm: 'Algorithm name'
    runtime: 'Runtime in seconds'

# Catalog entry for 0816_CPMP-2015-runtime-regression.csv: description, CSV
# location, and one short description per column.
0816_CPMP-2015-runtime-regression.csv:
  dataset_description: 'CPMP 2015 runtime regression dataset focusing on algorithm execution time for regression tasks.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0816_CPMP-2015-runtime-regression.csv'
  feature_descriptions:
    stacks: 'Number of stacks'
    tiers: 'Number of tiers'
    stack.tier.ratio: 'Stack to tier ratio'
    container.density: 'Container density'
    empty.stack.pct: 'Empty stack percentage'
    overstowing.stack.pct: 'Overstowing stack percentage'
    overstowing.2cont.stack.pct: 'Overstowing 2-container stack percentage'
    group.same.min: 'Group same minimum'
    group.same.max: 'Group same maximum'
    group.same.mean: 'Group same mean'
    group.same.stdev: 'Group same standard deviation'
    top.good.min: 'Top good minimum'
    top.good.max: 'Top good maximum'
    top.good.mean: 'Top good mean'
    top.good.stdev: 'Top good standard deviation'
    overstowage.pct: 'Overstowage percentage'
    brlb: 'Branch and bound lower bound'
    tert.density: 'Tertiary density'
    tier.weighted.groups: 'Tier weighted groups'
    avg.ll.top.left.lg.group: 'Average LL top left LG group'
    cont.empty.grt.estack: 'Container empty greater than empty stack'
    pct.bottom.pct.on.top: 'Percentage bottom percentage on top'
    algorithm: 'Algorithm name'
    runtime: 'Runtime in seconds'

# Catalog entry for 0825_iris_test_upload.csv: description, CSV location, and
# one short description per column.
0825_iris_test_upload.csv:
  dataset_description: 'Iris flower dataset test upload with sepal and petal measurements for species classification.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0825_iris_test_upload.csv'
  feature_descriptions:
    sepallength: 'Sepal length in cm'
    sepalwidth: 'Sepal width in cm'
    petallength: 'Petal length in cm'
    petalwidth: 'Petal width in cm'
    class: 'Iris species class'

# Catalog entry for 0829_TaskCreationTestDataset.csv: description, CSV
# location, and one short description per column.
# This key was defined twice in a row with identical content; duplicate
# mapping keys are invalid YAML, so only a single definition is kept.
0829_TaskCreationTestDataset.csv:
  dataset_description: "Task creation test dataset with iris flower measurements."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0829_TaskCreationTestDataset.csv"
  feature_descriptions:
    sepallength: "Sepal length in cm"
    sepalwidth: "Sepal width in cm"
    petallength: "Petal length in cm"
    petalwidth: "Petal width in cm"
    class: "Iris species class"

0830_crimecommunitynums2.csv:
  dataset_description: "Community‐level crime and demographic dataset with socio‐economic and policing metrics."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0830_crimecommunitynums2.csv"
  feature_descriptions:
    county:                    "County name"
    community:                 "Community name"
    fold:                      "Cross‐validation fold identifier"
    population:                "Total population"
    householdsize:             "Average household size"
    racepctblack:              "Percentage of Black population"
    racePctWhite:              "Percentage of White population"
    racePctAsian:              "Percentage of Asian population"
    racePctHisp:               "Percentage of Hispanic population"
    agePct12t21:               "Percentage aged 12–21"
    agePct12t29:               "Percentage aged 12–29"
    agePct16t24:               "Percentage aged 16–24"
    agePct65up:                "Percentage aged 65 and over"
    numbUrban:                 "Number of urban residents"
    pctUrban:                  "Percentage of urban population"
    medIncome:                 "Median household income"
    pctWWage:                  "Percentage earning wages"
    pctWFarmSelf:              "Percentage in farm self‐employment"
    pctWInvInc:                "Percentage with investment income"
    pctWSocSec:                "Percentage receiving Social Security"
    pctWPubAsst:               "Percentage receiving public assistance"
    pctWRetire:                "Percentage of retirees"
    medFamInc:                 "Median family income"
    perCapInc:                 "Per capita income"
    whitePerCap:               "White per capita income"
    blackPerCap:               "Black per capita income"
    indianPerCap:              "American Indian per capita income"
    AsianPerCap:               "Asian per capita income"
    OtherPerCap:               "Other race per capita income"
    HispPerCap:                "Hispanic per capita income"
    NumUnderPov:               "Number under poverty line"
    PctPopUnderPov:            "Percentage under poverty line"
    PctLess9thGrade:           "Percentage without 9th grade education"
    PctNotHSGrad:              "Percentage without high school diploma"
    PctBSorMore:               "Percentage with bachelor’s degree or higher"
    PctUnemployed:             "Percentage unemployed"
    PctEmploy:                 "Percentage employed"
    PctEmplManu:               "Percentage employed in manufacturing"
    PctEmplProfServ:           "Percentage employed in professional services"
    PctOccupManu:              "Percentage of occupations in manufacturing"
    PctOccupMgmtProf:          "Percentage in management & professional occupations"
    MalePctDivorce:            "Percentage of divorced males"
    MalePctNevMarr:            "Percentage of never‐married males"
    FemalePctDiv:              "Percentage of divorced females"
    TotalPctDiv:               "Total percentage divorced"
    PersPerFam:                "Persons per family"
    PctFam2Par:                "Percentage of two‐parent families"
    PctKids2Par:               "Percentage of kids in two‐parent families"
    PctYoungKids2Par:          "Percentage of young kids in two‐parent families"
    PctTeen2Par:               "Percentage of teens in two‐parent families"
    PctWorkMomYoungKids:       "Percentage of working mothers with young kids"
    PctWorkMom:                "Percentage of working mothers"
    NumIlleg:                  "Number of births to unmarried mothers"
    PctIlleg:                  "Percentage of births to unmarried mothers"
    NumImmig:                  "Number of immigrants"
    PctImmigRecent:            "Percentage of recent immigrants"
    PctImmigRec5:              "Percentage of immigrants in last 5 years"
    PctImmigRec8:              "Percentage of immigrants in last 8 years"
    PctImmigRec10:             "Percentage of immigrants in last 10 years"
    PctRecentImmig:            "Percentage of recent immigrants"
    PctRecImmig5:              "Percentage of immigrants in last 5 years"
    PctRecImmig8:              "Percentage of immigrants in last 8 years"
    PctRecImmig10:             "Percentage of immigrants in last 10 years"
    PctSpeakEnglOnly:          "Percentage speaking only English"
    PctNotSpeakEnglWell:       "Percentage not speaking English well"
    PctLargHouseFam:           "Percentage of large households that are families"
    PctLargHouseOccup:         "Percentage of large households occupied"
    PersPerOccupHous:          "Persons per occupied housing unit"
    PersPerOwnOccHous:         "Persons per owner‐occupied housing unit"
    PersPerRentOccHous:        "Persons per renter‐occupied housing unit"
    PctPersOwnOccup:           "Percentage in owner‐occupied housing"
    PctPersDenseHous:          "Percentage in dense housing"
    PctHousLess3BR:            "Percentage of housing with fewer than 3 bedrooms"
    MedNumBR:                  "Median number of bedrooms"
    HousVacant:                "Number of vacant houses"
    PctHousOccup:              "Percentage of occupied housing units"
    PctHousOwnOcc:             "Percentage of owner‐occupied housing units"
    PctVacantBoarded:          "Percentage of boarded vacant housing"
    PctVacMore6Mos:            "Percentage vacant >6 months"
    MedYrHousBuilt:            "Median year houses built"
    PctHousNoPhone:            "Percentage of houses without telephone"
    PctWOFullPlumb:            "Percentage without full plumbing"
    OwnOccLowQuart:            "Lower quartile owner‐occupied value"
    OwnOccMedVal:              "Median owner‐occupied value"
    OwnOccHiQuart:             "Upper quartile owner‐occupied value"
    RentLowQ:                  "Lower quartile rent"
    RentMedian:                "Median rent"
    RentHighQ:                 "Upper quartile rent"
    MedRent:                   "Median rent amount"
    MedRentPctHousInc:         "Median rent as % of income"
    MedOwnCostPctInc:          "Median owner cost as % of income"
    MedOwnCostPctIncNoMtg:     "Median owner cost w/o mortgage as % of income"
    NumInShelters:             "Number in shelters"
    NumStreet:                 "Number living on streets"
    PctForeignBorn:            "Percentage foreign‐born"
    PctBornSameState:          "Percentage born in same state"
    PctSameHouse85:            "Percentage in same house since 1985"
    PctSameCity85:             "Percentage in same city since 1985"
    PctSameState85:            "Percentage in same state since 1985"
    LemasSwornFT:              "Sworn full‐time officers"
    LemasSwFTPerPop:           "Sworn FT officers per population"
    LemasSwFTFieldOps:         "Sworn FT field operations officers"
    LemasSwFTFieldPerPop:      "Field ops officers per population"
    LemasTotalReq:             "Total law enforcement requests"
    LemasTotReqPerPop:         "Requests per population"
    PolicReqPerOffic:          "Police requests per officer"
    PolicPerPop:               "Police per population"
    RacialMatchCommPol:        "Racial match in community policing"
    PctPolicWhite:             "Percentage of White officers"
    PctPolicBlack:             "Percentage of Black officers"
    PctPolicHisp:              "Percentage of Hispanic officers"
    PctPolicAsian:             "Percentage of Asian officers"
    PctPolicMinor:             "Percentage of minority officers"
    OfficAssgnDrugUnits:       "Officers assigned to drug units"
    NumKindsDrugsSeiz:         "Kinds of drugs seized"
    PolicAveOTWorked:          "Average police overtime worked"
    LandArea:                  "Land area of community"
    PopDens:                   "Population density"
    PctUsePubTrans:            "Percentage using public transport"
    PolicCars:                 "Number of police cars"
    PolicOperBudg:             "Police operating budget"
    LemasPctPolicOnPatr:       "Pct of officers on patrol"
    LemasGangUnitDeploy:       "Gang unit deployment percentage"
    LemasPctOfficDrugUn:       "Pct of officers in drug units"
    PolicBudgPerPop:           "Police budget per population"
    ViolentCrimesPerPop:       "Violent crimes per population"
    state:                     "State name"

# NFL game outcomes with ELO ratings and scores.
0875_nfl_games.csv:
  dataset_description: "NFL game outcomes with ELO ratings and scores."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0875_nfl_games.csv"
  feature_descriptions:
    # Game context
    date: "Date of game"
    season: "Season year"
    neutral: "Neutral site flag (1=yes, 0=no)"
    playoff: "Playoff game flag (1=yes, 0=no)"
    # Teams and ELO ratings
    team1: "First team identifier"
    team2: "Second team identifier"
    elo1: "ELO rating of team1 before game"
    elo2: "ELO rating of team2 before game"
    elo_prob1: "Predicted win probability for team1"
    # Outcome
    score1: "Score by team1"
    score2: "Score by team2"
    result1: "Game result for team1 (1=win, 0=loss)"

# Telco customer churn: customer profile, subscribed services, and billing.
0878_telco-customer-churn.csv:
  dataset_description: "Telco customer churn dataset with service usage and contract info."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0878_telco-customer-churn.csv"
  feature_descriptions:
    # Customer profile
    gender: "Customer gender"
    SeniorCitizen: "Senior citizen flag (1=yes, 0=no)"
    Partner: "Has partner (Yes/No)"
    Dependents: "Has dependents (Yes/No)"
    tenure: "Months with company"
    # Subscribed services
    PhoneService: "Has phone service (Yes/No)"
    MultipleLines: "Multiple lines service (Yes/No/No phone service)"
    InternetService: "Type of internet service"
    OnlineSecurity: "Online security service (Yes/No/No internet)"
    OnlineBackup: "Online backup service (Yes/No/No internet)"
    DeviceProtection: "Device protection service (Yes/No/No internet)"
    TechSupport: "Tech support service (Yes/No/No internet)"
    StreamingTV: "Streaming TV service (Yes/No/No internet)"
    StreamingMovies: "Streaming movies service (Yes/No/No internet)"
    # Contract and billing
    Contract: "Contract type"
    PaperlessBilling: "Paperless billing flag (Yes/No)"
    PaymentMethod: "Payment method"
    MonthlyCharges: "Monthly charges"
    TotalCharges: "Total charges"
    # Churn label
    Churn: "Churn flag (Yes/No)"

# Machine-level sales with environmental and calendar features.
0880_dataset_sales.csv:
  dataset_description: "Machine‐level sales dataset with environmental and calendar features."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0880_dataset_sales.csv"
  feature_descriptions:
    # Identifiers
    productId: "Product identifier"
    machineId: "Machine identifier"
    # Environment
    temp: "Temperature"
    weather_condition_id: "Weather condition code"
    # Calendar
    isholiday: "Holiday indicator (1=yes, 0=no)"
    daysoff: "Number of days off"
    year: "Year"
    month: "Month"
    day: "Day of month"
    week_day: "Day of week"
    # Availability and sales
    avail0: "Availability level 0"
    avail1: "Availability level 1"
    avail2: "Availability level 2"
    stdv: "Sales standard deviation"
    sales: "Sales count"

# Iris variant (Juan Feldman): four flower measurements plus the species class.
0882_JuanFeldmanIris.csv:
  dataset_description: "Iris dataset variant by Juan Feldman."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0882_JuanFeldmanIris.csv"
  feature_descriptions:
    sepallength: "Sepal length in cm"
    sepalwidth: "Sepal width in cm"
    petallength: "Petal length in cm"
    petalwidth: "Petal width in cm"
    class: "Iris species class"

# COMPAS two-year recidivism: demographics, offense history, and one-hot flags.
0885_compas-two-years.csv:
  dataset_description: "COMPAS recidivism risk dataset with demographic and offense history."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0885_compas-two-years.csv"
  feature_descriptions:
    # Demographics
    sex: "Gender"
    age: "Age"
    # Offense history
    juv_fel_count: "Number of juvenile felony offenses"
    juv_misd_count: "Number of juvenile misdemeanor offenses"
    juv_other_count: "Number of other juvenile offenses"
    priors_count: "Number of prior convictions"
    # Age category flags
    age_cat_25-45: "Age category 25–45"
    age_cat_Greaterthan45: "Age category >45"
    age_cat_Lessthan25: "Age category <25"
    # Race flags
    race_African-American: "Race: African-American flag"
    race_Caucasian: "Race: Caucasian flag"
    # Charge degree flags
    c_charge_degree_F: "Charge degree: felony flag"
    c_charge_degree_M: "Charge degree: misdemeanor flag"
    # Recidivism label
    two_year_recid: "Recidivism within two years (1=yes, 0=no)"

# Pima Indians Diabetes test set: clinical measurements and diabetes class.
0898_test001.csv:
  dataset_description: "Pima Indians Diabetes test dataset."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0898_test001.csv"
  feature_descriptions:
    preg: "Number of pregnancies"
    plas: "Plasma glucose concentration"
    pres: "Diastolic blood pressure (mm Hg)"
    skin: "Triceps skin fold thickness (mm)"
    insu: "2-Hour serum insulin (mu U/ml)"
    mass: "Body mass index (BMI)"
    pedi: "Diabetes pedigree function"
    age: "Age (years)"
    class: "Diabetes class (0 = negative, 1 = positive)"

# Classic Iris example: four flower measurements plus the species.
0901_iris-example.csv:
  dataset_description: "Classic Iris dataset example."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0901_iris-example.csv"
  feature_descriptions:
    Sepal.Length: "Sepal length in cm"
    Sepal.Width: "Sepal width in cm"
    Petal.Length: "Petal length in cm"
    Petal.Width: "Petal width in cm"
    Species: "Iris species"

# Solar power generation: weather variables and photovoltaic output.
0902_SolarPower.csv:
  dataset_description: "Solar power generation dataset with weather variables."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0902_SolarPower.csv"
  feature_descriptions:
    rhumidity: "Relative humidity"
    temperature: "Air temperature (°C)"
    windspeed: "Wind speed"
    solarirradiance: "Solar irradiance"
    pvoutput: "Photovoltaic output"

# UCI student performance (Portuguese language course).
0906_UCI-student-performance-por.csv:
  dataset_description: "Student performance in Portuguese language course."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0906_UCI-student-performance-por.csv"
  feature_descriptions:
    # Student and family background
    school: "Student’s school (GP or MS)"
    sex: "Gender"
    age: "Age in years"
    address: "Home address type (U = urban or R = rural)"
    famsize: "Family size (GT3 or LE3)"
    Pstatus: "Parent cohabitation status (T = together or A = apart)"
    Medu: "Mother’s education (0–4)"
    Fedu: "Father’s education (0–4)"
    Mjob: "Mother’s job"
    Fjob: "Father’s job"
    reason: "Reason to choose this school"
    guardian: "Student’s guardian"
    # Study conditions and support
    traveltime: "Home–school travel time (1–4)"
    studytime: "Weekly study time (1–4)"
    failures: "Number of past class failures"
    schoolsup: "Extra educational support (Yes/No)"
    famsup: "Family educational support (Yes/No)"
    paid: "Extra paid classes (Yes/No)"
    activities: "Extra-curricular activities (Yes/No)"
    nursery: "Attended nursery school (Yes/No)"
    higher: "Wants higher education (Yes/No)"
    internet: "Internet access at home (Yes/No)"
    # Social life and health
    romantic: "In a romantic relationship (Yes/No)"
    famrel: "Family relationship quality (1–5)"
    freetime: "Free time after school (1–5)"
    goout: "Going out with friends (1–5)"
    Dalc: "Weekday alcohol consumption (1–5)"
    Walc: "Weekend alcohol consumption (1–5)"
    health: "Current health status (1–5)"
    absences: "Number of school absences"
    # Grades
    G1: "First period grade (0–20)"
    G2: "Second period grade (0–20)"
    G3: "Final grade (0–20)"
# UCI student performance (Math course). Column names are the same as in the
# Portuguese-course entry (0906), so the value codes and ranges are stated the
# same way here.
0907_UCI-student-performance-mat:
  dataset_description: "Student Performance Dataset (Math): Academic, demographic, and social attributes affecting grades."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0907_UCI-student-performance-mat.csv"
  feature_descriptions:
    # Student and family background
    school: "Student's school (GP or MS)"
    sex: "Student's sex"
    age: "Student's age in years"
    address: "Home address type (U = urban or R = rural)"
    famsize: "Family size (GT3 or LE3)"
    Pstatus: "Parent's cohabitation status (T = together or A = apart)"
    Medu: "Mother's education (0-4)"
    Fedu: "Father's education (0-4)"
    Mjob: "Mother's job"
    Fjob: "Father's job"
    reason: "Reason for choosing this school"
    guardian: "Student's guardian"
    # Study conditions and support
    traveltime: "Home to school travel time (1-4)"
    studytime: "Weekly study time (1-4)"
    failures: "Number of past class failures"
    schoolsup: "Extra educational support (Yes/No)"
    famsup: "Family educational support (Yes/No)"
    paid: "Extra paid classes (Yes/No)"
    activities: "Extracurricular activities (Yes/No)"
    nursery: "Attended nursery school (Yes/No)"
    higher: "Wants to take higher education (Yes/No)"
    internet: "Internet access at home (Yes/No)"
    # Social life and health
    romantic: "In a romantic relationship (Yes/No)"
    famrel: "Family relationship quality (1-5)"
    freetime: "Free time after school (1-5)"
    goout: "Going out with friends (1-5)"
    Dalc: "Workday alcohol consumption (1-5)"
    Walc: "Weekend alcohol consumption (1-5)"
    health: "Current health status (1-5)"
    absences: "Number of school absences"
    # Grades
    G1: "First period grade (0-20)"
    G2: "Second period grade (0-20)"
    G3: "Final grade (0-20)"

# Forest fires (Portugal): spatial, calendar, and meteorological inputs used to
# predict the burned area.
0911_forest_fires:
  dataset_description: "Forest Fires Dataset: Meteorological data to predict burned area of forest fires in Portugal."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0911_forest_fires.csv"
  feature_descriptions:
    X: "X-axis spatial coordinate"
    # Quoted on purpose: a plain Y is a boolean literal under YAML 1.1, so some
    # loaders would turn this key into `true` instead of the column name "Y".
    "Y": "Y-axis spatial coordinate"
    month: "Month of occurrence"
    day: "Day of the week"
    # Fire Weather Index components
    FFMC: "Fine Fuel Moisture Code"
    DMC: "Duff Moisture Code"
    DC: "Drought Code"
    ISI: "Initial Spread Index"
    # Weather
    temp: "Temperature (Celsius)"
    RH: "Relative Humidity (%)"
    wind: "Wind speed (km/h)"
    rain: "Rainfall (mm)"
    # Prediction target named in the dataset description
    area: "Burned area (hectares)"

# US Treasury rates and related banking/monetary metrics.
# Keys that start with a digit (e.g. 1Y-CMaturityRate) are still plain string
# scalars because of the trailing letters.
0915_treasury:
  dataset_description: "US Treasury Rates and Financial Metrics Dataset"
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0915_treasury.csv"
  feature_descriptions:
    1Y-CMaturityRate: "1-Year constant maturity rate"
    30Y-CMortgageRate: "30-Year conventional mortgage rate"
    3M-Rate-AuctionAverage: "3-Month treasury rate (auction average)"
    3M-Rate-SecondaryMarket: "3-Month treasury rate (secondary market)"
    3Y-CMaturityRate: "3-Year constant maturity rate"
    5Y-CMaturityRate: "5-Year constant maturity rate"
    bankCredit: "Total bank credit"
    currency: "Currency in circulation"
    demandDeposits: "Demand deposits at commercial banks"
    federalFunds: "Effective federal funds rate"
    moneyStock: "Total money stock"
    checkableDeposits: "Checkable deposits"
    loansLeases: "Loans and leases in bank credit"
    savingsDeposits: "Savings deposits"
    tradeCurrencies: "Foreign currency holdings"
    1MonthCDRate: "1-Month CD interest rate"

# Yacht hydrodynamics: hull design parameters used to predict residuary resistance.
0918_yacht_hydrodynamics:
  dataset_description: "Yacht Hydrodynamics Dataset: Predicting residuary resistance based on hull design parameters."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0918_yacht_hydrodynamics.csv"
  feature_descriptions:
    # NOTE(review): key is spelled "Logitudinal" (no "n"); presumably it mirrors
    # the CSV column header - confirm against the file before renaming.
    Logitudinal.position: "Longitudinal position of center of buoyancy"
    Prismatic.coefficient: "Prismatic coefficient"
    Length.displacement.ratio: "Length-displacement ratio"
    Beam.draught.ratio: "Beam-draught ratio"
    Length.beam.ratio: "Length-beam ratio"
    Froude.number: "Froude number"
    # Prediction target named in the dataset description
    Residuary.resistance: "Residuary resistance"

# Amazon employee access: resource/role identifiers and the access decision.
0919_Amazon_employee_access:
  dataset_description: "Amazon Employee Access: Classification of granted or denied access to resources."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0919_Amazon_employee_access.csv"
  feature_descriptions:
    RESOURCE: "Resource identifier"
    MGR_ID: "Manager ID"
    ROLE_ROLLUP_1: "First level role roll-up"
    ROLE_ROLLUP_2: "Second level role roll-up"
    ROLE_DEPTNAME: "Department name"
    ROLE_TITLE: "Role title"
    ROLE_FAMILY_DESC: "Role family description"
    ROLE_FAMILY: "Role family code"
    ROLE_CODE: "Specific role code"
    # Classification label
    target: "Access granted (1) or denied (0)"
# Smartphone human activity recognition.
# Descriptions are decoded from the feature-name scheme:
#   t / f      -> time-domain / frequency-domain signal
#   BodyAcc    -> body acceleration;  GravityAcc -> gravity acceleration
#   BodyGyro   -> body angular velocity (gyroscope)
#   Jerk       -> jerk (time derivative) signal;  Mag -> signal magnitude
#   mean()/std() -> mean / standard deviation;  -X/-Y/-Z -> axis
# Keys are kept exactly as they appear, including the doubled "BodyBody".
# NOTE(review): there is no entry for tBodyAcc-mean()-X; confirm whether that
# column exists in the CSV.
0921_Smartphone-Based_Recognition_of_Human_Activities:
  dataset_description: "Smartphone Human Activity Recognition: Sensor features for predicting activity class."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0921_Smartphone-Based_Recognition_of_Human_Activities.csv"
  feature_descriptions:
    # Time-domain, per-axis signals
    tBodyAcc-mean()-Y: "Mean of time-domain body acceleration (Y axis)"
    tBodyAcc-mean()-Z: "Mean of time-domain body acceleration (Z axis)"
    tBodyAcc-std()-X: "Standard deviation of time-domain body acceleration (X axis)"
    tBodyAcc-std()-Y: "Standard deviation of time-domain body acceleration (Y axis)"
    tBodyAcc-std()-Z: "Standard deviation of time-domain body acceleration (Z axis)"
    tGravityAcc-mean()-X: "Mean of time-domain gravity acceleration (X axis)"
    tGravityAcc-mean()-Y: "Mean of time-domain gravity acceleration (Y axis)"
    tGravityAcc-mean()-Z: "Mean of time-domain gravity acceleration (Z axis)"
    tGravityAcc-std()-X: "Standard deviation of time-domain gravity acceleration (X axis)"
    tGravityAcc-std()-Y: "Standard deviation of time-domain gravity acceleration (Y axis)"
    tGravityAcc-std()-Z: "Standard deviation of time-domain gravity acceleration (Z axis)"
    tBodyAccJerk-mean()-X: "Mean of time-domain body acceleration jerk (X axis)"
    tBodyAccJerk-mean()-Y: "Mean of time-domain body acceleration jerk (Y axis)"
    tBodyAccJerk-mean()-Z: "Mean of time-domain body acceleration jerk (Z axis)"
    tBodyAccJerk-std()-X: "Standard deviation of time-domain body acceleration jerk (X axis)"
    tBodyAccJerk-std()-Y: "Standard deviation of time-domain body acceleration jerk (Y axis)"
    tBodyAccJerk-std()-Z: "Standard deviation of time-domain body acceleration jerk (Z axis)"
    tBodyGyro-mean()-X: "Mean of time-domain body angular velocity (X axis)"
    tBodyGyro-mean()-Y: "Mean of time-domain body angular velocity (Y axis)"
    tBodyGyro-mean()-Z: "Mean of time-domain body angular velocity (Z axis)"
    tBodyGyro-std()-X: "Standard deviation of time-domain body angular velocity (X axis)"
    tBodyGyro-std()-Y: "Standard deviation of time-domain body angular velocity (Y axis)"
    tBodyGyro-std()-Z: "Standard deviation of time-domain body angular velocity (Z axis)"
    tBodyGyroJerk-mean()-X: "Mean of time-domain body angular velocity jerk (X axis)"
    tBodyGyroJerk-mean()-Y: "Mean of time-domain body angular velocity jerk (Y axis)"
    tBodyGyroJerk-mean()-Z: "Mean of time-domain body angular velocity jerk (Z axis)"
    tBodyGyroJerk-std()-X: "Standard deviation of time-domain body angular velocity jerk (X axis)"
    tBodyGyroJerk-std()-Y: "Standard deviation of time-domain body angular velocity jerk (Y axis)"
    tBodyGyroJerk-std()-Z: "Standard deviation of time-domain body angular velocity jerk (Z axis)"
    # Time-domain magnitudes
    tBodyAccMag-mean(): "Mean of time-domain body acceleration magnitude"
    tBodyAccMag-std(): "Standard deviation of time-domain body acceleration magnitude"
    tGravityAccMag-mean(): "Mean of time-domain gravity acceleration magnitude"
    tGravityAccMag-std(): "Standard deviation of time-domain gravity acceleration magnitude"
    tBodyAccJerkMag-mean(): "Mean of time-domain body acceleration jerk magnitude"
    tBodyAccJerkMag-std(): "Standard deviation of time-domain body acceleration jerk magnitude"
    tBodyGyroMag-mean(): "Mean of time-domain body angular velocity magnitude"
    tBodyGyroMag-std(): "Standard deviation of time-domain body angular velocity magnitude"
    tBodyGyroJerkMag-mean(): "Mean of time-domain body angular velocity jerk magnitude"
    tBodyGyroJerkMag-std(): "Standard deviation of time-domain body angular velocity jerk magnitude"
    # Frequency-domain, per-axis signals
    fBodyAcc-mean()-X: "Mean of frequency-domain body acceleration (X axis)"
    fBodyAcc-mean()-Y: "Mean of frequency-domain body acceleration (Y axis)"
    fBodyAcc-mean()-Z: "Mean of frequency-domain body acceleration (Z axis)"
    fBodyAcc-std()-X: "Standard deviation of frequency-domain body acceleration (X axis)"
    fBodyAcc-std()-Y: "Standard deviation of frequency-domain body acceleration (Y axis)"
    fBodyAcc-std()-Z: "Standard deviation of frequency-domain body acceleration (Z axis)"
    fBodyAccJerk-mean()-X: "Mean of frequency-domain body acceleration jerk (X axis)"
    fBodyAccJerk-mean()-Y: "Mean of frequency-domain body acceleration jerk (Y axis)"
    fBodyAccJerk-mean()-Z: "Mean of frequency-domain body acceleration jerk (Z axis)"
    fBodyAccJerk-std()-X: "Standard deviation of frequency-domain body acceleration jerk (X axis)"
    fBodyAccJerk-std()-Y: "Standard deviation of frequency-domain body acceleration jerk (Y axis)"
    fBodyAccJerk-std()-Z: "Standard deviation of frequency-domain body acceleration jerk (Z axis)"
    fBodyGyro-mean()-X: "Mean of frequency-domain body angular velocity (X axis)"
    fBodyGyro-mean()-Y: "Mean of frequency-domain body angular velocity (Y axis)"
    fBodyGyro-mean()-Z: "Mean of frequency-domain body angular velocity (Z axis)"
    fBodyGyro-std()-X: "Standard deviation of frequency-domain body angular velocity (X axis)"
    fBodyGyro-std()-Y: "Standard deviation of frequency-domain body angular velocity (Y axis)"
    fBodyGyro-std()-Z: "Standard deviation of frequency-domain body angular velocity (Z axis)"
    # Frequency-domain magnitudes
    fBodyAccMag-mean(): "Mean of frequency-domain body acceleration magnitude"
    fBodyAccMag-std(): "Standard deviation of frequency-domain body acceleration magnitude"
    fBodyBodyAccJerkMag-mean(): "Mean of frequency-domain body acceleration jerk magnitude"
    fBodyBodyAccJerkMag-std(): "Standard deviation of frequency-domain body acceleration jerk magnitude"
    fBodyBodyGyroMag-mean(): "Mean of frequency-domain body angular velocity magnitude"
    fBodyBodyGyroMag-std(): "Standard deviation of frequency-domain body angular velocity magnitude"
    fBodyBodyGyroJerkMag-mean(): "Mean of frequency-domain body angular velocity jerk magnitude"
    fBodyBodyGyroJerkMag-std(): "Standard deviation of frequency-domain body angular velocity jerk magnitude"
    # Classification label
    Activity: "Activity class label"
# Concrete compressive strength: mixture ingredients and age -> strength.
# Keys are the raw column headers and are kept byte-for-byte (including the
# double spaces in the Water and Coarse Aggregate headers).
0925_Concrete_Data:
  dataset_description: "Concrete Strength Dataset: Ingredients and age used to predict compressive strength."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0925_Concrete_Data.csv"
  feature_descriptions:
    Cement (component 1)(kg in a m^3 mixture): "Cement content (kg per m^3 of mixture)"
    Blast Furnace Slag (component 2)(kg in a m^3 mixture): "Blast furnace slag content (kg per m^3 of mixture)"
    Fly Ash (component 3)(kg in a m^3 mixture): "Fly ash content (kg per m^3 of mixture)"
    Water  (component 4)(kg in a m^3 mixture): "Water content (kg per m^3 of mixture)"
    Superplasticizer (component 5)(kg in a m^3 mixture): "Superplasticizer content (kg per m^3 of mixture)"
    Coarse Aggregate  (component 6)(kg in a m^3 mixture): "Coarse aggregate content (kg per m^3 of mixture)"
    Fine Aggregate (component 7)(kg in a m^3 mixture): "Fine aggregate content (kg per m^3 of mixture)"
    Age (day): "Age of the concrete in days"
    # Prediction target named in the dataset description
    Concrete compressive strength(MPa. megapascals): "Concrete compressive strength in MPa (target variable)"

# Higgs boson signal-vs-background classification.
# Column names are identical to the 0951_higgs entry, so the per-feature
# descriptions follow the same physics meaning instead of a generic placeholder.
0927_higgs:
  dataset_description: "Higgs Boson Dataset: Particle physics features for classifying signal vs background."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0927_higgs.csv"
  feature_descriptions:
    # Lepton and missing energy
    lepton_pT: "Transverse momentum of the lepton"
    lepton_eta: "Pseudorapidity of the lepton"
    lepton_phi: "Azimuthal angle of the lepton"
    missing_energy_magnitude: "Missing transverse energy magnitude"
    missing_energy_phi: "Azimuthal angle of missing energy"
    # Jets 1-4
    jet1pt: "Transverse momentum of jet 1"
    jet1eta: "Pseudorapidity of jet 1"
    jet1phi: "Azimuthal angle of jet 1"
    jet1b-tag: "b-tag of jet 1"
    jet2pt: "Transverse momentum of jet 2"
    jet2eta: "Pseudorapidity of jet 2"
    jet2phi: "Azimuthal angle of jet 2"
    jet2b-tag: "b-tag of jet 2"
    jet3pt: "Transverse momentum of jet 3"
    jet3eta: "Pseudorapidity of jet 3"
    jet3phi: "Azimuthal angle of jet 3"
    jet3b-tag: "b-tag of jet 3"
    jet4pt: "Transverse momentum of jet 4"
    jet4eta: "Pseudorapidity of jet 4"
    jet4phi: "Azimuthal angle of jet 4"
    jet4b-tag: "b-tag of jet 4"
    # Derived invariant masses
    m_jj: "Invariant mass of the two leading jets"
    m_jjj: "Invariant mass of the three leading jets"
    m_lv: "Invariant mass of lepton and neutrino"
    m_jlv: "Invariant mass of lepton, neutrino, and leading jet"
    m_bb: "Invariant mass of two b-tagged jets"
    m_wbb: "Invariant mass of W boson and two b-jets"
    m_wwbb: "Invariant mass of two W bosons and two b-jets"
    # Classification label
    class: "Target variable: signal vs background"

# Phishing websites: URL, page, and domain indicators of legitimacy or fraud.
# Keys are the raw column names (original spellings such as
# "Shortining_Service" and "popUpWidnow" are kept so they match the CSV).
# NOTE(review): descriptions follow the commonly documented meaning of these
# phishing-website indicators; the value encoding is not stated here - confirm
# against the CSV.
0929_PhishingWebsites:
  dataset_description: "Phishing Websites Dataset: Features indicating legitimacy or fraud of a website."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0929_PhishingWebsites.csv"
  feature_descriptions:
    # Address-bar based indicators
    having_IP_Address: "Whether the URL uses an IP address instead of a domain name"
    URL_Length: "URL length indicator"
    Shortining_Service: "Whether a URL shortening service is used"
    having_At_Symbol: "Whether the URL contains an '@' symbol"
    double_slash_redirecting: "Whether '//' is used for redirection inside the URL"
    Prefix_Suffix: "Whether the domain name contains a '-' separated prefix or suffix"
    having_Sub_Domain: "Sub-domain / multiple sub-domain indicator"
    SSLfinal_State: "HTTPS / SSL certificate status"
    Domain_registeration_length: "Domain registration length indicator"
    Favicon: "Whether the favicon is loaded from an external domain"
    port: "Use of non-standard ports indicator"
    HTTPS_token: "Whether an 'HTTPS' token appears in the domain part of the URL"
    # Page-content based indicators
    Request_URL: "Indicator for page objects loaded from external domains"
    URL_of_Anchor: "Indicator for anchor tags linking to other domains"
    Links_in_tags: "Indicator for links in meta/script/link tags pointing to other domains"
    SFH: "Server form handler status"
    Submitting_to_email: "Whether form data is submitted to an email address"
    Abnormal_URL: "Whether the URL is abnormal relative to the site's identity"
    # HTML / JavaScript based indicators
    Redirect: "Website redirect count indicator"
    on_mouseover: "Whether onMouseOver is used to change the status bar"
    RightClick: "Whether right click is disabled"
    popUpWidnow: "Whether pop-up windows are used"
    Iframe: "Whether iframe redirection is used"
    # Domain based indicators
    age_of_domain: "Age of domain indicator"
    DNSRecord: "Whether a DNS record exists for the domain"
    web_traffic: "Website traffic indicator"
    Page_Rank: "PageRank indicator"
    Google_Index: "Whether the page is indexed by Google"
    Links_pointing_to_page: "Indicator for the number of links pointing to the page"
    Statistical_report: "Whether the host appears in statistical phishing reports"
    # Classification label
    Result: "Target variable: phishing vs legitimate website"
# mlr rpart runs: per-task performance, timing, and tree hyperparameters.
# NOTE(review): dataset_description and path were missing from this entry.
# The description is summarized from the feature list below, and the path
# follows the "<clean_labeled_dataset>/<entry key>.csv" pattern used by the
# other entries - confirm the file exists.
0932_mlr_rpart_rng:
  dataset_description: "mlr rpart Results: Performance, timing, and decision-tree hyperparameters of rpart runs across ML tasks."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0932_mlr_rpart_rng.csv"
  feature_descriptions:
    # Run identification
    task_id: "Unique identifier for each ML task"
    dataset: "Name of the dataset used in the task"
    learner: "Machine learning algorithm used"
    # Performance and timing
    perf.mmce: "Misclassification error performance"
    perf.logloss: "Log loss performance score"
    traintime: "Time taken to train the model"
    predicttime: "Time taken to make predictions"
    # Preprocessing and tree hyperparameters
    num.impute.selected.cpo: "Number of imputation steps applied"
    cp: "Complexity parameter for pruning"
    maxdepth: "Maximum depth of the decision tree"
    minbucket: "Minimum number of samples per leaf"
    minsplit: "Minimum number of samples required to split"

# Water stress classification from texture features.
# NOTE(review): dataset_description and path were missing from this entry; the
# path follows the "<clean_labeled_dataset>/<entry key>.csv" pattern used by
# the other entries - confirm the file exists.
# NOTE(review): the column names match the usual abbreviations of gray-level
# co-occurrence matrix (GLCM / Haralick) texture features (autoc, contr, cprom,
# cshad, ... inf1h, inf2h, indnc); descriptions assume that origin - confirm.
0933_Waterstress:
  dataset_description: "Water Stress Dataset: Texture features for water stress classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0933_Waterstress.csv"
  feature_descriptions:
    autoc: "Autocorrelation (GLCM texture feature)"
    contr: "Contrast (GLCM texture feature)"
    corrm: "Correlation, variant m (GLCM texture feature)"
    corrp: "Correlation, variant p (GLCM texture feature)"
    cprom: "Cluster prominence (GLCM texture feature)"
    cshad: "Cluster shade (GLCM texture feature)"
    dissi: "Dissimilarity (GLCM texture feature)"
    energ: "Energy (GLCM texture feature)"
    entro: "Entropy (GLCM texture feature)"
    homom1: "Homogeneity, variant m1 (GLCM texture feature)"
    homop: "Homogeneity, variant p (GLCM texture feature)"
    maxpr: "Maximum probability (GLCM texture feature)"
    sosvh: "Sum of squares: variance (GLCM texture feature)"
    savgh: "Sum average (GLCM texture feature)"
    svarh: "Sum variance (GLCM texture feature)"
    senth: "Sum entropy (GLCM texture feature)"
    dvarh: "Difference variance (GLCM texture feature)"
    denth: "Difference entropy (GLCM texture feature)"
    inf1h: "Information measure of correlation 1 (GLCM texture feature)"
    inf2h: "Information measure of correlation 2 (GLCM texture feature)"
    homom: "Homogeneity, variant m (GLCM texture feature)"
    indnc: "Inverse difference normalized (GLCM texture feature)"
    # Classification label
    class: "Target water stress classification"

# Seattle crime reports: offense type, police geography, and timing.
# NOTE(review): dataset_description and path were missing from this entry.
# The description is summarized from the feature list below, and the path
# follows the "<clean_labeled_dataset>/<entry key>.csv" pattern used by the
# other entries - confirm the file exists.
0940_seattlecrime6:
  dataset_description: "Seattle Crime Dataset: Offense type, police location hierarchy, and occurrence/report times of crimes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0940_seattlecrime6.csv"
  feature_descriptions:
    Primary_Offense_Description: "Type of offense (e.g. theft, assault)"
    # Location hierarchy
    Precinct: "Police precinct reporting the crime"
    Sector: "Sector within the precinct"
    Beat: "Patrol beat where incident occurred"
    Neighborhood: "Neighborhood location"
    # Timing
    Occurred_hour: "Hour when the crime occurred"
    Occurred_min: "Minute when the crime occurred"
    Reported_Time: "Time when the crime was reported"
# Upload-validation test dataset: sample iris measurements with class labels.
0941_TEST10e627dcde-UploadTestWithURL:
  dataset_description: "Test Dataset: Sample iris data with class labels used for upload validation and trials."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0941_TEST10e627dcde-UploadTestWithURL.csv"
  feature_descriptions:
    sepallength: "Sepal length in centimeters"
    sepalwidth: "Sepal width in centimeters"
    petallength: "Petal length in centimeters"
    petalwidth: "Petal width in centimeters"
    # Class label
    class: "Species of iris flower"
# Cylinder bands: printing/manufacturing process parameters and a band quality label.
0943_cylinder-bands:
  dataset_description: "Cylinder Bands Dataset: Manufacturing process parameters and quality labels for cylinder bands."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0943_cylinder-bands.csv"
  feature_descriptions:
    # Job identification and categorical process settings
    customer: "Customer ID"
    job_number: "Production job identifier"
    grain_screened: "Type of grain used"
    ink_color: "Color of ink applied"
    proof_on_ctd_ink: "Proof used on cylinder-to-cylinder ink"
    blade_mfg: "Blade manufacturer"
    cylinder_division: "Production division"
    paper_type: "Type of paper used"
    ink_type: "Ink formulation"
    direct_steam: "Direct steam application (yes/no)"
    solvent_type: "Solvent used"
    type_on_cylinder: "Text or image type on cylinder"
    press_type: "Type of printing press"
    press: "Specific press used"
    unit_number: "Unit number in production"
    cylinder_size: "Size of cylinder"
    paper_mill_location: "Location of the paper mill"
    plating_tank: "Tank used for plating"
    # Process measurements
    proof_cut: "Proof cut type"
    viscosity: "Viscosity of ink"
    caliper: "Caliper (thickness) of paper"
    ink_temperature: "Temperature of ink"
    # NOTE(review): key is spelled "humifity"; presumably it mirrors the CSV
    # column header - confirm against the file before renaming.
    humifity: "Humidity level during process"
    roughness: "Surface roughness of substrate"
    blade_pressure: "Blade pressure setting"
    varnish_pct: "Percentage of varnish"
    press_speed: "Speed of the printing press"
    ink_pct: "Percentage composition of ink"
    solvent_pct: "Percentage composition of solvent"
    ESA_Voltage: "Electrostatic assist voltage"
    ESA_Amperage: "Electrostatic assist amperage"
    wax: "Wax content"
    hardener: "Hardener content"
    roller_durometer: "Hardness of roller"
    current_density: "Current density applied"
    anode_space_ratio: "Ratio of spacing between anodes"
    chrome_content: "Chromium content"
    # Quality label
    band_type: "Target quality label of the band"
# Turbine sensor readings (speed, temperature, vibration, pressure) for diagnostics.
0944_SensorDataResource:
  dataset_description: "Turbine Sensor Dataset: Sensor readings for turbine diagnostics including temperature, speed, and pressure."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0944_SensorDataResource.csv"
  feature_descriptions:
    # Rotor speeds
    POWER_TURBINE_ROTOR1_SPEED: "Speed of rotor 1"
    POWER_TURBINE_ROTOR2_SPEED: "Speed of rotor 2"
    GAS_GENERATOR_ROTOR1_SPEED: "Speed of gas generator rotor 1"
    GAS_GENERATOR_ROTOR2_SPEED: "Speed of gas generator rotor 2"
    # Exhaust temperatures
    POWER_TURBINE_EXHAUST1_TEMPERATURE: "Temperature at exhaust point 1"
    POWER_TURBINE_EXHAUST2_TEMPERATURE: "Temperature at exhaust point 2"
    POWER_TURBINE_EXHAUST3_TEMPERATURE: "Temperature at exhaust point 3"
    POWER_TURBINE_EXHAUST4_TEMPERATURE: "Temperature at exhaust point 4"
    POWER_TURBINE_EXHAUST5_TEMPERATURE: "Temperature at exhaust point 5"
    POWER_TURBINE_EXHAUST6_TEMPERATURE: "Temperature at exhaust point 6"
    # Vibration
    G_NDE_Direct: "Vibration reading at NDE side"
    G_DE_Vib_X_Direct: "X-direction vibration at DE side"
    # Pressures and inlet/ambient temperatures
    TURBINE_INLET_PRESSURE_P48: "Inlet pressure at P48"
    COMPRESSOR_INLET_TOTAL_PRESSURE: "Compressor inlet total pressure"
    AMBIENT_TEMPERATURE: "Ambient temperature during operation"
    AXIAL_COMPRESSOR_INLET2_TEMPERATURE: "Axial compressor inlet 2 temperature"
    AXIAL_COMPRESSOR_INLET1_TEMPERATURE: "Axial compressor inlet 1 temperature"
    # Oil tank and bearing temperatures
    MINERAL_OIL_TANK_TEMPERATURE: "Temperature of mineral oil tank"
    SYNTHETIC_OIL_TANK_TEMPERATURE: "Temperature of synthetic oil tank"
    PT_B_OB_BRG_Temp: "Temperature at PT B OB bearing"
    PT_B_IB_BRG_Temp: "Temperature at PT B IB bearing"
    PT_C_IB_THR_BRG_Temp: "Temperature at PT C IB thrust bearing"
    PT_B_OBTHR_BRG_Temp: "Temperature at PT B OB thrust bearing"
    # Electrical output and status
    GENERATOR_ACTIVE_POWER: "Power output from the generator"
    GRID_VOLTAGE: "Grid voltage reading"
    TURBINE_RUNNING_STATUS: "Turbine operational status"
# Stock dataset: inventory tracking with timestamps and demand.
0944_stock_fardamento02:
  dataset_description: "Stock Dataset: Inventory tracking with timestamps and demand."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0944_stock_fardamento02.csv"
  feature_descriptions:
    Material: "Material type"
    Dia: "Day of inventory entry"
    # NOTE(review): description lists two alternatives; confirm which one applies.
    pp: "Purchase price or unit ID"
    TEMP: "Temperature of storage or context"
    # NOTE(review): description lists two alternatives; confirm which one applies.
    adm: "Administrator or approval code"
    Dem: "Demand quantity"
    qts: "Quantity in stock"

# COMET Monte Carlo sample: simulated particle detector events.
0948_COMET_MC_SAMPLE:
  dataset_description: "COMET MC Sample: Simulated particle detector events."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0948_COMET_MC_SAMPLE.csv"
  feature_descriptions:
    event_id: "Unique identifier for the event"
    wire_id: "ID of the wire hit in detector"
    energy_deposit: "Energy deposited in MeV"
    relative_time: "Time relative to the event trigger"
    # Class label
    label: "Class label for type of event"

# Hydraulic-system predictive maintenance: sensor readings and component
# condition labels.
# NOTE(review): the column set (ps1-6, eps1, fs1-2, ts1-4, vs1, ce, cp, se plus
# cooler/valve/pump/accumulator/stable labels) matches the sensor naming of the
# UCI "Condition monitoring of hydraulic systems" data; descriptions assume
# that origin - confirm.
0948_Ishwar:
  dataset_description: "Industrial Pump Sensor Dataset: Predictive maintenance features from a hydraulic system."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0948_Ishwar.csv"
  feature_descriptions:
    # Physical sensors
    ps1: "Pressure sensor 1 reading"
    ps2: "Pressure sensor 2 reading"
    ps3: "Pressure sensor 3 reading"
    ps4: "Pressure sensor 4 reading"
    ps5: "Pressure sensor 5 reading"
    ps6: "Pressure sensor 6 reading"
    ts1: "Temperature sensor 1 reading"
    ts2: "Temperature sensor 2 reading"
    ts3: "Temperature sensor 3 reading"
    ts4: "Temperature sensor 4 reading"
    fs1: "Volume flow sensor 1 reading"
    fs2: "Volume flow sensor 2 reading"
    eps1: "Motor power sensor reading"
    vs1: "Vibration sensor reading"
    # Virtual (computed) sensors
    se: "Efficiency factor (virtual sensor)"
    ce: "Cooling efficiency (virtual sensor)"
    cp: "Cooling power (virtual sensor)"
    # Component condition labels
    cooler_condition: "Condition of cooler component"
    valve_condition: "Condition of valve"
    internal_pump_leakage: "Internal pump leakage level"
    hydraulic_accumulator: "Hydraulic accumulator condition"
    stable: "System stability status"

# Breast cancer (cleaned): diagnosis data with no missing attributes.
0950_breast-cancer-dropped-missing-attributes-values:
  dataset_description: "Breast Cancer Dataset (Cleaned): Diagnosis data with no missing attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0950_breast-cancer-dropped-missing-attributes-values.csv"
  feature_descriptions:
    age: "Age group"
    menopause: "Menopause status"
    tumor-size: "Tumor size"
    inv-nodes: "Involved lymph nodes"
    node-caps: "Node capsule presence"
    deg-malig: "Degree of malignancy"
    breast: "Breast side affected"
    breast-quad: "Breast quadrant"
    irradiat: "Radiation therapy status"
    # Class label (note the capitalized key)
    Class: "Diagnosis class"
# Higgs boson signal classification: lepton, missing-energy, jet, and
# invariant-mass features.
0951_higgs:
  dataset_description: "Higgs Boson Dataset: High-energy physics features for signal classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0951_higgs.csv"
  feature_descriptions:
    # Lepton and missing energy
    lepton_pT: "Transverse momentum of the lepton"
    lepton_eta: "Pseudorapidity of the lepton"
    lepton_phi: "Azimuthal angle of the lepton"
    missing_energy_magnitude: "Missing transverse energy magnitude"
    missing_energy_phi: "Azimuthal angle of missing energy"
    # Jets 1-4
    jet1pt: "Transverse momentum of jet 1"
    jet1eta: "Pseudorapidity of jet 1"
    jet1phi: "Azimuthal angle of jet 1"
    jet1b-tag: "b-tag of jet 1 (1 if b-jet, 0 otherwise)"
    jet2pt: "Transverse momentum of jet 2"
    jet2eta: "Pseudorapidity of jet 2"
    jet2phi: "Azimuthal angle of jet 2"
    jet2b-tag: "b-tag of jet 2"
    jet3pt: "Transverse momentum of jet 3"
    jet3eta: "Pseudorapidity of jet 3"
    jet3phi: "Azimuthal angle of jet 3"
    jet3b-tag: "b-tag of jet 3"
    jet4pt: "Transverse momentum of jet 4"
    jet4eta: "Pseudorapidity of jet 4"
    jet4phi: "Azimuthal angle of jet 4"
    jet4b-tag: "b-tag of jet 4"
    # Derived invariant masses
    m_jj: "Invariant mass of the two leading jets"
    m_jjj: "Invariant mass of the three leading jets"
    m_lv: "Invariant mass of lepton and neutrino"
    m_jlv: "Invariant mass of lepton, neutrino, and leading jet"
    m_bb: "Invariant mass of two b-tagged jets"
    m_wbb: "Invariant mass of W boson and two b-jets"
    m_wwbb: "Invariant mass of two W bosons and two b-jets"
    # Classification label
    class: "Signal (1) or background (0)"

0964_BNG(credit-g):
  dataset_description: "German Credit Data: Attributes for credit risk classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0964_BNG(credit-g).csv"
  feature_descriptions:
    checking_status: "Status of checking account"
    duration: "Credit duration in months"
    credit_history: "Credit history status"
    purpose: "Purpose of the loan"
    credit_amount: "Loan amount"
    savings_status: "Savings account/bonds status"
    employment: "Employment duration"
    installment_commitment: "Installment rate percentage"
    personal_status: "Personal status and sex"
    other_parties: "Other guarantors"
    residence_since: "Years at current residence"
    property_magnitude: "Type of property"
    age: "Age in years"
    other_payment_plans: "Other installment plans"
    housing: "Housing situation"
    existing_credits: "Number of existing credits"
    job: "Job type"
    num_dependents: "Number of dependents"
    own_telephone: "Owns telephone"
    foreign_worker: "Is foreign worker"
    class: "Credit risk (1 good, 2 bad)"

0965_BNG(spambase):
  dataset_description: "Spambase Dataset: Word and character frequencies in emails for spam detection."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0965_BNG(spambase).csv"
  feature_descriptions:
    word_freq_make: "Frequency of word 'make'"
    word_freq_address: "Frequency of word 'address'"
    word_freq_all: "Frequency of word 'all'"
    word_freq_3d: "Frequency of word '3d'"
    word_freq_our: "Frequency of word 'our'"
    word_freq_over: "Frequency of word 'over'"
    word_freq_remove: "Frequency of word 'remove'"
    word_freq_internet: "Frequency of word 'internet'"
    word_freq_order: "Frequency of word 'order'"
    word_freq_mail: "Frequency of word 'mail'"
    word_freq_receive: "Frequency of word 'receive'"
    word_freq_will: "Frequency of word 'will'"
    word_freq_people: "Frequency of word 'people'"
    word_freq_report: "Frequency of word 'report'"
    word_freq_addresses: "Frequency of word 'addresses'"
    word_freq_free: "Frequency of word 'free'"
    word_freq_business: "Frequency of word 'business'"
    word_freq_email: "Frequency of word 'email'"
    word_freq_you: "Frequency of word 'you'"
    word_freq_credit: "Frequency of word 'credit'"
    word_freq_your: "Frequency of word 'your'"
    word_freq_font: "Frequency of word 'font'"
    word_freq_000: "Frequency of number '000'"
    word_freq_money: "Frequency of word 'money'"
    word_freq_hp: "Frequency of word 'hp'"
    word_freq_hpl: "Frequency of word 'hpl'"
    word_freq_george: "Frequency of word 'george'"
    word_freq_650: "Frequency of number '650'"
    word_freq_lab: "Frequency of word 'lab'"
    word_freq_labs: "Frequency of word 'labs'"
    word_freq_telnet: "Frequency of word 'telnet'"
    word_freq_857: "Frequency of number '857'"
    word_freq_data: "Frequency of word 'data'"
    word_freq_415: "Frequency of number '415'"
    word_freq_85: "Frequency of number '85'"
    word_freq_technology: "Frequency of word 'technology'"
    word_freq_1999: "Frequency of year '1999'"
    word_freq_parts: "Frequency of word 'parts'"
    word_freq_pm: "Frequency of abbreviation 'pm'"
    word_freq_direct: "Frequency of word 'direct'"
    word_freq_cs: "Frequency of word 'cs'"
    word_freq_meeting: "Frequency of word 'meeting'"
    word_freq_original: "Frequency of word 'original'"
    word_freq_project: "Frequency of word 'project'"
    word_freq_re: "Frequency of abbreviation 're'"
    word_freq_edu: "Frequency of domain '.edu'"
    word_freq_table: "Frequency of word 'table'"
    word_freq_conference: "Frequency of word 'conference'"
    char_freq_semicolon: "Frequency of character ';'"
    char_freq_leftroundbracket: "Frequency of character '('"
    char_freq_leftbracket: "Frequency of character '['"
    char_freq_exclamation: "Frequency of character '!'"
    char_freq_dolarsign: "Frequency of character '$'"
    char_freq_doublequotes: "Frequency of character '\"'"
    capital_run_length_average: "Average length of uninterrupted capital letters"
    capital_run_length_longest: "Longest capital letter sequence"
    capital_run_length_total: "Total number of capital letters"
    class: "Spam label (1 spam, 0 not spam)"
0968_BNG(segment):
  dataset_description: "Image Segmentation Dataset: Region-based image metrics for segment classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0968_BNG(segment).csv"
  feature_descriptions:
    region-centroid-col: "X-coordinate of region centroid"
    region-centroid-row: "Y-coordinate of region centroid"
    region-pixel-count: "Total pixels in the region"
    short-line-density-5: "Density of short lines with length 5"
    short-line-density-2: "Density of short lines with length 2"
    vedge-mean: "Mean vertical edge strength"
    vegde-sd: "Standard deviation of vertical edge strength"
    hedge-mean: "Mean horizontal edge strength"
    hedge-sd: "Standard deviation of horizontal edge strength"
    intensity-mean: "Average intensity of region"
    rawred-mean: "Average red value"
    rawblue-mean: "Average blue value"
    rawgreen-mean: "Average green value"
    exred-mean: "Excess red value"
    exblue-mean: "Excess blue value"
    exgreen-mean: "Excess green value"
    value-mean: "Average value (brightness)"
    saturation-mean: "Average saturation"
    hue-mean: "Average hue"
    class: "Segment label"

0968_iris:
  dataset_description: "Iris Dataset: Measurements of iris flowers for species classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0968_iris.csv"
  feature_descriptions:
    sepallength: "Length of the sepal (cm)"
    sepalwidth: "Width of the sepal (cm)"
    petallength: "Length of the petal (cm)"
    petalwidth: "Width of the petal (cm)"
    class: "Iris species"

0969_BNG(anneal):
  dataset_description: "Steel Annealing Dataset: Process and material specs for quality prediction."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0969_BNG(anneal).csv"
  feature_descriptions:
    family: "Annealing process family"
    product-type: "Type of product"
    steel: "Steel type code"
    carbon: "Carbon content level"
    hardness: "Hardness rating"
    temper_rolling: "Temper rolling applied (yes/no)"
    condition: "Steel condition"
    formability: "Formability level"
    strength: "Strength rating"
    non-ageing: "Non-ageing status"
    surface-finish: "Surface finish type"
    surface-quality: "Surface quality rating"
    enamelability: "Enamelability rating"
    bc: "BC presence"
    bf: "BF presence"
    bt: "BT presence"
    bw_me: "BW presence"
    bl: "BL presence"
    m: "Material property M"
    chrom: "Chromium content"
    phos: "Phosphorus content"
    cbond: "Chemical bonding strength"
    marvi: "Marvi property"
    exptl: "Experimental data flag"
    ferro: "Ferromagnetic property"
    corr: "Corrosion resistance"
    blue_bright_varn_clean: "Surface finish detail"
    lustre: "Lustre rating"
    jurofm: "Jurofm indicator"
    s: "Sulfur content"
    p: "Phosphorus content"
    shape: "Shape classification"
    thick: "Thickness (mm)"
    width: "Width (mm)"
    len: "Length (mm)"
    oil: "Oil coating"
    bore: "Bore presence"
    packing: "Packing type"
    class: "Final product quality class"

0969_dataset_time_8:
  dataset_description: "Protein Localization Dataset: Amino acid sequence features for localization prediction."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0969_dataset_time_8.csv"
  feature_descriptions:
    mcg: "McGeoch's method signal sequence"
    gvh: "von Heijne signal sequence score"
    lip: "Presence of lipoprotein signal"
    chg: "Charge of the N-terminus"
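    aac: "Discriminant analysis score of the amino acid content of outer membrane and periplasmic proteins"
    alm1: "Score of the ALOM membrane-spanning region prediction program"
    alm2: "ALOM program score after excluding putative cleavable signal regions from the sequence"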
    class: "Localization class"

0970_SpeedDating:
  dataset_description: "Speed Dating Dataset: Preferences, self-perceptions, and outcomes from speed dating events."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0970_SpeedDating.csv"
  feature_descriptions:
    has_null: "Indicates if row contains nulls"
    wave: "Wave number of event"
    gender: "Participant's gender"
    age: "Age of participant"
    age_o: "Age of partner"
    d_age: "Difference in age"
    d_d_age: "Binned (discretized) difference in age"
    race: "Race of participant"
    race_o: "Race of partner"
    samerace: "Whether race matched"
    importance_same_race: "Importance of racial match"
    importance_same_religion: "Importance of religious match"
    d_importance_same_race: "Binned (discretized) importance of racial match"
    d_importance_same_religion: "Binned (discretized) importance of religious match"
    field: "Field of study or profession"
    pref_o_attractive: "Importance placed by partner on attractiveness"
    pref_o_sincere: "Importance placed by partner on sincerity"
    pref_o_intelligence: "Importance placed by partner on intelligence"
    pref_o_funny: "Importance placed by partner on humor"
    pref_o_ambitious: "Importance placed by partner on ambition"
    pref_o_shared_interests: "Importance placed by partner on interests"
    attractive: "Self-rated attractiveness"
    sincere: "Self-rated sincerity"
    intelligence: "Self-rated intelligence"
    funny: "Self-rated humor"
    ambition: "Self-rated ambition"
    like: "How much they liked the partner"
    guess_prob_liked: "Guess probability of being liked"
    match: "Whether a mutual match occurred"
0975_MY_DB:
  dataset_description: "Smart Home Usage Dataset: Environmental and user action data for appliance control."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0975_MY_DB.csv"
  feature_descriptions:
    timeoftheday: "Time of the day (morning/afternoon/evening/night)"
    dayoftheweek: "Day of the week"
    tv(on/off): "TV usage status"
    weather: "Weather condition (e.g., sunny, rainy)"
    temperature: "Room temperature (°C)"
    appliance: "Appliance being used"
    actiontaken: "Action taken on appliance (on/off)"

0984_abalone:
  dataset_description: "Abalone Dataset: Physical measurements to predict age (via number of rings)."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0984_abalone.csv"
  feature_descriptions:
    Sex: "Sex of abalone (M, F, I)"
    Length: "Longest shell measurement (mm)"
    Diameter: "Perpendicular to length (mm)"
    Height: "Height with meat in shell (mm)"
    Whole_weight: "Whole abalone weight (g)"
    Shucked_weight: "Weight of meat (g)"
    Viscera_weight: "Gut weight (g)"
    Shell_weight: "Shell weight (g)"
    Class_number_of_rings: "Age as number of rings"

0984_car-evaluation:
  dataset_description: "Car Evaluation Dataset: Car specs for acceptability classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0984_car-evaluation.csv"
  feature_descriptions:
    buying_price_vhigh: "Buying price: very high"
    buying_price_high: "Buying price: high"
    buying_price_med: "Buying price: medium"
    buying_price_low: "Buying price: low"
    maintenance_price_vhigh: "Maintenance cost: very high"
    maintenance_price_high: "Maintenance cost: high"
    maintenance_price_med: "Maintenance cost: medium"
    maintenance_price_low: "Maintenance cost: low"
    doors_2: "Two-door vehicle"
    doors_3: "Three-door vehicle"
    doors_4: "Four-door vehicle"
    doors_5more: "Five or more doors"
    persons_2: "Capacity for 2 persons"
    persons_4: "Capacity for 4 persons"
    persons_more: "Capacity for more than 4 persons"
    luggage_boot_size_small: "Small luggage boot"
    luggage_boot_size_med: "Medium luggage boot"
    luggage_boot_size_big: "Large luggage boot"
    safety_low: "Low safety rating"
    safety_med: "Medium safety rating"
    safety_high: "High safety rating"
    class: "Car acceptability class"

0988_test_data:
  dataset_description: "Immune Marker Dataset: Gene expression markers for health status classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0988_test_data.csv"
  feature_descriptions:
    status: "Health status (e.g., diseased or normal)"
    cxcl9: "CXCL9 expression"
    cxcl11: "CXCL11 expression"
    ccl2: "CCL2 expression"
    ifng: "Interferon gamma expression"
    prf1: "Perforin expression"
    clu: "Clusterin expression"
    ccl3: "CCL3 expression"
    cxcl10: "CXCL10 expression"
    cd3e: "CD3E gene expression"
    gzmb: "Granzyme B expression"
    kim.1: "KIM-1 expression"

0991_fars:
  dataset_description: "Fatal Accident Reporting System (FARS): Details of persons in fatal crashes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/0991_fars.csv"
  feature_descriptions:
    CASE_STATE: "State where accident occurred"
    AGE: "Age of individual"
    SEX: "Sex of individual"
    PERSON_TYPE: "Role in accident (driver/passenger/pedestrian)"
    SEATING_POSITION: "Vehicle seating position"
    RESTRAINT_SYSTEM-USE: "Restraint system used"
    AIR_BAG_AVAILABILITY/DEPLOYMENT: "Airbag deployment status"
    EJECTION: "Ejection from vehicle"
    EJECTION_PATH: "Ejection path"
    EXTRICATION: "Extrication required (yes/no)"
    NON_MOTORIST_LOCATION: "Location of non-motorist"
    POLICE_REPORTED_ALCOHOL_INVOLVEMENT: "Alcohol involvement (police)"
    METHOD_ALCOHOL_DETERMINATION: "Method to determine alcohol use"
    ALCOHOL_TEST_TYPE: "Type of alcohol test"
    ALCOHOL_TEST_RESULT: "Alcohol test result"
    POLICE-REPORTED_DRUG_INVOLVEMENT: "Drug involvement (police)"
    METHOD_OF_DRUG_DETERMINATION: "Method to determine drug use"
    DRUG_TEST_TYPE: "Type of drug test"
    DRUG_TEST_RESULTS_(1_of_3): "First drug test result"
    DRUG_TEST_TYPE_(2_of_3): "Second drug test type"
    DRUG_TEST_RESULTS_(2_of_3): "Second drug test result"
    DRUG_TEST_TYPE_(3_of_3): "Third drug test type"
    DRUG_TEST_RESULTS_(3_of_3): "Third drug test result"
    HISPANIC_ORIGIN: "Hispanic origin (yes/no)"
    TAKEN_TO_HOSPITAL: "Whether taken to hospital"
    RELATED_FACTOR_(1)-PERSON_LEVEL: "Person-level contributing factor 1"
    RELATED_FACTOR_(2)-PERSON_LEVEL: "Person-level contributing factor 2"
    RELATED_FACTOR_(3)-PERSON_LEVEL: "Person-level contributing factor 3"
    RACE: "Race of individual"
    class: "Injury severity class (e.g., no injury, incapacitating injury, fatal injury)"

1001_wine-quality-red:
  dataset_description: "Red Wine Quality Dataset: Physicochemical properties for predicting wine quality score."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1001_wine-quality-red.csv"
  feature_descriptions:
    fixed_acidity: "Tartaric acid content"
    volatile_acidity: "Acetic acid content"
    citric_acid: "Citric acid content"
    residual_sugar: "Residual sugar (g/L)"
    chlorides: "Salt concentration"
    free_sulfur_dioxide: "Free SO2 (mg/L)"
    total_sulfur_dioxide: "Total SO2 (mg/L)"
    density: "Density of wine (g/cm³)"
    pH: "Acidity level (pH)"
    sulphates: "Potassium sulphate content"
    alcohol: "Alcohol percentage"
    class: "Quality rating (0–10)"
1003_cars1:
  dataset_description: "Auto MPG Dataset: Car specifications used for fuel efficiency and performance analysis."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1003_cars1.csv"
  feature_descriptions:
    MPG: "Miles per gallon (fuel efficiency)"
    cylinders: "Number of cylinders"
    cubicInches: "Engine displacement in cubic inches"
    horsepower: "Horsepower of the engine"
    weightLbs: "Vehicle weight in pounds"
    time-to-sixty: "Time taken to accelerate from 0–60 mph (seconds)"
    year: "Model year of the car"
    class: "Vehicle category or fuel efficiency class"

1004_churn:
  dataset_description: "Telecom Customer Churn Dataset: Service and usage metrics used to predict customer churn."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1004_churn.csv"
  feature_descriptions:
    state: "Customer's US state"
    account_length: "Length of customer account (days)"
    area_code: "Telephone area code"
    phone_number: "Customer's phone number (anonymized)"
    international_plan: "Whether customer has international plan"
    voice_mail_plan: "Whether customer has voicemail plan"
    number_vmail_messages: "Number of voicemail messages"
    total_day_minutes: "Total minutes used during day"
    total_day_calls: "Total number of calls during day"
    total_day_charge: "Charges incurred during day usage"
    total_eve_minutes: "Evening call duration in minutes"
    total_eve_calls: "Evening call count"
    total_eve_charge: "Charges incurred for evening usage"
    total_night_minutes: "Night call duration in minutes"
    total_night_calls: "Night call count"
    total_night_charge: "Night usage charges"
    total_intl_minutes: "International minutes used"
    total_intl_calls: "Number of international calls"
    total_intl_charge: "Charges for international calls"
    number_customer_service_calls: "Customer service call frequency"
    class: "Churn label (1 = churn, 0 = no churn)"

1005_solar-flare:
  dataset_description: "Solar Flare Dataset: Sunspot features and flare activity classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1005_solar-flare.csv"
  feature_descriptions:
    class_code: "Sunspot classification"
    largest_spot_code: "Largest spot size code"
    spot_dist_code: "Spot distribution code"
    Activity: "Solar activity level"
    Evolution: "Evolution of spot group"
    Previous_24_hour_code: "Previous 24-hour activity"
    Historically-complex: "Whether historically complex"
    become_complex: "Tendency to become complex"
    Area: "Sunspot group area"
    Area_of_the_largest_spot: "Area of largest spot"
    class: "Flare activity class"

1006_Titanic:
  dataset_description: "Titanic Dataset: Passenger demographics and survival status."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1006_Titanic.csv"
  feature_descriptions:
    Class: "Passenger class (1st, 2nd, 3rd)"
    Age: "Age of passenger"
    Sex: "Sex of passenger"
    class: "Survival status (1 = survived, 0 = did not survive)"

1007_tokyo1:
  dataset_description: "Tokyo Server Monitoring Dataset: CPU, disk, memory, and I/O metrics."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1007_tokyo1.csv"
  feature_descriptions:
    cpu_avg_user: "Average CPU user time (%)"
    cpu_avg_sys: "Average CPU system time (%)"
    cpu_avg_busy: "Average busy CPU time (%)"
    cpu_avg_wait: "Average CPU wait time (%)"
    cpu_avg_idle: "Average idle CPU time (%)"
    cpu_avg_waste: "Wasted CPU time (%)"
    cpu_max_user: "Max CPU user time (%)"
    cpu_max_sys: "Max CPU system time (%)"
    cpu_max_busy: "Max CPU busy time (%)"
    cpu_max_wait: "Max CPU wait time (%)"
    cpu_max_idle: "Max idle time (%)"
    cpu_max_waste: "Max waste time (%)"
    cpu_frac_busy: "Fraction of busy CPU time"
    io_iget: "I/O get requests"
    io_bread: "Blocks read"
    io_bwrite: "Blocks written"
    io_lread: "Logical reads"
    io_lwrite: "Logical writes"
    io_phread: "Physical reads"
    io_phwrite: "Physical writes"
    io_wcancel: "Cancelled writes"
    io_namei: "Namei system calls"
    io_dirblk: "Directory blocks"
    disk_avg_active: "Average disk active time"
    disk_max_active: "Max disk active time"
    disk_frac_active: "Fraction of active disk time"
    disk_avg_read: "Average disk read rate"
    disk_avg_write: "Average disk write rate"
    disk_avg_total: "Total disk I/O average"
    disk_max_read: "Max disk read rate"
    disk_max_write: "Max disk write rate"
    disk_max_total: "Max total disk I/O"
    disk_frac_busy: "Fraction of busy disk time"
    net_avg_read: "Average network read"
    net_avg_write: "Average network write"
    net_avg_total: "Total average network I/O"
    net_max_read: "Max network read"
    net_max_write: "Max network write"
    net_max_total: "Max total network I/O"
    net_frac_busy: "Fraction of network busy time"
    mem_swap: "Swap memory usage"
    mem_fault: "Memory faults"
    mem_tlbflush: "TLB flush count"
    syscall_total: "Total system calls"
    class: "System status class"
1009_allbp:
  dataset_description: "Thyroid Dataset (AllBP): Patient diagnostics for classifying increased or decreased binding protein."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1009_allbp.csv"
  feature_descriptions:
    age: "Age of the patient"
    sex: "Sex of the patient"
    on_thyroxine: "Is patient taking thyroxine?"
    query_on_thyroxine: "Query if on thyroxine"
    on_antithyroid_medication: "Is patient on antithyroid medication?"
    sick: "Is patient currently sick?"
    pregnant: "Pregnancy status"
    thyroid_surgery: "History of thyroid surgery"
    I131_treatment: "Received I131 treatment?"
    query_hypothyroid: "Query for hypothyroidism"
    query_hyperthyroid: "Query for hyperthyroidism"
    lithium: "Taking lithium medication"
    goitre: "Presence of goitre"
    tumor: "History of tumors"
    hypopituitary: "Hypopituitary disorder"
    psych: "Psychiatric disorder"
    TSH_measured: "Whether TSH is measured"
    TSH: "TSH hormone level"
    T3_measured: "Whether T3 is measured"
    T3: "T3 hormone level"
    TT4_measured: "Whether total T4 is measured"
    TT4: "Total T4 level"
    T4U_measured: "Whether T4 uptake is measured"
    T4U: "T4 uptake level"
    FTI_measured: "Whether free thyroxine index is measured"
    FTI: "Free thyroxine index"
    TBG_measured: "Whether thyroxine-binding globulin measured"
    TBG: "Thyroxine-binding globulin level"
    referral_source: "Referral source (e.g., physician)"
    class: "Binding protein class (negative, increased binding protein, decreased binding protein)"

1010_allrep:
  dataset_description: "Thyroid Dataset (AllRep): Diagnostic measurements and medication indicators for thyroid status."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1010_allrep.csv"
  feature_descriptions:
    age: "Age of the patient"
    sex: "Sex of the patient"
    on_thyroxine: "Patient is on thyroxine"
    query_on_thyroxine: "Query on thyroxine"
    on_antithyroid_medication: "Antithyroid medication status"
    sick: "Current sickness flag"
    pregnant: "Pregnancy status"
    thyroid_surgery: "History of thyroid surgery"
    I131_treatment: "I131 radioactive treatment"
    query_hypothyroid: "Query hypothyroid condition"
    query_hyperthyroid: "Query hyperthyroid condition"
    lithium: "Lithium medication status"
    goitre: "Goitre condition"
    tumor: "Tumor indicator"
    hypopituitary: "Hypopituitary condition"
    psych: "Psychiatric condition"
    TSH_measured: "TSH test taken"
    TSH: "TSH value"
    T3_measured: "T3 test taken"
    T3: "T3 value"
    TT4_measured: "Total T4 test taken"
    TT4: "Total T4 value"
    T4U_measured: "T4 uptake test taken"
    T4U: "T4U value"
    FTI_measured: "FTI test taken"
    FTI: "Free Thyroxine Index"
    TBG_measured: "TBG test taken"
    TBG: "Thyroxine Binding Globulin value"
    referral_source: "Referral source"
    class: "Replacement therapy class (negative, replacement therapy, underreplacement, overreplacement)"

1011_cleve:
  dataset_description: "Cleveland Heart Disease Dataset: Clinical features for predicting heart disease."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1011_cleve.csv"
  feature_descriptions:
    Age: "Age of the patient"
    Sex: "Sex (1 = male, 0 = female)"
    Chest_pain_type: "Type of chest pain"
    Trestbps: "Resting blood pressure"
    Cholesterol: "Serum cholesterol level"
    Fasting_blood_sugar_&lt;_120: "Fasting blood sugar < 120 mg/dl (1 = true; 0 = false)"
    Resting_ecg: "Resting electrocardiographic results"
    Max_heart_rate: "Maximum heart rate achieved"
    Exercise_induced_angina: "Exercise-induced angina (1 = yes, 0 = no)"
    Oldpeak: "ST depression induced by exercise"
    Slope: "Slope of the peak exercise ST segment"
    Number_of_vessels_colored: "Number of major vessels colored by fluoroscopy"
    Thal: "Thalassemia (3 = normal; 6 = fixed defect; 7 = reversible defect)"
    class: "Presence of heart disease (binary or ordinal)"

1013_dis:
  dataset_description: "Thyroid Dataset (DIS): Full diagnostic indicators for identifying thyroid dysfunction."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1013_dis.csv"
  feature_descriptions:
    age: "Patient age"
    sex: "Patient sex"
    on_thyroxine: "Is patient currently on thyroxine?"
    query_on_thyroxine: "Query if on thyroxine"
    on_antithyroid_medication: "On antithyroid meds"
    sick: "Current sickness"
    pregnant: "Pregnancy status"
    thyroid_surgery: "History of thyroid surgery"
    I131_treatment: "Has undergone I131 treatment"
    query_hypothyroid: "Query about hypothyroidism"
    query_hyperthyroid: "Query about hyperthyroidism"
    lithium: "Taking lithium medication"
    goitre: "Presence of goitre"
    tumor: "History of tumors"
    hypopituitary: "Hypopituitary disorder"
    psych: "Psychological disorder present"
    TSH_measured: "Whether TSH test was done"
    TSH: "TSH level"
    T3_measured: "Whether T3 was measured"
    T3: "T3 hormone level"
    TT4_measured: "Total T4 measured"
    TT4: "Total T4 hormone level"
    T4U_measured: "Thyroxine Uptake measured"
    T4U: "T4 Uptake level"
    FTI_measured: "Free Thyroxine Index measured"
    FTI: "Free Thyroxine Index"
    TBG_measured: "Thyroxine Binding Globulin measured"
    TBG: "TBG hormone level"
    referral_source: "Source of referral"
    class: "Discordant thyroid test results class (discordant or negative)"
1014_pathogen_survey_dataset:
  dataset_description: "Pathogen Survey: Field measurements and environmental variables for pathogen presence survey."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1014_pathogen_survey_dataset.csv"
  feature_descriptions:
    x: "X-coordinate (location)"
    y: "Y-coordinate (location)"
    fus01: "Presence of Fusarium pathogen 01"
    diplo01: "Presence of Diplodia pathogen 01"
    temp: "Ambient temperature (°C)"
    p_sum: "Total precipitation"
    r_sum: "Radiation sum"
    elevation: "Elevation (m)"
    slope: "Slope angle (degrees)"
    hail: "Historical hail occurrence"
    hail_new: "Recent hail event indicator"
    age: "Age of sample or crop (years)"
    ph: "Soil pH level"
    lithology: "Soil lithology type"
    soil: "Soil texture classification"
    year: "Sampling year"
1017_HappinessRank_2015:
  dataset_description: "World Happiness Report 2015: Country-level happiness scores and contributing factors."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1017_HappinessRank_2015.csv"
  feature_descriptions:
    Region: "Geographical region"
    Happiness Rank: "Country ranking by happiness"
    Standard Error: "Standard error of happiness score"
    Economy (GDP per Capita): "GDP per capita contribution"
    Family: "Social support/family factor"
    Health (Life Expectancy): "Health/life expectancy contribution"
    Freedom: "Freedom to make life choices"
    Trust (Government Corruption): "Trust/corruption perception"
    Generosity: "Generosity index"
    Dystopia Residual: "Residual happiness gap from dystopia"
    Happiness Score: "Overall happiness score"
1020_Run_or_walk_information:
  dataset_description: "Wearable Activity Tracker: Tri-axial acceleration and gyroscope readings labeled as running or walking."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1020_Run_or_walk_information.csv"
  feature_descriptions:
    acceleration_x: "Accelerometer reading on X-axis"
    acceleration_y: "Accelerometer reading on Y-axis"
    acceleration_z: "Accelerometer reading on Z-axis"
    gyro_x: "Gyroscope reading on X-axis"
    gyro_y: "Gyroscope reading on Y-axis"
    gyro_z: "Gyroscope reading on Z-axis"
    activity: "Activity type label ('run' or 'walk')"
1023_car:
  dataset_description: "Car Evaluation Dataset: Vehicle attributes for acceptability classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1023_car.csv"
  feature_descriptions:
    buying: "Buying cost category"
    maint: "Maintenance cost category"
    doors: "Number of doors"
    persons: "Capacity (persons)"
    lug_boot: "Luggage boot size"
    safety: "Safety rating"
    class: "Acceptability classification"
1028_wilt:
  dataset_description: "Wilt Dataset: Spectral and texture features of Quickbird satellite image segments for detecting diseased (wilted) trees."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1028_wilt.csv"
  feature_descriptions:
    GLCM_Pan: "Gray-level co-occurrence matrix pan brightness"
    Mean_G: "Mean green channel value"
    Mean_R: "Mean red channel value"
    Mean_NIR: "Mean near-infrared channel value"
    SD_Plan: "Standard deviation of pan brightness"
    class: "Wilt presence label"
1030_WorkersCompensation:
  dataset_description: "Workers' Compensation: Injury and claim details including timing and cost."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1030_WorkersCompensation.csv"
  feature_descriptions:
    DateTimeOfAccident: "Date/time of accident"
    DateReported: "Date case was reported"
    Age: "Employee age"
    Gender: "Employee gender"
    MaritalStatus: "Marital status"
    DependentChildren: "Number of dependent children"
    DependentsOther: "Additional dependents"
    WeeklyPay: "Weekly wage"
    PartTimeFullTime: "Part/full-time status"
    HoursWorkedPerWeek: "Work hours per week"
    DaysWorkedPerWeek: "Work days per week"
    ClaimDescription: "Brief narrative of claim"
    InitialCaseEstimate: "Initial claim cost estimate"
    UltimateIncurredClaimCost: "Final incurred cost"
1033_climate-model-simulation-crashes:
  dataset_description: "Climate Model Simulation Crashes: Key parameters and crash outcome flag."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1033_climate-model-simulation-crashes.csv"
  feature_descriptions:
    vconst_2: "Model parameter vconst_2"
    vconst_3: "Model parameter vconst_3"
    vconst_4: "Model parameter vconst_4"
    vconst_5: "Model parameter vconst_5"
    vconst_7: "Model parameter vconst_7"
    ah_corr: "Diffusion coefficient for Redi (isopycnal) mixing in the ocean model"
    ah_bolus: "Diffusion coefficient for bolus mixing in the ocean model"
    slm_corr: "Maximum slope for the Redi and bolus mixing terms"
    efficiency_factor: "Model efficiency parameter"
    tidal_mix_max: "Maximum tidal mixing parameter"
    vertical_decay_scale: "Vertical decay scaling factor"
    convect_corr: "Convection correction"
    bckgrnd_vdc1: "Background vertical diffusivity coefficient"
    bckgrnd_vdc_ban: "Background vertical diffusivity in the Banda Sea"
    bckgrnd_vdc_eq: "Background vertical diffusivity at the equator"
    bckgrnd_vdc_psim: "Maximum vertical diffusivity induced by parametric subharmonic instability (PSI)"
    Prandtl: "Prandtl number"
    outcome: "Simulation outcome (0 = failure/crash, 1 = success)"
1036_jungle_chess_2pcs_endgame_panther_lion:
  dataset_description: "Jungle Chess Endgame (Panther vs Lion): Strategic features extracted from 2-piece endgame configurations in the Jungle Chess board game, used for classifying winning scenarios."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1036_jungle_chess_2pcs_endgame_panther_lion.csv"

  feature_descriptions:
    white_piece0_strength: "Strength ranking of the white piece (panther)"
    white_piece0_file: "File (column) on the board where white piece is located"
    white_piece0_rank: "Rank (row) on the board where white piece is located"
    white_piece0_advanced: "How far advanced the white piece is toward the opponent's den"
    white_piece0_distanceto_white_den: "Number of moves to reach its own den"
    white_piece0_distanceto_black_den: "Number of moves to reach the opponent's den"
    white_piece0_unopposedto_black_den_length: "Distance to black den assuming no opposition"
    white_piece0_unopposedto_black_den_shortest: "Shortest path to black den without opponent blocking"
    white_piece0_movesto_white_den: "Number of legal moves to its own den"
    white_piece0_movesto_black_den: "Number of legal moves to opponent's den"
    white_piece0_in_trap: "Is the white piece currently in a trap (1 if yes, 0 otherwise)"
    white_piece0_in_water: "Is the white piece in a water square"
    white_piece0_can_cross: "Whether the white piece can legally cross the river"
    white_piece0_can_cross_shortest: "Whether the shortest path crosses the river"
    white_piece0_unopposed_to_bank: "Whether the white piece can reach river bank unopposed"
    white_piece0_distanceto_black_piece0: "Distance in steps to the black piece"
    white_piece0_distanceto_black_piece0_parity: "Parity (even/odd) of the path to black piece"
    white_piece0_nextto_black_piece0: "Is the white piece next to the black piece"

    black_piece0_strength: "Strength ranking of the black piece (lion)"
    black_piece0_file: "File on the board where black piece is located"
    black_piece0_rank: "Rank on the board where black piece is located"
    black_piece0_advanced: "How far the black piece has advanced toward the opponent's den"
    black_piece0_distanceto_white_den: "Distance to white's den"
    black_piece0_distanceto_black_den: "Distance to its own den"
    black_piece0_movesto_white_den: "Legal moves available toward white's den"
    black_piece0_movesto_black_den: "Legal moves toward its own den"
    black_piece0_unopposedto_white_den_length: "Length of unopposed path to white's den"
    black_piece0_unopposedto_white_den_shortest: "Shortest unopposed route to white's den"
    black_piece0_in_trap: "Is the black piece in a trap"
    black_piece0_in_water: "Is the black piece in water"
    black_piece0_can_cross: "Can the black piece cross water"
    black_piece0_can_cross_shortest: "Can it do so via shortest path"
    black_piece0_unopposed_to_bank: "Unopposed access to riverbank for black piece"
    black_piece0_at_d7: "Is black at specific square D7 (special tactical value)"
    black_piece0_distanceto_white_piece0: "Distance to the white piece"
    black_piece0_distanceto_white_piece0_parity: "Even or odd steps to white piece"
    black_piece0_nextto_white_piece0: "Is the black piece adjacent to the white piece"

    highest_strength: "The higher of the two piece strengths"
    closest_to_den: "Which piece is closer to any den (1 for white, -1 for black)"
    closest_to_den_diff: "Difference in distance to nearest den between the two pieces"
    fastest_to_den: "Which player can reach a den faster under optimal movement"
    fastest_to_den_diff: "Difference in minimum steps to den"
    white_unopposed_to_den: "Is white unopposed on the path to den"
    black_unopposed_to_den: "Is black unopposed on its path to den"
    white_unopposed_to_den_quick_detour: "Is white able to detour around opponent and reach den"
    black_unopposed_to_den_quick_detour: "Can black detour and still reach den unopposed"

    class: "Target class indicating outcome or position classification (e.g., win/loss/draw or score tier)"
1047_obesity-level-indicators:
  dataset_description: "Obesity Level Indicators Dataset: Health and lifestyle survey capturing physical metrics, dietary habits, and physical activity to classify individuals by obesity category."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1047_obesity-level-indicators.csv"

  feature_descriptions:
    Gender: "Biological sex of the individual (Male/Female)"
    Age: "Age of the person in years"
    Height: "Height of the person in meters"
    Weight: "Weight of the person in kilograms"
    family_history_with_overweight: "Whether the individual has a family history of overweight (yes/no)"
    FAVC: "Whether the individual frequently consumes high-caloric food (yes/no)"
    FCVC: "Frequency of vegetable consumption on a scale (1 to 3)"
    NCP: "Number of meals consumed per day"
    CAEC: "Consumption of food between meals (categorical: never, sometimes, frequently, always)"
    SMOKE: "Smoking habit indicator (yes/no)"
    CH2O: "Daily water consumption in liters (scale from 1 to 3)"
    SCC: "If calories are monitored or counted (yes/no)"
    FAF: "Physical activity frequency (ordinal scale from 0 = none to 3 = most frequent)"
    TUE: "Time spent using technology devices (ordinal scale from 0 = least to 2 = most)"
    CALC: "Frequency of alcohol consumption (categorical)"
    MTRANS: "Transportation method used most often (bike, public transport, car, etc.)"
    NObeyesdad: "Target label representing obesity level category (e.g., Normal Weight, Obesity Type I, etc.)"
1048_skillcraft1:
  dataset_description: "SkillCraft1 Performance Prediction: In-game StarCraft player telemetry including keystroke and strategy metrics used to predict skill league level."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1048_skillcraft1.csv"

  feature_descriptions:
    LeagueIndex: "Target class representing the player's skill tier or league"
    Age: "Player's age in years"
    HoursPerWeek: "Number of hours spent playing StarCraft weekly"
    TotalHours: "Cumulative hours played by the user"
    APM: "Actions Per Minute – a direct measure of speed and multitasking"
    SelectByHotkeys: "Number of unit or building selections made using hotkeys per timestamp"
    AssignToHotkeys: "Frequency of assigning groups to hotkeys"
    UniqueHotkeys: "Number of unique hotkeys used"
    MinimapAttacks: "Number of attack commands issued using the minimap"
    MinimapRightClicks: "Minimap-based unit movement commands"
    NumberOfPACs: "Number of Perception Action Cycles (PACs) per timestamp"
    GapBetweenPACs: "Mean duration in milliseconds between PACs"
    ActionLatency: "Mean latency in milliseconds from the onset of a PAC to its first action"
    ActionsInPAC: "Average number of actions within each PAC"
    TotalMapExplored: "Number of 24x24 game coordinate grids viewed by the player per timestamp"
    WorkersMade: "Count of economic worker units trained"
    UniqueUnitsMade: "Number of different unit types produced"
    ComplexUnitsMade: "Number of strategically advanced unit types produced"
    ComplexAbilitiesUsed: "Total usage of advanced or specialized unit abilities"
1049_alcohol-qcm-sensor:
  dataset_description: "QCM Alcohol Sensor Dataset: Sensor responses to different alcohols and their concentrations captured via Quartz Crystal Microbalance (QCM) technology."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1049_alcohol-qcm-sensor.csv"

  feature_descriptions:
    0.799_0.201: "QCM sensor reading at an air-to-gas ratio of 0.799:0.201 (first reading)"
    0.799_0.201.1: "QCM sensor reading at an air-to-gas ratio of 0.799:0.201 (second reading)"
    0.700_0.300: "QCM sensor reading at an air-to-gas ratio of 0.700:0.300 (first reading)"
    0.700_0.300.1: "QCM sensor reading at an air-to-gas ratio of 0.700:0.300 (second reading)"
    0.600_0.400: "QCM sensor reading at an air-to-gas ratio of 0.600:0.400 (first reading)"
    0.600_0.400.1: "QCM sensor reading at an air-to-gas ratio of 0.600:0.400 (second reading)"
    0.501_0.499: "QCM sensor reading at an air-to-gas ratio of 0.501:0.499 (first reading)"
    0.501_0.499.1: "QCM sensor reading at an air-to-gas ratio of 0.501:0.499 (second reading)"
    0.400_0.600: "QCM sensor reading at an air-to-gas ratio of 0.400:0.600 (first reading)"
    0.400_0.600.1: "QCM sensor reading at an air-to-gas ratio of 0.400:0.600 (second reading)"
    1-Octanol: "Label indicating the alcohol class 1-Octanol"
    1-Propanol: "Label indicating the alcohol class 1-Propanol"
    2-Butanol: "Label indicating the alcohol class 2-Butanol"
    2-propanol: "Label indicating the alcohol class 2-Propanol"
    1-isobutanol: "Label indicating the alcohol class 1-Isobutanol"
1053_gas-turbine-2011:
  dataset_description: "Gas Turbine Emissions Data (2011): Sensor data from a gas turbine used to predict emissions of CO and NOx under different operating conditions."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1053_gas-turbine-2011.csv"

  feature_descriptions:
    AT: "Ambient temperature in °C"
    AP: "Ambient pressure in mbar"
    AH: "Ambient humidity in %"
    AFDP: "Air filter difference pressure"
    GTEP: "Gas turbine exhaust pressure (mbar)"
    TIT: "Turbine inlet temperature in °C"
    TAT: "Turbine after temperature in °C"
    TEY: "Turbine energy yield"
    CDP: "Compressor discharge pressure"
    CO: "Carbon monoxide emission (mg/m³) — Target 1"
    NOX: "Nitrogen oxides (NOx) emission (mg/m³) — Target 2"
1063_mauna-loa-atmospheric-co2:
  dataset_description: "Mauna Loa Atmospheric CO2: Daily atmospheric CO2 concentrations and associated metadata collected from Mauna Loa Observatory."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1063_mauna-loa-atmospheric-co2.csv"

  feature_descriptions:
    year: "Year of observation"
    month: "Month of observation"
    day: "Day of observation"
    weight: "Weight assigned to the data point for statistical correction"
    flag: "Quality control flag"
    station: "Name or ID of the measurement station (likely Mauna Loa)"
    co2: "Atmospheric CO2 concentration in ppm"
1065_hungarian-chickenpox:
  dataset_description: "Hungarian Chickenpox Reporting: Weekly case counts per Hungarian county for disease surveillance."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1065_hungarian-chickenpox.csv"
  feature_descriptions:
    BUDAPEST: "Reported chickenpox cases in Budapest"
    BARANYA: "Reported cases in Baranya county"
    BACS: "Cases in Bács-Kiskun county"
    BEKES: "Cases in Békés county"
    BORSOD: "Cases in Borsod-Abaúj-Zemplén county"
    CSONGRAD: "Cases in Csongrád county"
    FEJER: "Cases in Fejér county"
    GYOR: "Cases in Győr-Moson-Sopron county"
    HAJDU: "Cases in Hajdú-Bihar county"
    HEVES: "Cases in Heves county"
    JASZ: "Cases in Jász-Nagykun-Szolnok county"
    KOMAROM: "Cases in Komárom-Esztergom county"
    NOGRAD: "Cases in Nógrád county"
    PEST: "Cases in Pest county"
    SOMOGY: "Cases in Somogy county"
    SZABOLCS: "Cases in Szabolcs-Szatmár-Bereg county"
    TOLNA: "Cases in Tolna county"
    VAS: "Cases in Vas county"
    VESZPREM: "Cases in Veszprém county"
    ZALA: "Cases in Zala county"
1067_Klaverjas2018:
  dataset_description: "Klaverjas 2018 Dataset: Representation of card combinations and their associated game outcomes in the Dutch card game Klaverjas."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1067_Klaverjas2018.csv"

  feature_descriptions:
    card_S_A: "Ace of Spades was drawn"
    card_S_10: "10 of Spades was drawn"
    card_S_K: "King of Spades was drawn"
    card_S_Q: "Queen of Spades was drawn"
    card_S_J: "Jack of Spades was drawn"
    card_S_9: "9 of Spades was drawn"
    card_S_8: "8 of Spades was drawn"
    card_S_7: "7 of Spades was drawn"
    card_H_A: "Ace of Hearts was drawn"
    card_H_10: "10 of Hearts was drawn"
    card_H_K: "King of Hearts was drawn"
    card_H_Q: "Queen of Hearts was drawn"
    card_H_J: "Jack of Hearts was drawn"
    card_H_9: "9 of Hearts was drawn"
    card_H_8: "8 of Hearts was drawn"
    card_H_7: "7 of Hearts was drawn"
    card_D_A: "Ace of Diamonds was drawn"
    card_D_10: "10 of Diamonds was drawn"
    card_D_K: "King of Diamonds was drawn"
    card_D_Q: "Queen of Diamonds was drawn"
    card_D_J: "Jack of Diamonds was drawn"
    card_D_9: "9 of Diamonds was drawn"
    card_D_8: "8 of Diamonds was drawn"
    card_D_7: "7 of Diamonds was drawn"
    card_C_A: "Ace of Clubs was drawn"
    card_C_10: "10 of Clubs was drawn"
    card_C_K: "King of Clubs was drawn"
    card_C_Q: "Queen of Clubs was drawn"
    card_C_J: "Jack of Clubs was drawn"
    card_C_9: "9 of Clubs was drawn"
    card_C_8: "8 of Clubs was drawn"
    card_C_7: "7 of Clubs was drawn"
    outcome: "Outcome of the round (e.g., win/loss or score)"
1083_Diabetes:
  dataset_description: "Pima Indian Diabetes Dataset: Medical diagnostic data to predict onset of diabetes in female patients."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1083_Diabetes.csv"

  feature_descriptions:
    preg: "Number of pregnancies"
    plas: "Plasma glucose concentration (glucose tolerance test)"
    pres: "Diastolic blood pressure (mm Hg)"
    skin: "Triceps skinfold thickness (mm)"
    insu: "2-Hour serum insulin (mu U/ml)"
    mass: "Body mass index (weight in kg/(height in m)^2)"
    pedi: "Diabetes pedigree function (heredity measure)"
    age: "Age in years"
    class: "Class label (1 = tested positive for diabetes, 0 = negative)"
1085_LottoMaster-144:
  dataset_description: "LottoMaster Statistics: Historical frequency and time-based features for lottery number prediction."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1085_LottoMaster-144.csv"

  feature_descriptions:
    count: "Times this number was picked"
    daysSinceLastPicked: "Days since number last appeared"
    avgDaysSinceLastPicked: "Average days between appearances"
    countsMean: "Mean of all counts"
    countsMedian: "Median of all counts"
    countsVariance: "Variance of counts"
    countsStdDev: "Standard deviation of counts"
    countsMeanDiff: "Difference between this count and the mean"
    countsMedianDiff: "Difference between this count and the median"
    countsVarianceDiff: "Difference between this count and the variance"
    countsStdDevDiff: "Difference between this count and std deviation"
    countsSND: "Standardized normal deviation of counts"
    daysMean: "Mean of all day intervals"
    daysMedian: "Median of all day intervals"
    daysVariance: "Variance in day intervals"
    daysMeanDiff: "Difference from mean interval"
    daysMedianDiff: "Difference from median interval"
    daysVarianceDiff: "Difference from variance"
    daysStdDevDiff: "Difference from standard deviation"
    daysSND: "Standardized normal deviation of days"
    daysStdDev: "Standard deviation of days"
    countsModeDiff: "Difference from mode of counts"
    daysModeDiff: "Difference from mode of days"
    percent: "Recent pick percentage for this number"
    avgPercent: "Average pick percentage"
    percentsMean: "Mean of all pick percentages"
    percentsMedian: "Median pick percentage"
    percentsVariance: "Variance of pick percentages"
    percentsMeanDiff: "Difference from mean pick percentage"
    percentsStdDevDiff: "Difference from std deviation of percentage"
    percentsSND: "Standardized normal deviation of percentage"
    percentsStdDev: "Standard deviation of pick percentage"
    percentsMedianDiff: "Difference from median percentage"
    percentsVarianceDiff: "Variance difference from others"
    percentsModeDiff: "Difference from mode of percentages"
    picked: "Was this number picked in the current round (1/0)"
1088_1DUltrasoundMuscleFatigueDataStudy1Of2:
  dataset_description: "Ultrasound Muscle Fatigue Study 1: Annotated metadata and subject features for fatigue analysis via ultrasound imaging."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1088_1DUltrasoundMuscleFatigueDataStudy1Of2.csv"

  feature_descriptions:
    annotation: "Label describing fatigue level or observed effect"
    timestamp: "Timestamp of the image acquisition"
    armPosition: "Position of the subject's arm during imaging"
    subjectID: "Unique subject identifier"
    dataSetID: "Internal dataset group ID"
    gender: "Subject's gender"
    weight: "Subject's weight in kilograms"
1089_1DUltrasoundMuscleFatigueDataStudy2Of2:
  dataset_description: "Ultrasound Muscle Fatigue Study 2: Additional set of ultrasound metadata with similar schema to Study 1."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1089_1DUltrasoundMuscleFatigueDataStudy2Of2.csv"

  feature_descriptions:
    annotation: "Label describing fatigue level or observed effect"
    timestamp: "Timestamp of the image acquisition"
    armPosition: "Position of the subject's arm during imaging"
    subjectID: "Unique subject identifier"
    dataSetID: "Internal dataset group ID"
    gender: "Subject's gender"
    weight: "Subject's weight in kilograms"
1094_students_scores:
  dataset_description: "Student Performance Dataset: Academic and demographic data of students used to predict academic success in math, reading, and writing."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1094_students_scores.csv"

  feature_descriptions:
    gender: "Student's gender (e.g., male, female)"
    race.ethnicity: "Student's race or ethnicity group"
    parental.level.of.education: "Highest education level of parent or guardian"
    lunch: "Type of lunch received (standard or free/reduced)"
    test.preparation.course: "Whether the student completed a test preparation course"
    math.score: "Score achieved in the mathematics exam"
    reading.score: "Score achieved in the reading exam"
    writing.score: "Score achieved in the writing exam"
1107_ALARM_dataset:
  dataset_description: "ALARM Network Dataset: Simulated ICU patient monitoring data based on probabilistic relationships among medical variables."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1107_ALARM_dataset.csv"

  feature_descriptions:
    CVP: "Central venous pressure"
    PCWP: "Pulmonary capillary wedge pressure"
    HIST: "Patient history (of left ventricular failure)"
    TPR: "Total peripheral resistance"
    BP: "Blood pressure"
    CO: "Cardiac output"
    HRBP: "Heart rate from blood pressure signal"
    HREK: "Heart rate from ECG"
    HRSA: "Heart rate reported by the oxygen saturation (pulse oximetry) monitor"
    PAP: "Pulmonary arterial pressure"
    SAO2: "Oxygen saturation"
    FIO2: "Fraction of inspired oxygen"
    PRSS: "Breathing pressure"
    ECO2: "End-tidal CO2"
    MINV: "Minute volume"
    MVS: "Minute volume setting on the ventilator"
    HYP: "Presence of hypovolemia"
    LVF: "Left ventricular failure status"
    APL: "Presence of anaphylaxis"
    ANES: "Insufficient anesthesia/analgesia"
    PMB: "Presence of pulmonary embolus"
    INT: "Intubation status"
    KINK: "Presence of kink in airway"
    DISC: "Disconnected status"
    LVV: "Left ventricular end-diastolic volume"
    STKV: "Stroke volume"
    CCHL: "Catecholamine level"
    ERLO: "Measurement error indicator: low output"
    HR: "Overall heart rate"
    ERCA: "Measurement error indicator: electrocautery interference"
    SHNT: "Presence of shunt"
    PVS: "Pulmonary venous oxygen saturation"
    ACO2: "Arterial CO2"
    VALV: "Alveolar ventilation"
    VLNG: "Lung ventilation"
    VTUB: "Ventilation measured at the ventilator tube"
    VMCH: "Ventilation measured at the ventilator machine"
1107_rainfall_bangladesh:
  dataset_description: "Monthly Rainfall Data in Bangladesh: Climate-related data across weather stations."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1107_rainfall_bangladesh.csv"

  feature_descriptions:
    Year: "Year of the observation"
    Station: "Weather station identifier"
    Month: "Month of the observation"
    Rainfall: "Recorded rainfall in millimeters"
1110_hailfinder_dataset:
  dataset_description: "HailFinder Dataset: Meteorological features used for predicting hailstorm occurrences."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1110_hailfinder_dataset.csv"

  feature_descriptions:
    N07muVerMo: "Vertical motion from 10.7 µm imagery"
    SubjVertMo: "Subjective vertical motion estimation"
    QGVertMotion: "Quasi-geostrophic vertical motion"
    CombVerMo: "Combined vertical motion index"
    AreaMesoALS: "Area of meso-alpha scale"
    SatContMoist: "Satellite contribution to moisture"
    RaoContMoist: "Reading at the forecast center for moisture"
    CombMoisture: "Combined moisture index"
    AreaMoDryAir: "Area of moisture and dry air"
    VISCloudCov: "Visible cloud coverage"
    IRCloudCover: "Infrared cloud cover estimate"
    CombClouds: "Combined cloud cover index"
    CldShadeOth: "Cloud shading over other areas"
    AMInstabMt: "Morning mountain instability"
    InsInMt: "Instability inside mountain regions"
    WndHodograph: "Wind hodograph structure"
    OutflowFrMt: "Outflow from the mountains"
    MorningBound: "Morning boundary layer presence"
    Boundaries: "Detected mesoscale boundaries"
    CldShadeConv: "Convective cloud shading"
    CompPlFcst: "Composite plains forecast"
    CapChange: "Change in capping (capping inversion strength)"
    LoLevMoistAd: "Low-level moisture advection"
    InsChange: "Instability change from prior"
    MountainFcst: "Mountain area forecast"
    Date: "Forecast date"
    Scenario: "Forecast scenario ID"
    ScenRelAMCIN: "Scenario-related morning CIN"
    MorningCIN: "Morning convective inhibition"
    AMCINInScen: "AM CIN in scenario"
    CapInScen: "Capping within scenario"
    ScenRelAMIns: "Scenario-related morning instability"
    LIfr12ZDENSd: "Lifted index (LI) from the 12Z Denver sounding"
    AMDewptCalPl: "AM dewpoint calculations for the plains"
    AMInsWliScen: "AM instability within scenario"
    InsSclInScen: "Instability scaling in scenario"
    ScenRel34: "Scenario relevant to regions 2/3/4"
    LatestCIN: "Latest available convective inhibition"
    LLIW: "LLIW severe weather index"
    CurPropConv: "Current propensity to convection"
    ScnRelPlFcst: "Scenario-related plains forecast"
    PlainsFcst: "Forecast for plains region"
    N34StarFcst: "Forecast for regions 2/3/4"
    R5Fcst: "Region 5 forecast"
    Dewpoints: "Surface dewpoint temperatures"
    LowLLapse: "Low-level lapse rate"
    MeanRH: "Mean relative humidity"
    MidLLapse: "Mid-level lapse rate"
    MvmtFeatures: "Storm movement features"
    RHRatio: "Relative humidity ratio"
    SfcWndShfDis: "Surface wind shift discontinuity"
    SynForcng: "Synoptic forcing index"
    TempDis: "Temperature discontinuity"
    WindAloft: "Wind speed and direction aloft"
    WindFieldMt: "Mountain wind field"
    WindFieldPln: "Plains wind field"
1111_insurance_dataset:
  dataset_description: "Auto Insurance Risk Dataset: Car and driver information used to predict insurance claims and risk levels."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1111_insurance_dataset.csv"

  feature_descriptions:
    GoodStudent: "Good student discount (yes/no)"
    Age: "Driver's age"
    SocioEcon: "Socioeconomic class"
    RiskAversion: "Driver's risk aversion level"
    VehicleYear: "Year of the vehicle model"
    ThisCarDam: "Damage to this car (severity level)"
    RuggedAuto: "Ruggedness of the car"
    Accident: "Severity of the accident"
    MakeModel: "Make and model category"
    DrivQuality: "Driver quality rating"
    Mileage: "Annual mileage"
    Antilock: "Car equipped with anti-lock braking system"
    DrivingSkill: "Assessed driving skill"
    SeniorTrain: "Received senior driver training"
    ThisCarCost: "Current cost of the vehicle"
    Theft: "Whether the car was stolen"
    CarValue: "Vehicle's market value"
    HomeBase: "Type of neighbourhood where the car is kept"
    AntiTheft: "Has anti-theft device"
    PropCost: "Property cost estimate"
    OtherCarCost: "Cost of any other car owned"
    OtherCar: "Whether other cars were involved in the accident"
    MedCost: "Medical cost estimate"
    Cushioning: "Safety cushioning available"
    Airbag: "Airbags present in vehicle"
    ILiCost: "Injury liability cost"
    DrivHist: "Driver's historical record"
1116_edm:
  dataset_description: "EDM Sensor Time Analysis: Electrical discharge machining time statistics across different sensor groups."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1116_edm.csv"

  feature_descriptions:
    ASM_A_MeanT: "Mean time for sensor ASM_A"
    ASD_A_SDevT: "Standard deviation of time for sensor ASD_A"
    BSM_B_MeanT: "Mean time for sensor BSM_B"
    BSD_B_SDevT: "Standard deviation of time for sensor BSD_B"
    CSM_C_MeanT: "Mean time for sensor CSM_C"
    CSD_C_SDevT: "Standard deviation of time for sensor CSD_C"
    ISM_I_MeanT: "Mean time for sensor ISM_I"
    ISD_I_SDevT: "Standard deviation of time for sensor ISD_I"
    ALM_A_MeanT: "Alternate mean time sensor ALM_A"
    ALD_A_SDevT: "Alternate std deviation for sensor ALD_A"
    BLM_B_MeanT: "Alternate mean time for sensor BLM_B"
    BLD_B_SDevT: "Alternate std deviation for sensor BLD_B"
    CLM_C_MeanT: "Alternate mean time for sensor CLM_C"
    CLD_C_SDevT: "Alternate std deviation for sensor CLD_C"
    ILM_I_MeanT: "Alternate mean time for sensor ILM_I"
    ILD_I_SDevT: "Alternate std deviation for sensor ILD_I"
    DFlow: "Discharge flow reading"
    DGap: "Discharge gap reading"
1118_jura:
  dataset_description: "Jura Geochemical Survey: Metal concentrations and geographical features from soil samples in Jura, Switzerland."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1118_jura.csv"

  feature_descriptions:
    Xloc: "X-coordinate of sample location"
    Yloc: "Y-coordinate of sample location"
    Landuse_1: "Land use type 1 (binary)"
    Landuse_2: "Land use type 2 (binary)"
    Landuse_3: "Land use type 3 (binary)"
    Landuse_4: "Land use type 4 (binary)"
    Rock_1: "Rock type 1 indicator"
    Rock_2: "Rock type 2 indicator"
    Rock_3: "Rock type 3 indicator"
    Rock_4: "Rock type 4 indicator"
    Rock_5: "Rock type 5 indicator"
    Cr: "Chromium concentration"
    Ni: "Nickel concentration"
    Pb: "Lead concentration"
    Zn: "Zinc concentration"
    Cd: "Cadmium concentration"
    Co: "Cobalt concentration"
    Cu: "Copper concentration"
1119_sf1:
  dataset_description: "Sunspot Flare Classification (SF1): Solar activity features to predict solar flare intensity."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1119_sf1.csv"

  feature_descriptions:
    mod_zurich_class: "Modified Zurich sunspot classification"
    largest_spot_size: "Size of the largest sunspot group"
    spot_distribution: "Distribution of sunspots"
    activity: "Solar activity level"
    evolution: "Sunspot group evolution"
    previous_day_activity: "Activity level on the previous day"
    hist_complex: "Historical complexity of region"
    become_hist_complex: "Whether the region became historically complex on this pass across the sun's disk"
    area: "Total sunspot area"
    area_largest: "Area of the largest sunspot group"
    c_class: "Number of C-class flares"
    m_class: "Number of M-class flares"
    x_class: "Number of X-class flares"
1120_sf2:
  dataset_description: "Sunspot Flare Classification (SF2): A similar or extended dataset to SF1 with same feature schema for solar flare classification."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1120_sf2.csv"

  feature_descriptions:
    mod_zurich_class: "Modified Zurich sunspot classification"
    largest_spot_size: "Size of the largest sunspot group"
    spot_distribution: "Distribution of sunspots"
    activity: "Solar activity level"
    evolution: "Sunspot group evolution"
    previous_day_activity: "Activity level on the previous day"
    hist_complex: "Historical complexity of region"
    become_hist_complex: "Whether the region became historically complex on this pass across the sun's disk"
    area: "Total sunspot area"
    area_largest: "Area of the largest sunspot group"
    c_class: "Number of C-class flares"
    m_class: "Number of M-class flares"
    x_class: "Number of X-class flares"
1121_slump:
  dataset_description: "Concrete Slump Test: Measures workability and strength of concrete mixes."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1121_slump.csv"

  feature_descriptions:
    Cemment: "Amount of cement in the mix (kg/m³)"
    Slag: "Amount of blast furnace slag (kg/m³)"
    Fly_ash: "Fly ash content (kg/m³)"
    Water: "Water added to the mix (kg/m³)"
    SP: "Superplasticizer amount (kg/m³)"
    Coarse_Aggr: "Coarse aggregate (kg/m³)"
    Fine_Aggr: "Fine aggregate (kg/m³)"
    SLUMP_cm: "Measured slump (cm), indicating workability"
    FLOW_cm: "Flow measurement (cm)"
    Compressive_Strength_Mpa: "Compressive strength in MPa"
1122_22SafetyBehaviouDuringCOVID-19:
  dataset_description: "COVID-19 Safety Behavior Survey: Survey responses related to public safety practices during the COVID-19 pandemic."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1122_22SafetyBehaviouDuringCOVID-19.csv"

  feature_descriptions:
    GENDER: "Respondent's gender"
    AGE: "Age of respondent"
    JT: "Job type or employment"
    MT: "Mode of transportation used"
    WT: "Work time or hours"
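    TSL1: "Trust in safety leadership (item 1 of 5)"
    TSL2: "Trust in safety leadership (item 2 of 5)"
    TSL3: "Trust in safety leadership (item 3 of 5)"
    TSL4: "Trust in safety leadership (item 4 of 5)"
    TSL5: "Trust in safety leadership (item 5 of 5)"
    TFL1: "Trust in frontline leadership (item 1 of 6)"
    TFL2: "Trust in frontline leadership (item 2 of 6)"
    TFL3: "Trust in frontline leadership (item 3 of 6)"
    TFL4: "Trust in frontline leadership (item 4 of 6)"
    TFL5: "Trust in frontline leadership (item 5 of 6)"
    TFL6: "Trust in frontline leadership (item 6 of 6)"
    EWB1: "Emotional wellbeing indicator (item 1 of 3)"
    EWB2: "Emotional wellbeing indicator (item 2 of 3)"
    EWB3: "Emotional wellbeing indicator (item 3 of 3)"
    SC1: "Safety compliance question (item 1 of 7)"
    SC2: "Safety compliance question (item 2 of 7)"
    SC3: "Safety compliance question (item 3 of 7)"
    SC4: "Safety compliance question (item 4 of 7)"
    SC5: "Safety compliance question (item 5 of 7)"
    SC6: "Safety compliance question (item 6 of 7)"
    SC7: "Safety compliance question (item 7 of 7)"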
1127_mom:
  dataset_description: "Model of Models (MoM): Classification dataset used to benchmark meta-learning or ensemble learning methods."

  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1127_mom.csv"

  feature_descriptions:
    datasize: "Size of the dataset used"
    bootstrap: "Indicates if bootstrap sampling was used"
    classe: "Class label of the instance"
    class: "Target class for prediction (same as 'classe')"
# Metadata entry for the 1138_MIP-2016-PAR10-regression dataset: summary text,
# CSV location, and per-feature descriptions. Each description is written to be
# readable on its own, without relying on the neighbouring entries.
1138_MIP-2016-PAR10-regression:
  dataset_description: "MIP 2016 PAR10 Regression Dataset: Features extracted from MIP problem instances aimed at predicting penalized average runtime (PAR10)."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1138_MIP-2016-PAR10-regression.csv"
  feature_descriptions:
    # Problem size
    probtype: "Problem type (e.g., MILP, LP)"
    n_vars: "Total number of variables"
    n_constr: "Total number of constraints"
    n_nzcnt: "Non-zero entries in constraint matrix"
    nq_vars: "Number of quadratic variables"
    nq_constr: "Number of quadratic constraints"
    nq_nzcnt: "Non-zero entries in quadratic part"
    # LP relaxation
    lp_avg: "Average LP relaxation objective value"
    lp_l2_avg: "L2 norm of LP objective coefficients"
    lp_linf: "Infinity norm (max) of LP objective coefficients"
    lp_objval: "Objective value from LP relaxation"
    # Variable-type counts and ratios
    num_b_variables: "Count of binary variables"
    num_i_variables: "Count of integer variables"
    num_c_variables: "Count of continuous variables"
    num_s_variables: "Count of semi-continuous variables"
    num_n_variables: "Count of numeric variables"
    ratio_b_variables: "Proportion of binary variables"
    ratio_i_variables: "Proportion of integer variables"
    ratio_c_variables: "Proportion of continuous variables"
    ratio_s_variables: "Proportion of semi-continuous variables"
    ratio_n_variables: "Proportion of numeric variables"
    num_i._variables: "Count of implicitly integer variables"
    ratio_i._variables: "Proportion of implicitly integer variables"
    num_unbounded_disc: "Count of unbounded discrete variables"
    ratio_unbounded_disc: "Proportion of unbounded discrete variables"
    # Support size
    support_size_avg: "Average support size across variables"
    support_size_median: "Median support size"
    support_size_varcoef: "Coefficient of variation of support size"
    support_size_q90mq10: "90th–10th percentile support size range"
    # Right-hand-side statistics per constraint class
    rhs_c_0_avg: "Average right-hand side values (constraint class 0)"
    rhs_c_0_varcoef: "Variation in RHS values (class 0)"
    rhs_c_1_avg: "Average RHS for constraint class 1"
    rhs_c_1_varcoef: "Variation in RHS values (class 1)"
    rhs_c_2_avg: "Average RHS for constraint class 2"
    rhs_c_2_varcoef: "Variation in RHS values (class 2)"
    # Variable-constraint graph statistics
    vcg_constr_deg0_avg: "Mean constraint graph degree (class 0)"
    vcg_constr_deg0_median: "Median constraint degree (class 0)"
    vcg_constr_deg0_varcoef: "Degree variation (class 0)"
    vcg_constr_deg0_q90mq10: "Quantile range for class 0 degree"
    vcg_var_deg0_avg: "Mean variable graph degree (class 0)"
    vcg_var_deg0_median: "Median variable degree (class 0)"
    vcg_var_deg0_varcoef: "Variable degree variation (class 0)"
    vcg_var_deg0_q90mq10: "Quantile range for variable degree (class 0)"
    vcg_constr_weight0_avg: "Mean constraint graph weight (class 0)"
    vcg_constr_weight0_varcoef: "Weight coefficient variation (class 0)"
    vcg_var_weight0_avg: "Mean variable weight (class 0)"
    vcg_var_weight0_varcoef: "Variable weight variation (class 0)"
    # Constraint-matrix and objective coefficients
    A_ij_normalized0_avg: "Mean normalized non-zero coefficients (class 0)"
    A_ij_normalized0_varcoef: "Variation in normalized coefficients (class 0)"
    a_normalized_varcoefs0_avg: "Average variable coefficient variation"
    a_normalized_varcoefs0_varcoef: "Variation in variable coefficient variation"
    obj_coefs0_avg: "Average objective coefficients (class 0)"
    obj_coefs0_std: "Standard deviation of objective coefficient (class 0)"
    obj_coef_per_constr0_avg: "Objective coefficients per constraint average"
    obj_coef_per_constr0_std: "Std deviation of objective coefficients per constraint"
    obj_coef_per_sqr_constr0_avg: "Average quadratic constraint objective coefficient"
    obj_coef_per_sqr_constr0_std: "Std deviation of quadratic constraint coefficient"
    # Solver probing statistics
    mipgap: "Relative MIP gap at termination"
    nodecnt: "Number of nodes explored"
    clqcnt: "Clique cuts count"
    covcnt: "Cover cuts count"
    itcnt_max: "Maximum iterations reached"
    numnewsolution_sum: "Total new found solutions"
    newin_sum: "New best incumbent solutions"
    nodeleft_avg: "Average unexplored nodes"
    nodeleft_varcoef: "Coefficient variation for nodes left"
    diffObj_avg: "Average difference in objective values"
    diffObj_median: "Median difference in objective values"
    diffObj_varcoef: "Variation in objective difference"
    numfeas: "Count of feasible solutions"
    diffBestInt_avg: "Average deviation from best integer solution"
    diffBestObjUp_avg: "Average deviation from best upper bound"
    numcuts_sum: "Total cuts applied"
    diffGap_avg: "Average MIP gap"
    diffGap_median: "Median MIP gap"
    diffGap_varcoef: "MIP gap variation"
    diffGap_q90mq10: "Quantile range for gap"
    # Timing and cut counters
    pre_t: "Preprocessing time (seconds)"
    rel_t: "Total solving time (seconds)"
    clique_table: "Usage of clique table (true/false)"
    cliqueCuts: "Number of clique cuts"
    impliedBoundCuts: "Implied bound cuts count"
    flowCuts: "Flow cover cuts count"
    mixedIntegerRoundingCuts: "Mixed integer rounding cuts count"
    gomoryFractionalCuts: "Gomory fractional cuts count"
    algorithm: "Solver algorithm used"
    PAR10: "PAR10 score (penalized average runtime) — Target variable"
# Metadata entry for the 1139_PhosphoproteinChallenge_DREAM3 dataset: summary
# text, CSV location, and per-feature descriptions.
1139_PhosphoproteinChallenge_DREAM3:
  dataset_description: "Phosphoprotein DREAM3 Challenge Dataset: Levels of phosphoproteins measured under various experimental conditions for network inference benchmarking."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1139_PhosphoproteinChallenge_DREAM3.csv"
  feature_descriptions:
    # Experimental conditions
    Cell_Type: "Cell line used in experiment (e.g., HEK)"
    Stimulus: "Applied stimulus condition (e.g., EGF, TNF)"
    Inhibitor: "Chemical inhibitor used (e.g., MEKi, AKTi)"
    Time_of_Data_Acquisition_.min.: "Time after stimulation (minutes)"
    # Measured phosphoprotein levels
    AKT: "Phospho-AKT level"
    ERK12: "Phospho-ERK1/2 level"
    GSK3: "Phospho-GSK3 level"
    Ikb: "Phospho-Ikb level"
    JNK12: "Phospho-JNK1/2 level"
    p38: "Phospho-p38 MAPK level"
    p70S6: "Phospho-p70S6 kinase level"
    p90RSK: "Phospho-p90RSK level"
    STAT3: "Phospho-STAT3 level"
    cJUN: "Phospho-cJUN level"
    CREB: "Phospho-CREB level"
    HistH3: "Phospho-Histone H3 level"
    HSP27: "Phospho-HSP27 level"
    IRS1s: "Phospho-IRS1 serine level"
    MEK12: "Phospho-MEK1/2 level"
    p53: "Phospho-p53 level"
    STAT6: "Phospho-STAT6 level"
# Metadata entry for the 1140_exercises dataset: summary text, CSV location,
# and per-feature descriptions.
1140_exercises:
  dataset_description: "Exercise Dataset: Physical metrics for brief workouts, including heart rate and body temperature, used for fitness activity classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1140_exercises.csv"
  feature_descriptions:
    Gender: "Participant gender (Male/Female)"
    Age: "Participant age in years"
    Height: "Height in centimeters"
    Weight: "Weight in kilograms"
    Duration: "Exercise duration in minutes"
    Heart_Rate: "Average heart rate during exercise (beats per minute)"
    Body_Temp: "Average body temperature during exercise (°C)"
# Metadata entry for the 1142_Sick_numeric dataset: summary text, CSV location,
# and per-feature descriptions.
1142_Sick_numeric:
  dataset_description: "Sick Numeric Dataset: Numeric encoding of thyroid-related diagnostic features similar to UCI Sick dataset."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1142_Sick_numeric.csv"
  feature_descriptions:
    # Patient demographics and history flags
    age: "Patient age in years"
    sex: "Patient gender (1=male, 0=female)"
    on_thyroxine: "On thyroxine medication (1= yes, 0=no)"
    query_on_thyroxine: "Thyroxine query pending (1=yes,0=no)"
    on_antithyroid_medication: "On antithyroid medication (1=yes,0=no)"
    sick: "Patient currently ill (1=yes, 0=no)"
    pregnant: "Pregnant status (1=yes, 0=no)"
    thyroid_surgery: "History of thyroid surgery (1=yes, 0=no)"
    I131_treatment: "Received I131 (radioactive iodine) treatment (1=yes, 0=no)"
    query_hypothyroid: "Query hypothyroid status (1=yes, 0=no)"
    query_hyperthyroid: "Query hyperthyroid status (1=yes, 0=no)"
    lithium: "On lithium (1=yes, 0=no)"
    goitre: "Presence of goitre (1=yes, 0=no)"
    tumor: "Tumor history (1=yes, 0=no)"
    hypopituitary: "Hypopituitary disease (1=yes, 0=no)"
    psych: "Psychiatric history (1=yes, 0=no)"
    # Lab tests: each `*_measured` flag is paired with the measured value
    TSH_measured: "TSH measured (1=yes, 0=no)"
    TSH: "TSH hormone level"
    T3_measured: "T3 measured (1=yes, 0=no)"
    T3: "T3 hormone level"
    TT4_measured: "Total T4 measured (1=yes, 0=no)"
    TT4: "Total T4 hormone level"
    T4U_measured: "T4 uptake measured (1=yes, 0=no)"
    T4U: "T4 uptake level"
    FTI_measured: "Free thyroxine index measured (1=yes, 0=no)"
    FTI: "Free thyroxine index"
    TBG_measured: "TBG measured (1=yes, 0=no)"
    TBG: "Thyroxine binding globulin level"
    referral_source: "Source of referral (encoded numeric)"
    Class: "Diagnosis class (0=healthy, 1=sick)"
# Metadata entry for the 1144_AustinWeather dataset: summary text, CSV location,
# and per-feature descriptions.
1144_AustinWeather:
  dataset_description: "Austin Weather Data: Daily weather summaries including temperature, humidity, pressure, visibility, wind, precipitation, and weather events (e.g., rain, fog)."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1144_AustinWeather.csv"
  feature_descriptions:
    # Temperature
    TempHighF: "High daily temperature (°F)"
    TempAvgF: "Average temperature (°F)"
    TempLowF: "Low daily temperature (°F)"
    # Dew point
    DewPointHighF: "High daily dew point (°F)"
    DewPointAvgF: "Average dew point (°F)"
    DewPointLowF: "Low daily dew point (°F)"
    # Humidity
    HumidityHighPercent: "High daily humidity (%)"
    HumidityAvgPercent: "Average humidity (%)"
    HumidityLowPercent: "Low daily humidity (%)"
    # Sea-level pressure
    SeaLevelPressureHighInches: "Daily high pressure (in)"
    SeaLevelPressureAvgInches: "Average pressure (in)"
    SeaLevelPressureLowInches: "Daily low pressure (in)"
    # Visibility
    VisibilityHighMiles: "High daily visibility (mi)"
    VisibilityAvgMiles: "Average daily visibility (mi)"
    VisibilityLowMiles: "Low daily visibility (mi)"
    # Wind
    WindHighMPH: "High daily wind speed (mph)"
    WindAvgMPH: "Average wind speed (mph)"
    WindGustMPH: "Peak wind gust speed (mph)"
    # Precipitation and events
    PrecipitationSumInches: "Total daily precipitation (inches)"
    Events: "Weather event(s) on that day (e.g., Rain, Fog)"
# Metadata entry for the 1146_CorporateCreditRating dataset: summary text,
# CSV location, and per-feature descriptions.
1146_CorporateCreditRating:
  dataset_description: "Corporate Credit Ratings: Financial ratios associated with company credit ratings from multiple agencies."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1146_CorporateCreditRating.csv"
  feature_descriptions:
    # Rating record identification
    Rating: "Assigned credit rating"
    Name: "Company name"
    Symbol: "Stock ticker symbol"
    Rating_Agency_Name: "Name of rating agency"
    Date: "Date of rating"
    Sector: "Industry sector classification"
    # Liquidity
    currentRatio: "Current assets to current liabilities ratio"
    quickRatio: "Quick ratio"
    cashRatio: "Cash ratio"
    daysOfSalesOutstanding: "DSO – Receivables collection period"
    # Profitability
    netProfitMargin: "Net profit margin"
    pretaxProfitMargin: "Pretax profit margin"
    grossProfitMargin: "Gross profit margin"
    operatingProfitMargin: "Operating margin"
    returnOnAssets: "Return on assets (ROA)"
    returnOnCapitalEmployed: "ROCE"
    returnOnEquity: "Return on equity (ROE)"
    # Efficiency and leverage
    assetTurnover: "Asset turnover ratio"
    fixedAssetTurnover: "Fixed asset turnover"
    debtEquityRatio: "Total debt/equity ratio"
    debtRatio: "Total debt/total assets ratio"
    effectiveTaxRate: "Taxes paid/pretax income"
    # Cash flow and valuation
    # Numerator/denominator follow the key name: free cash flow over operating cash flow.
    freeCashFlowOperatingCashFlowRatio: "Free cash flow to operating cash flow ratio"
    freeCashFlowPerShare: "FCF per share"
    cashPerShare: "Cash held per share"
    companyEquityMultiplier: "Equity multiplier"
    ebitPerRevenue: "EBIT margin"
    enterpriseValueMultiple: "EV multiple"
    operatingCashFlowPerShare: "Operating cash flow per share"
    operatingCashFlowSalesRatio: "OCF to sales ratio"
    payablesTurnover: "Payables turnover ratio"
# Metadata entry for the 1151_Austin-Weather dataset: summary text, CSV location,
# and per-feature descriptions. Feature keys match those of 1144_AustinWeather.
1151_Austin-Weather:
  dataset_description: "Extended Austin Weather Data: Detailed daily weather attributes for Austin, Texas."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1151_Austin-Weather.csv"
  feature_descriptions:
    # Temperature
    TempHighF: "High daily temperature (°F)"
    TempAvgF: "Average daily temperature (°F)"
    TempLowF: "Low daily temperature (°F)"
    # Dew point
    DewPointHighF: "High daily dew point (°F)"
    DewPointAvgF: "Average dew point (°F)"
    DewPointLowF: "Low daily dew point (°F)"
    # Humidity
    HumidityHighPercent: "High daily humidity (%)"
    HumidityAvgPercent: "Average daily humidity (%)"
    HumidityLowPercent: "Low daily humidity (%)"
    # Sea-level pressure
    SeaLevelPressureHighInches: "High sea-level pressure (inches)"
    SeaLevelPressureAvgInches: "Average sea-level pressure (inches)"
    SeaLevelPressureLowInches: "Low sea-level pressure (inches)"
    # Visibility
    VisibilityHighMiles: "High visibility (miles)"
    VisibilityAvgMiles: "Average visibility (miles)"
    VisibilityLowMiles: "Low visibility (miles)"
    # Wind
    WindHighMPH: "Peak wind speed (mph)"
    WindAvgMPH: "Mean wind speed (mph)"
    WindGustMPH: "Peak wind gust (mph)"
    # Precipitation and events
    PrecipitationSumInches: "Total precipitation (inches)"
    Events: "Weather events (e.g., Rain, Fog, Thunderstorm)"
# Metadata entry for the 1155_Corporate-Credit-Rating dataset: summary text,
# CSV location, and per-feature descriptions. Feature keys match those of
# 1146_CorporateCreditRating.
1155_Corporate-Credit-Rating:
  dataset_description: "Duplicate of corporate credit ratings dataset (may vary by vendor or format)."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1155_Corporate-Credit-Rating.csv"
  feature_descriptions:
    # Rating record identification
    Rating: "Credit rating score"
    Name: "Company's name"
    Symbol: "Ticker symbol"
    Rating_Agency_Name: "Agency that provided the rating"
    Date: "Date when rating was issued"
    Sector: "Industry classification"
    # Liquidity
    currentRatio: "Current assets to liabilities ratio"
    quickRatio: "Quick or acid test ratio"
    cashRatio: "Cash to current liability ratio"
    daysOfSalesOutstanding: "Days sales outstanding (receivables efficiency)"
    # Profitability
    netProfitMargin: "Net profit margin"
    pretaxProfitMargin: "Pretax profit margin"
    grossProfitMargin: "Gross profit margin"
    operatingProfitMargin: "Operating profit margin"
    returnOnAssets: "Return on assets"
    returnOnCapitalEmployed: "Return on capital employed"
    returnOnEquity: "Return on equity"
    # Efficiency and leverage
    assetTurnover: "Total asset turnover"
    fixedAssetTurnover: "Fixed asset turnover ratio"
    debtEquityRatio: "Debt to equity ratio"
    debtRatio: "Total debt ratio"
    effectiveTaxRate: "Effective tax rate"
    # Cash flow and valuation
    freeCashFlowOperatingCashFlowRatio: "Free cash flow to operating cash flow ratio"
    freeCashFlowPerShare: "FCF per share"
    cashPerShare: "Cash per share"
    companyEquityMultiplier: "Equity multiplier"
    ebitPerRevenue: "EBIT margin"
    enterpriseValueMultiple: "EV multiple"
    operatingCashFlowPerShare: "Operating cash flow per share"
    operatingCashFlowSalesRatio: "OCF to sales ratio"
    payablesTurnover: "Payables turnover rate"
# Metadata entry for the 1160_TuningSVMs dataset: summary text, CSV location,
# and per-feature descriptions. Feature keys are grouped by their dotted prefix
# (simple, statistical, inftheo, modelbased, landmarking, dcomp, cnet).
1160_TuningSVMs:
  dataset_description: "SVM Hyperparameter Tuning Dataset: Dataset properties and tuning outcomes for support vector machines."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1160_TuningSVMs.csv"
  feature_descriptions:
    # simple.*
    simple.classes: "Number of classes"
    simple.attributes: "Number of attributes"
    simple.numeric: "Numeric attribute count"
    simple.nominal: "Nominal attribute count"
    simple.samples: "Number of instances"
    simple.dimensionality: "Dimensionality (attributes per instance)"
    simple.numeric_rate: "Proportion of numeric attributes"
    simple.nominal_rate: "Proportion of nominal attributes"
    simple.symbols_min: "Min symbols per nominal attribute"
    simple.symbols_max: "Max symbols per nominal attribute"
    simple.symbols_mean: "Mean symbols per feature"
    simple.symbols_sd: "Std deviation of symbols"
    simple.symbols_sum: "Total symbols"
    simple.class_prob_min: "Min class probability"
    simple.class_prob_max: "Max class probability"
    simple.class_prob_mean: "Mean class probability"
    simple.class_prob_sd: "Std deviation of class probabilities"
    # statistical.*
    statistical.skewness: "Skewness measure"
    statistical.skewness_prep: "Prep-skewness measure"
    statistical.kurtosis: "Kurtosis measure"
    statistical.kurtosis_prep: "Prep-kurtosis measure"
    statistical.abs_cor: "Absolute correlation"
    statistical.cancor_1: "First canonical correlation"
    statistical.fract_1: "First fractal dimension"
    # inftheo.*
    inftheo.class_entropy: "Class entropy"
    inftheo.normalized_class_entropy: "Normalized class entropy"
    inftheo.attribute_entropy: "Attribute entropy"
    inftheo.normalized_attribute_entropy: "Normalized attribute entropy"
    inftheo.joint_entropy: "Joint entropy"
    inftheo.mutual_information: "Mutual information"
    inftheo.equivalent_attributes: "Equivalent attribute count"
    inftheo.noise_signal_ratio: "Noise-to-signal ratio"
    # modelbased.*
    modelbased.nodes: "Decision tree node count"
    modelbased.leaves.nodes_per_attribute: "Leaves per attribute in decision tree"
    modelbased.nodes_per_instance: "Nodes per instance"
    modelbased.leaf_corrobation: "Leaf corroboration measure"
    modelbased.level_min: "Min tree depth"
    modelbased.level_max: "Max tree depth"
    modelbased.level_mean: "Mean tree depth"
    modelbased.level_sd: "Std tree depth"
    modelbased.branch_min: "Min branch factor"
    modelbased.branch_max: "Max branch factor"
    modelbased.branch_mean: "Mean branch factor"
    modelbased.branch_sd: "Std branch factor"
    modelbased.attribute_min: "Min attribute importance"
    modelbased.attribute_max: "Max attribute importance"
    modelbased.attribute_mean: "Mean attribute importance"
    modelbased.attribute_sd: "Std deviation of importance"
    modelbased.NA: "Missing value indicator"
    # landmarking.*
    landmarking.naive_bayes: "Performance with Naive Bayes"
    landmarking.stump_min: "Min stump performance"
    landmarking.stump_max: "Max stump performance"
    landmarking.stump_mean: "Mean stump performance"
    landmarking.stump_sd: "Std stump performance"
    landmarking.stump_min_gain: "Gain from stump"
    landmarking.stump_random: "Random stump measure"
    landmarking.nn_1: "1-NN classifier performance"
    # NOTE(review): range-style key ("X to Y") standing in for several features,
    # presumably not a literal column name — confirm how the consumer resolves it.
    dcomp.f1 to dcomp.t2: "Dataset complexity measure (multiple)"
    # cnet.*
    cnet.edges: "Complexity network edge count"
    cnet.degree: "Complexity network average degree"
    cnet.density: "Complexity network density"
    cnet.maxComp: "Max component size"
    cnet.closeness: "Average closeness centrality"
    cnet.betweenness: "Average betweenness"
    cnet.clsCoef: "Clustering coefficient"
    cnet.hubs: "Hub node count"
    cnet.avgPath: "Average path length"
    Class: "Tuning outcome class label"
# Metadata entry for the 1183_Heart-Disease dataset: summary text, CSV location,
# and per-feature descriptions. Top-level keys must be unique, so this dataset
# is defined exactly once.
1183_Heart-Disease:
  dataset_description: "Heart Disease Dataset: Clinical features to predict presence of heart disease."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1183_Heart-Disease.csv"
  feature_descriptions:
    age: "Age of the patient"
    sex: "Sex of the patient (1 = male, 0 = female)"
    cp: "Chest pain type (0–3)"
    trestbps: "Resting blood pressure (mm Hg)"
    chol: "Serum cholesterol (mg/dl)"
    fbs: "Fasting blood sugar > 120 mg/dl (1 = true; 0 = false)"
    restecg: "Resting electrocardiographic results (0–2)"
    thalach: "Maximum heart rate achieved"
    exang: "Exercise-induced angina (1 = yes; 0 = no)"
    oldpeak: "ST depression induced by exercise relative to rest"
    slope: "Slope of the peak exercise ST segment"
    ca: "Number of major vessels (0–3) colored by fluoroscopy"
    thal: "Thalassemia status (3 = normal; 6 = fixed defect; 7 = reversible defect)"
    target: "Presence of heart disease (1 = yes; 0 = no)"
# Metadata entry for the 1187_Covid-19-Turkey-Daily-Details dataset: summary
# text, CSV location, and per-feature descriptions.
1187_Covid-19-Turkey-Daily-Details:
  dataset_description: "COVID-19 Turkey: Daily statistics from official sources for pandemic tracking."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1187_Covid-19-Turkey-Daily-Details.csv"
  feature_descriptions:
    date: "Date of the report"
    # Cumulative totals
    total_tests: "Cumulative number of tests conducted"
    total_cases: "Cumulative confirmed COVID-19 cases"
    total_deaths: "Cumulative number of deaths"
    total_recovered: "Cumulative recoveries"
    # Daily increments
    daily_tests: "New tests on the given date"
    daily_cases: "New confirmed cases"
    daily_deaths: "New deaths"
    daily_recovered: "New recoveries"
    # Hospital load indicators
    critical_patients: "Number of patients in critical condition"
    intubated_patients: "Number of intubated patients"
    pneumonia_rate: "Rate of pneumonia among cases"
    bed_occupancy_rate: "Hospital bed occupancy percentage"
    ventilator_occupancy_rate: "Ventilator usage rate"
    isolation: "Number of people in isolation"
# Metadata entry for the 1191_Students-Academic-Performance dataset: summary
# text, CSV location, and per-feature descriptions.
1191_Students-Academic-Performance:
  dataset_description: "Student Academic Performance: Factors influencing grades, including demographic and academic behaviors."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1191_Students-Academic-Performance.csv"
  feature_descriptions:
    # Student background
    gender: "Gender of the student"
    race_ethnicity: "Student's ethnic background"
    parental_level_of_education: "Highest education level of parents"
    lunch: "Lunch type (standard or free/reduced)"
    test_preparation_course: "Whether a test prep course was completed"
    # Scores
    math_score: "Score in mathematics"
    reading_score: "Score in reading"
    writing_score: "Score in writing"
# Metadata entry for the 1192_kickstarter_projects dataset: summary text,
# CSV location, and per-feature descriptions.
1192_kickstarter_projects:
  dataset_description: "Kickstarter Project Outcomes: Project metadata and success status."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1192_kickstarter_projects.csv"
  feature_descriptions:
    project_id: "Unique project identifier"
    name: "Project name"
    category: "Project category"
    main_category: "Top-level project category"
    currency: "Currency used"
    deadline: "Deadline of the campaign"
    goal: "Funding goal"
    launched: "Launch date"
    pledged: "Amount pledged"
    backers: "Number of backers"
    country: "Country of origin"
    usd_pledged: "Pledged amount in USD"
    state: "Project state (e.g., successful, failed)"
# Metadata entry for the 1199_adult_income_p dataset: summary text, CSV location,
# and per-feature descriptions.
1199_adult_income_p:
  dataset_description: "UCI Adult Income Dataset: Predict whether income exceeds $50K based on census data."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1199_adult_income_p.csv"
  feature_descriptions:
    age: "Age of the individual"
    workclass: "Employment type"
    fnlwgt: "Final weight (sampling weight)"
    education: "Education level"
    education_num: "Education level as numeric"
    marital_status: "Marital status"
    occupation: "Occupation"
    relationship: "Relationship status"
    race: "Race of the individual"
    sex: "Gender"
    capital_gain: "Capital gain"
    capital_loss: "Capital loss"
    hours_per_week: "Hours worked per week"
    native_country: "Country of origin"
    income: "Income class (>50K or <=50K)"
# Metadata entry for the 1200_default_credit_card_p dataset: summary text,
# CSV location, and per-feature descriptions.
1200_default_credit_card_p:
  dataset_description: "Taiwan Credit Card Default: Monthly billing and repayment data to predict defaults."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1200_default_credit_card_p.csv"
  feature_descriptions:
    ID: "Client ID"
    LIMIT_BAL: "Credit limit (NT dollar)"
    SEX: "Gender (1 = male, 2 = female)"
    EDUCATION: "Education level"
    MARRIAGE: "Marital status"
    AGE: "Age of the client"
    # The three monthly series run backwards in time: the lowest index is the
    # most recent month (September) and index 6 is the oldest (April).
    # NOTE(review): these are range-style keys, presumably not literal column
    # names; the UCI source has no PAY_1 column — confirm against the CSV header.
    PAY_0 to PAY_6: "Repayment status by month, from September (PAY_0) back to April (PAY_6)"
    BILL_AMT1 to BILL_AMT6: "Bill statement amounts, from September (BILL_AMT1) back to April (BILL_AMT6)"
    PAY_AMT1 to PAY_AMT6: "Amount paid, from September (PAY_AMT1) back to April (PAY_AMT6)"
    default_payment_next_month: "Whether the client will default next month (1 = yes, 0 = no)"
# Metadata entry for the 1201_Gender-Recognition-by-Voice dataset: summary text,
# CSV location, and per-feature descriptions.
1201_Gender-Recognition-by-Voice:
  dataset_description: "Voice-based Gender Recognition: Acoustic features derived from speech samples."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1201_Gender-Recognition-by-Voice.csv"
  feature_descriptions:
    # Frequency distribution statistics
    meanfreq: "Mean frequency (Hz)"
    sd: "Frequency standard deviation"
    median: "Median frequency"
    Q25: "25th percentile frequency"
    Q75: "75th percentile frequency"
    IQR: "Interquartile range"
    skew: "Skewness of frequency distribution"
    kurt: "Kurtosis of frequency distribution"
    sp_ent: "Spectral entropy"
    sfm: "Spectral flatness measure"
    mode: "Mode of frequency"
    centroid: "Spectral centroid"
    # Fundamental frequency
    meanfun: "Average fundamental frequency"
    minfun: "Minimum fundamental frequency"
    maxfun: "Maximum fundamental frequency"
    # Dominant frequency
    meandom: "Mean dominant frequency"
    mindom: "Minimum dominant frequency"
    maxdom: "Maximum dominant frequency"
    dfrange: "Range of dominant frequency"
    modindx: "Modulation index"
    label: "Gender label (male/female)"
# Metadata entry for the 1201_uci_diabetes_p dataset: summary text, CSV location,
# and per-feature descriptions.
1201_uci_diabetes_p:
  dataset_description: "UCI Hospital Readmissions: Clinical and admission data to predict 30-day readmission."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1201_uci_diabetes_p.csv"
  feature_descriptions:
    # Patient and admission details
    patient_nbr: "Patient number"
    race: "Patient race"
    gender: "Patient gender"
    age: "Age bracket"
    weight: "Weight category"
    admission_type_id: "Admission type ID"
    discharge_disposition_id: "Discharge disposition ID"
    admission_source_id: "Admission source ID"
    time_in_hospital: "Length of stay (days)"
    payer_code: "Payor code"
    medical_specialty: "Medical specialty"
    # Utilisation counts
    num_lab_procedures: "Number of lab procedures"
    num_procedures: "Number of procedures"
    num_medications: "Number of medications prescribed"
    number_outpatient: "Outpatient visits count"
    number_emergency: "Emergency visits count"
    number_inpatient: "Inpatient visits count"
    # NOTE(review): this key holds three names in a single scalar; presumably it
    # stands for three separate columns — confirm how the consumer resolves it.
    diag_1, diag_2, diag_3: "Primary, secondary, tertiary diagnoses"
    number_diagnoses: "Total diagnosis count"
    max_glu_serum: "Max glucose serum level"
    A1Cresult: "A1C result category"
    # NOTE(review): keys containing "..." are elided lists rather than literal
    # names; the full set of medication columns is not spelled out here —
    # confirm against the CSV header.
    metformin, repaglinide, ..., troglitazone: "Medication indicators (yes/no)"
    insulin: "Insulin indicator"
    glyburide_metformin, ...: "Combination medication indicators"
    change: "Change in medication"
    diabetesMed: "Diabetes medication indicator"
    readmitted_flg: "Readmitted within 30 days (YES/NO)"
# Metadata entry for the 1212_cacao_flavor dataset: summary text, CSV location,
# and per-feature descriptions.
1212_cacao_flavor:
  dataset_description: "Cacao Flavor Profiles: Sensory ratings and origin data for cacao evaluations."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1212_cacao_flavor.csv"
  feature_descriptions:
    company_(maker_if_known): "Maker company"
    specific_bean_origin_or_bar_name: "Bean or bar origin name"
    ref: "Reference code"
    review_date: "Date of review"
    cocoa_percent: "Cocoa percentage"
    company_location: "Company location country"
    rating: "Sensory rating score"
    broad_bean_origin: "Country of bean origin"
    bean_type: "Bean type (e.g., Trinitario)"
# Metadata entry for the 1214_regime_alimentaire dataset: summary text,
# CSV location, and per-feature descriptions. Feature keys are in French;
# the descriptions give their English meaning.
1214_regime_alimentaire:
  dataset_description: "French Dietary Survey: Self-reported food consumption by category and overweight status."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1214_regime_alimentaire.csv"
  feature_descriptions:
    # Respondent profile
    Sexe: "Gender of the individual (e.g., Homme, Femme)"
    Origine_Ethnique: "Ethnic origin of the individual"
    Age: "Age in years"
    Poids: "Weight in kilograms"
    Taille: "Height in centimeters"
    # Consumption frequency by food category
    pain_biscottes_cereales: "Consumption frequency of bread, toasts, and cereals"
    riz_pates_semoule_pommes_de_terre: "Frequency of rice, pasta, semolina, and potatoes"
    fruits: "Frequency of fruit consumption"
    legumes: "Frequency of vegetable consumption"
    laitages: "Frequency of dairy products like milk, yogurt, cheese"
    viandes_poissons_oeufs: "Frequency of meats, fish, and eggs"
    produits_gras_sucres: "Frequency of fatty and sugary products"
    boissons_sucrees: "Frequency of sweetened beverages"
    # Eating habits and outcome
    grignotage: "Snacking frequency"
    saute_repas: "Frequency of skipped meals"
    regime: "Whether following a special diet or not"
    Surpoids: "Overweight indicator (e.g., 1 for overweight, 0 for not)"
# Metadata entry for the 1216_parkinson-speech-uci dataset: summary text,
# CSV location, and per-feature descriptions. Feature keys that contain ':'
# are quoted so the key/value separator is unambiguous.
1216_parkinson-speech-uci:
  dataset_description: "Parkinson's Disease Speech Dataset: Biomedical voice measurements for detecting Parkinson’s disease."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1216_parkinson-speech-uci.csv"
  feature_descriptions:
    name: "Subject identifier"
    # Fundamental frequency
    "MDVP:Fo(Hz)": "Average vocal fundamental frequency"
    "MDVP:Fhi(Hz)": "Maximum vocal fundamental frequency"
    "MDVP:Flo(Hz)": "Minimum vocal fundamental frequency"
    # Jitter (frequency perturbation)
    "MDVP:Jitter(%)": "Variation in fundamental frequency (jitter percentage)"
    "MDVP:Jitter(Abs)": "Absolute jitter"
    "MDVP:RAP": "Relative average perturbation"
    "MDVP:PPQ": "Five-point period perturbation quotient"
    "Jitter:DDP": "Derivative of differences of periods"
    # Shimmer (amplitude perturbation)
    "MDVP:Shimmer": "Variation in amplitude (shimmer)"
    "MDVP:Shimmer(dB)": "Shimmer in decibels"
    "Shimmer:APQ3": "Three-point amplitude perturbation quotient"
    "Shimmer:APQ5": "Five-point amplitude perturbation quotient"
    "MDVP:APQ": "Average amplitude perturbation quotient"
    "Shimmer:DDA": "Derivative of differences of amplitudes"
    # Noise ratios and label
    NHR: "Noise-to-harmonics ratio"
    HNR: "Harmonics-to-noise ratio"
    status: "Disease status (1 for Parkinson's, 0 for healthy)"
    # Nonlinear measures
    RPDE: "Recurrence period density entropy"
    DFA: "Detrended fluctuation analysis"
    spread1: "Nonlinear measure of fundamental frequency variation"
    spread2: "Nonlinear measure of variation (2nd)"
    D2: "Correlation dimension"
    PPE: "Pitch period entropy"
# Metadata entry for the 1217_SolarPrediction dataset: summary text, CSV location,
# and per-feature descriptions.
# NOTE(review): units stated below (Celsius, km/h, W/m^2) are not verifiable
# from this file — confirm against the CSV source.
1217_SolarPrediction:
  dataset_description: "Solar Power Prediction Dataset: Weather and time features used to predict solar radiation."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1217_SolarPrediction.csv"
  feature_descriptions:
    Data: "Date of the observation"
    Time: "Time of the day"
    Temperature: "Temperature in Celsius"
    Humidity: "Relative humidity percentage"
    WindSpeed: "Wind speed in km/h"
    GeneralDiffuseFlows: "Diffuse solar radiation measured"
    DiffuseFlows: "Local diffuse solar radiation"
    Radiation: "Target variable - solar radiation in W/m^2"
# Metadata entry for the 1220_House_Rent_Dataset dataset: summary text,
# CSV location, and per-feature descriptions. Several feature keys contain
# spaces; they are plain (unquoted) scalars.
1220_House_Rent_Dataset:
  dataset_description: "House Rent Dataset: Rental property details collected from Indian cities."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1220_House_Rent_Dataset.csv"
  feature_descriptions:
    Posted On: "Date when the listing was posted"
    BHK: "Number of bedrooms"
    Rent: "Monthly rent in INR"
    Size: "Size of the house in square feet"
    Floor: "Floor level of the apartment"
    Area Type: "Type of area (e.g., Super built-up)"
    Area Locality: "Locality of the property"
    City: "City name"
    Furnishing Status: "Furnishing level (Furnished, Semi-Furnished, etc.)"
    Tenant Preferred: "Preferred tenant type (e.g., Bachelors, Family)"
    Bathroom: "Number of bathrooms"
    Point of Contact: "Mode of contact (e.g., Agent, Owner)"
# Metadata entry for the 1221_Indian_Liver_Patient_Dataset dataset: summary text,
# CSV location, and per-feature descriptions.
# NOTE(review): some keys are spelled unusually (e.g. `Alkaline_Phosphotase`,
# `Total_Protiens`); presumably they mirror the CSV header — do not "correct"
# them without checking.
1221_Indian_Liver_Patient_Dataset:
  dataset_description: "Indian Liver Patient Dataset: Clinical data to classify patients with liver disorders."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1221_Indian_Liver_Patient_Dataset.csv"
  feature_descriptions:
    Age: "Age of the patient"
    Gender: "Gender of the patient"
    Total_Bilirubin: "Total bilirubin level"
    Direct_Bilirubin: "Direct bilirubin level"
    Alkaline_Phosphotase: "Alkaline phosphatase enzyme level"
    Alamine_Aminotransferase: "ALT enzyme level"
    Aspartate_Aminotransferase: "AST enzyme level"
    Total_Protiens: "Total proteins in blood"
    Albumin: "Albumin level"
    Albumin_and_Globulin_Ratio: "Ratio of albumin to globulin"
    Dataset: "Class label (1 = liver patient, 2 = not liver patient)"
# Metadata entry for the 1224_Online_Students_Performance_Evaluation dataset:
# summary text, CSV location, and per-feature descriptions.
1224_Online_Students_Performance_Evaluation:
  dataset_description: "Online Student Performance: Features related to academic background and personal info to predict performance."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1224_Online_Students_Performance_Evaluation.csv"
  feature_descriptions:
    # Student background
    gender: "Gender of the student"
    NationalITy: "Nationality"
    PlaceofBirth: "Place of birth"
    StageID: "Academic stage (e.g., High School)"
    GradeID: "Grade level"
    SectionID: "Section identifier"
    Topic: "Academic subject"
    Semester: "Semester (Fall/Spring)"
    Relation: "Student-parent relationship"
    # Activity counters
    raisedhands: "Number of times the student raised hands"
    # Description follows the key name: visits to resources, not to the platform as a whole.
    VisITedResources: "Number of times the student visited course resources"
    AnnouncementsView: "Number of times announcements were viewed"
    Discussion: "Number of discussion contributions"
    # Parent involvement, attendance, and outcome
    ParentAnsweringSurvey: "Parent answered school survey (Yes/No)"
    ParentschoolSatisfaction: "Parental satisfaction with school"
    StudentAbsenceDays: "Number of absence days"
    Class: "Final performance class (L, M, H)"
# Facebook Live seller posts: raw reaction counts (num_*) plus binary flags (is_*).
1225_facebook_live_sellers:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1225_facebook_live_sellers.csv"
  dataset_description: "Facebook Live Sellers Dataset: Metrics from live video streams used to analyze viewer engagement."
  feature_descriptions:
    status_id: "Unique identifier for the live status/video"
    status_type: "Type of status (e.g., live, photo)"
    # Reaction counts.
    num_reactions: "Total number of reactions"
    num_comments: "Total number of comments"
    num_shares: "Total number of shares"
    num_likes: "Number of likes"
    num_loves: "Number of love reactions"
    num_wows: "Number of wow reactions"
    num_hahas: "Number of haha reactions"
    num_sads: "Number of sad reactions"
    num_angrys: "Number of angry reactions"
    # Binary reaction flags.
    is_liked: "Binary flag indicating if the video was liked"
    is_loved: "Binary flag indicating if the video was loved"
    is_wowed: "Binary flag indicating if the video was wowed"
    is_haha: "Binary flag indicating if the video was found funny"
    is_sad: "Binary flag indicating if the video was found sad"
    is_angry: "Binary flag indicating if the video caused anger"
    # Target label.
    is_engaged: "Target label indicating user engagement"
# Beer reviews: per-review ratings (review_*) alongside beer and brewery metadata.
1226_beers_reviews:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1226_beers_reviews.csv"
  dataset_description: "Beer Reviews Dataset: Consumer reviews of beers with ratings and textual feedback."
  feature_descriptions:
    beer_name: "Name of the beer"
    brewery_id: "Unique ID of the brewery"
    review_time: "Timestamp of the review"
    review_overall: "Overall rating given by the reviewer"
    review_aroma: "Aroma rating"
    review_appearance: "Appearance rating"
    review_profilename: "User profile name"
    beer_style: "Style/type of beer (e.g., IPA, Lager)"
    review_palate: "Palate rating"
    review_taste: "Taste rating"
    beer_abv: "Alcohol by volume percentage"
# Avocado prices and sales volumes by date and U.S. region.
1227_avocado:
  dataset_description: "Avocado Prices Dataset: Historical data on avocado sales across regions in the U.S."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1227_avocado.csv"
  feature_descriptions:
    Date: "Date of observation"
    AveragePrice: "Average price per avocado"
    Total Volume: "Total number of avocados sold"
    # PLU-code columns: quoted so the keys load as strings. Unquoted, YAML
    # resolves them to integers, which never match a textual column name.
    "4046": "Volume of PLU 4046 avocados sold"
    "4225": "Volume of PLU 4225 avocados sold"
    "4770": "Volume of PLU 4770 avocados sold"
    Total Bags: "Total number of bags sold"
    Small Bags: "Number of small bags sold"
    Large Bags: "Number of large bags sold"
    XLarge Bags: "Number of extra-large bags sold"
    type: "Type of avocado (conventional or organic)"
    year: "Year of sale"
    region: "Region of sale"
# Speed-dating experiment: participant/partner attributes and self-ratings;
# `decision` records whether the participant said yes.
1228_speedDating:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1228_speedDating.csv"
  dataset_description: "Speed Dating Experiment Dataset: Preferences and self-assessments from a Columbia University speed dating study."
  feature_descriptions:
    iid: "Participant ID"
    id: "Event ID"
    gender: "Gender of participant"
    age: "Age of participant"
    age_o: "Age of partner"
    race: "Race of participant"
    race_o: "Race of partner"
    goal: "Dating goal (e.g., fun, relationship)"
    date: "Interest in dating"
    go_out: "Frequency of going out"
    career_c: "Career code"
    field: "Field of study"
    interests_correlate: "Correlation between interests"
    # Self-ratings.
    attractiveness: "Self-rated attractiveness"
    sincere: "Self-rated sincerity"
    intelligence: "Self-rated intelligence"
    fun: "Self-rated fun level"
    ambition: "Self-rated ambition"
    shared_interests: "Number of shared interests"
    decision: "Whether the participant said yes"
# Online retail transactions: one described column per invoice-line attribute.
1230_OnlineRetail:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1230_OnlineRetail.csv"
  dataset_description: "Online Retail Transactions Dataset: Records of transactions from a UK-based online retail store."
  feature_descriptions:
    InvoiceNo: "Invoice number for the transaction"
    StockCode: "Item code"
    Description: "Description of the product"
    Quantity: "Number of items purchased"
    InvoiceDate: "Date of invoice"
    UnitPrice: "Price per item"
    CustomerID: "Customer identifier"
    Country: "Country of customer"
# House Rocket (King County, WA) housing attributes; `price` is the sale price.
1232_house_rocket:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1232_house_rocket.csv"
  dataset_description: "House Rocket Real Estate Dataset: Housing attributes for predicting price in King County, WA."
  feature_descriptions:
    id: "Unique house ID"
    date: "Date house was sold"
    price: "Sale price"
    bedrooms: "Number of bedrooms"
    bathrooms: "Number of bathrooms"
    sqft_living: "Square footage of interior"
    sqft_lot: "Square footage of lot"
    floors: "Number of floors"
    waterfront: "Whether the house has a waterfront view"
    view: "Quality of view"
    condition: "Condition of the house"
    grade: "Overall grade based on construction and design"
    sqft_above: "Square footage above ground"
    sqft_basement: "Square footage of basement"
    yr_built: "Year built"
    yr_renovated: "Year renovated"
    zipcode: "ZIP code"
    lat: "Latitude"
    long: "Longitude"
    # NOTE(review): some data dictionaries for this dataset describe the *15
    # columns as averages over the 15 nearest neighbours rather than values
    # "in 2015" — confirm against the data source before relying on these.
    sqft_living15: "Living area of the house in 2015"
    sqft_lot15: "Lot area of the house in 2015"
# Dry bean morphology features; `Class` is the bean type.
# "AspectRation" is kept as spelled in the entry's column key.
1233_dry_bean_dataset:
  dataset_description: "Dry Bean Dataset: Morphological features of different bean types."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1233_dry_bean_dataset.csv"
  feature_descriptions:
    Area: "Total bean area"
    Perimeter: "Perimeter of the bean"
    MajorAxisLength: "Major axis length of the bean shape"
    MinorAxisLength: "Minor axis length of the bean shape"
    AspectRation: "Aspect ratio = MajorAxis / MinorAxis"
    Eccentricity: "Eccentricity of the bean ellipse"
    ConvexArea: "Area of the convex hull"
    EquivDiameter: "Diameter of a circle with same area"
    Extent: "Extent (ratio of pixels in bounding box)"
    Solidity: "Solidity of the convex shape"
    roundness: "Measure of circularity"
    Compactness: "Shape compactness"
    # One key per column: a single "ShapeFactor1 to ShapeFactor4" key is a
    # prose range and cannot be looked up by column name.
    ShapeFactor1: "Shape descriptor (factor 1 of 4)"
    ShapeFactor2: "Shape descriptor (factor 2 of 4)"
    ShapeFactor3: "Shape descriptor (factor 3 of 4)"
    ShapeFactor4: "Shape descriptor (factor 4 of 4)"
    Class: "Type of dry bean"
# Hourly highway traffic with weather covariates; `traffic_volume` is the target.
1234_traffic_volume:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1234_traffic_volume.csv"
  dataset_description: "Traffic Volume Dataset: Sensor data of vehicles on a highway in Minnesota for traffic forecasting."
  feature_descriptions:
    date_time: "Timestamp of the observation"
    temp: "Temperature in Kelvin"
    rain_1h: "Rainfall in last 1 hour (mm)"
    snow_1h: "Snowfall in last 1 hour (mm)"
    clouds_all: "Cloud coverage percentage"
    weather_main: "Main weather condition (e.g., Clear, Rain)"
    weather_description: "Detailed weather condition"
    holiday: "Whether the day is a holiday"
    # Target column.
    traffic_volume: "Target variable - number of vehicles in an hour"
# Daily Bitcoin price records (open/high/low/close) with volume and market cap.
1235_bitcoin_price:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1235_bitcoin_price.csv"
  dataset_description: "Bitcoin Price Dataset: Historical Bitcoin prices with other related metrics."
  feature_descriptions:
    Date: "Date of record"
    Open: "Opening price"
    High: "Highest price during the day"
    Low: "Lowest price during the day"
    Close: "Closing price"
    Volume: "Volume of Bitcoin traded"
    Market Cap: "Total market capitalization"
# Mobile phone specifications; `price_range` is the target class.
1236_mobile_price:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1236_mobile_price.csv"
  dataset_description: "Mobile Price Dataset: Specifications of mobile phones for predicting price range."
  feature_descriptions:
    battery_power: "Total energy capacity of the battery"
    blue: "Bluetooth support (1 = yes, 0 = no)"
    clock_speed: "Speed at which microprocessor executes instructions"
    dual_sim: "Supports dual SIM (1 = yes, 0 = no)"
    fc: "Front camera megapixels"
    four_g: "4G support (1 = yes, 0 = no)"
    int_memory: "Internal memory (in GB)"
    m_dep: "Mobile depth in cm"
    mobile_wt: "Weight of mobile phone"
    n_cores: "Number of cores in processor"
    pc: "Primary camera megapixels"
    px_height: "Pixel resolution height"
    px_width: "Pixel resolution width"
    ram: "Random Access Memory (in MB)"
    sc_h: "Height of screen in cm"
    sc_w: "Width of screen in cm"
    talk_time: "Battery talk time in hours"
    three_g: "3G support (1 = yes, 0 = no)"
    touch_screen: "Touch screen support (1 = yes, 0 = no)"
    wifi: "WiFi support (1 = yes, 0 = no)"
    # Target column.
    price_range: "Target price range (0: low, 3: high)"
# COVID-19 symptom indicators plus demographic/contact indicator columns.
1237_covid_symptoms:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1237_covid_symptoms.csv"
  dataset_description: "COVID-19 Symptoms Dataset: Symptom indicators to predict COVID-19 likelihood."
  feature_descriptions:
    Fever: "Presence of fever"
    Tiredness: "Presence of tiredness"
    Dry-Cough: "Presence of dry cough"
    Difficulty-in-Breathing: "Breathing difficulty indicator"
    Sore-Throat: "Presence of sore throat"
    Pains: "Body pain presence"
    Nasal-Congestion: "Presence of nasal congestion"
    Runny-Nose: "Runny nose presence"
    Diarrhea: "Diarrhea presence"
    None_Symptom: "No symptoms indicator"
    # NOTE(review): this key is a prose range ("X to Y"), not a literal column
    # name — presumably it should be one key per age-bucket column; confirm
    # against the CSV header.
    Age_0-9 to Age_80+: "Age group indicators"
    Gender_Male: "Male indicator"
    Gender_Female: "Female indicator"
    Contact_Did: "Had contact with COVID-positive individual"
    Contact_No: "No known contact"
    Contact_Dont-Know: "Uncertain contact status"
    # NOTE(review): same range-style key as above — confirm the real
    # Severity_* column names in the CSV header.
    Severity_Mild to Severity_Critical: "Severity of illness"
# Used car listings; `selling_price` is the target variable.
1239_used_car_prices:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1239_used_car_prices.csv"
  dataset_description: "Used Car Price Dataset: Attributes of used cars and their respective selling prices."
  feature_descriptions:
    name: "Name of the car"
    year: "Manufacture year"
    # Target column.
    selling_price: "Selling price (target variable)"
    km_driven: "Distance driven in kilometers"
    fuel: "Type of fuel used"
    seller_type: "Type of seller (Individual/Dealer)"
    transmission: "Transmission type (Manual/Automatic)"
    owner: "Number of previous owners"
# Airline tweets; `airline_sentiment` carries the sentiment label.
1240_airline_sentiment:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1240_airline_sentiment.csv"
  dataset_description: "Airline Sentiment Dataset: Tweets about airlines labeled with sentiment polarity."
  feature_descriptions:
    tweet_id: "Unique ID of the tweet"
    airline_sentiment: "Sentiment label (positive/neutral/negative)"
    airline: "Airline the tweet refers to"
    text: "Tweet content"
    tweet_location: "User-provided tweet location"
    user_timezone: "User's timezone"
# Earthquake building damage (Nepal); `damage_grade` is the damage level.
# Defined exactly once: repeated top-level keys are invalid YAML and most
# loaders silently keep only the last occurrence.
1241_earthquake_damage:
  dataset_description: "Earthquake Building Damage Dataset: Features of buildings affected by earthquakes in Nepal."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1241_earthquake_damage.csv"
  feature_descriptions:
    building_id: "Unique building ID"
    district_id: "District location of building"
    year_built: "Year the building was built"
    age: "Age of building"
    area_percentage: "Percentage area of building footprint"
    height_percentage: "Height of the building as a percentage"
    land_surface_condition: "Condition of land surface"
    foundation_type: "Foundation material/type"
    roof_type: "Roof type"
    ground_floor_type: "Type of ground floor"
    other_floor_type: "Type of floors other than ground"
    position: "Relative position to neighboring buildings"
    plan_configuration: "Configuration of building floor plan"
    has_superstructure: "Presence of specific structural elements (e.g., concrete, wood)"
    damage_grade: "Damage level after earthquake (1=low, 3=severe)"
# Hourly photovoltaic plant readings; `power_output` is the target variable.
1242_solar_energy:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1242_solar_energy.csv"
  dataset_description: "Solar Energy Time Series Dataset: Hourly solar power output readings from photovoltaic plants."
  feature_descriptions:
    timestamp: "Datetime of the reading"
    plant_id: "Unique plant identifier"
    solar_irradiance: "Amount of solar irradiance (W/m²)"
    temperature: "Ambient temperature (°C)"
    humidity: "Humidity percentage"
    wind_speed: "Wind speed (m/s)"
    # Target column.
    power_output: "Power generated by the plant (target variable)"
# Student dropout risk factors; `target` is the dropout status.
# A separate entry, 1256_student_dropout, describes a different CSV.
1243_student_dropout:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1243_student_dropout.csv"
  dataset_description: "Student Dropout Dataset: Academic and personal factors influencing dropout risk."
  feature_descriptions:
    student_id: "Unique student identifier"
    gender: "Gender of student"
    age: "Age of student"
    marital_status: "Marital status of student"
    course: "Course enrolled"
    previous_qualification: "Academic qualification before enrollment"
    mother's_qualification: "Mother's education level"
    father's_qualification: "Father's education level"
    admission_grade: "Grade at admission"
    financial_support: "Whether student has financial support"
    scholarship: "Scholarship received (yes/no)"
    target: "Dropout status (1 = dropout, 0 = retained)"
# Car evaluation: all-categorical inputs; `class` is the acceptability label.
1244_car_evaluation:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1244_car_evaluation.csv"
  dataset_description: "Car Evaluation Dataset: Categorical features to determine car acceptability."
  feature_descriptions:
    buying: "Buying price category (vhigh, high, med, low)"
    maint: "Maintenance cost (vhigh, high, med, low)"
    doors: "Number of doors (2, 3, 4, 5more)"
    persons: "Person capacity (2, 4, more)"
    lug_boot: "Luggage boot size (small, med, big)"
    safety: "Safety rating (low, med, high)"
    class: "Car acceptability class (unacc, acc, good, vgood)"
# Building energy efficiency; two targets: `Heating_Load` and `Cooling_Load`.
1245_energy_efficiency:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1245_energy_efficiency.csv"
  dataset_description: "Energy Efficiency Dataset: Building parameters used to estimate heating and cooling loads."
  feature_descriptions:
    Relative_Compactness: "Ratio of building volume to envelope area"
    Surface_Area: "External surface area of the building"
    Wall_Area: "Total area of exterior walls"
    Roof_Area: "Area of the roof"
    Overall_Height: "Overall height of the building"
    Orientation: "Orientation (1–4 representing N, E, S, W)"
    Glazing_Area: "Fraction of wall area covered by glazing"
    Glazing_Area_Distribution: "Glazing distribution (0–5)"
    # Target columns.
    Heating_Load: "Energy load required for heating (target)"
    Cooling_Load: "Energy load required for cooling (target)"
# Heart disease clinical features; `target` flags presence of heart disease.
1246_heart_disease:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1246_heart_disease.csv"
  dataset_description: "Heart Disease Dataset: Clinical and demographic features used to predict presence of heart disease."
  feature_descriptions:
    age: "Age of the patient"
    sex: "Gender (1 = male; 0 = female)"
    cp: "Chest pain type (0–3)"
    trestbps: "Resting blood pressure (mm Hg)"
    chol: "Serum cholesterol (mg/dl)"
    fbs: "Fasting blood sugar > 120 mg/dl (1 = true; 0 = false)"
    restecg: "Resting electrocardiographic results (0–2)"
    thalach: "Maximum heart rate achieved"
    exang: "Exercise-induced angina (1 = yes; 0 = no)"
    oldpeak: "ST depression induced by exercise relative to rest"
    slope: "Slope of the peak exercise ST segment"
    ca: "Number of major vessels (0–3) colored by fluoroscopy"
    thal: "Thalassemia type (3 = normal; 6 = fixed defect; 7 = reversible defect)"
    target: "Presence of heart disease (1 = yes; 0 = no)"
# Building meter readings with weather covariates for load forecasting.
1247_electricity_consumption:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1247_electricity_consumption.csv"
  dataset_description: "Electricity Consumption Dataset: Power usage data for energy forecasting and load balancing."
  feature_descriptions:
    timestamp: "Date and time of reading"
    building_id: "Unique identifier for the building"
    meter_type: "Type of meter (electricity, chilled water, steam, hot water)"
    meter_reading: "Power consumption reading (kWh or BTUs)"
    # Weather covariates.
    temperature: "Outdoor temperature at the time of reading (°C)"
    humidity: "Relative humidity (%)"
    wind_speed: "Wind speed (m/s)"
    cloud_coverage: "Cloud cover (oktas)"
# Housing sale prices; `SalePrice` is the target.
1248_housing_prices:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1248_housing_prices.csv"
  dataset_description: "Housing Prices Dataset: Real estate features for predicting house sale prices."
  feature_descriptions:
    Id: "Unique house ID"
    MSSubClass: "Type of dwelling"
    MSZoning: "Zoning classification"
    LotArea: "Lot size (square feet)"
    Street: "Type of road access"
    Alley: "Type of alley access (if any)"
    LotShape: "General shape of property"
    LandContour: "Flatness of the property"
    Utilities: "Type of utilities available"
    Neighborhood: "Physical locations within Ames city limits"
    OverallQual: "Overall material and finish quality (1–10)"
    YearBuilt: "Year the house was built"
    YearRemodAdd: "Year of remodel or addition"
    GrLivArea: "Above ground living area (square feet)"
    GarageCars: "Size of garage (number of cars)"
    GarageArea: "Garage area (square feet)"
    # Target column.
    SalePrice: "Target sale price"
# Credit card default prediction; `default_payment_next_month` is the label.
# Defined exactly once: repeated top-level keys are invalid YAML and most
# loaders silently keep only the last occurrence.
1249_credit_card_default:
  dataset_description: "Credit Card Default Dataset: Client financial attributes and repayment behavior for default prediction."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1249_credit_card_default.csv"
  feature_descriptions:
    ID: "Client ID"
    LIMIT_BAL: "Credit limit (NT dollars)"
    SEX: "Gender (1 = male; 2 = female)"
    EDUCATION: "Education level (1 = graduate school; 2 = university; etc.)"
    MARRIAGE: "Marital status (1 = married; 2 = single; etc.)"
    AGE: "Age in years"
    # NOTE(review): the three keys below are prose ranges ("X to Y"), not
    # literal column names — presumably each should be expanded to one key
    # per column; confirm the exact names against the CSV header.
    PAY_0 to PAY_6: "Repayment status for past months"
    BILL_AMT1 to BILL_AMT6: "Bill statement amounts"
    PAY_AMT1 to PAY_AMT6: "Payment amounts"
    default_payment_next_month: "Default occurrence (1 = yes; 0 = no)"
# Wine physicochemical measurements; `quality` is the target score.
1251_wine_quality:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1251_wine_quality.csv"
  dataset_description: "Wine Quality Dataset: Physicochemical properties of wine with quality scores."
  feature_descriptions:
    fixed acidity: "Tartaric acid content"
    volatile acidity: "Acetic acid content"
    citric acid: "Citric acid concentration"
    residual sugar: "Amount of sugar after fermentation"
    chlorides: "Salt content"
    free sulfur dioxide: "SO₂ not bound to other molecules"
    total sulfur dioxide: "Total SO₂ concentration"
    density: "Density of the wine"
    pH: "Acidity level (pH scale)"
    sulphates: "Sulphate content"
    alcohol: "Alcohol percentage"
    # Target column.
    quality: "Quality score (target variable)"
# Mushroom attributes; `class` (edible vs. poisonous) is the target.
1252_mushroom_classification:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1252_mushroom_classification.csv"
  dataset_description: "Mushroom Classification Dataset: Features of mushrooms used to classify as edible or poisonous."
  feature_descriptions:
    # Cap.
    cap-shape: "Shape of the mushroom cap"
    cap-surface: "Surface texture of the cap"
    cap-color: "Color of the cap"
    bruises: "Presence of bruises"
    odor: "Mushroom odor"
    # Gills.
    gill-attachment: "Gill attachment type"
    gill-spacing: "Spacing of gills"
    gill-size: "Size of gills"
    gill-color: "Color of gills"
    # Stalk.
    stalk-shape: "Shape of the stalk"
    stalk-root: "Type of stalk root"
    stalk-surface-above-ring: "Surface texture above ring"
    stalk-surface-below-ring: "Surface texture below ring"
    stalk-color-above-ring: "Color above ring"
    stalk-color-below-ring: "Color below ring"
    # Veil and ring.
    veil-type: "Type of veil"
    veil-color: "Color of veil"
    ring-number: "Number of rings"
    ring-type: "Type of ring"
    spore-print-color: "Spore print color"
    population: "Estimated population"
    habitat: "Habitat type"
    # Target column.
    class: "Edible or poisonous (target)"
# Lifestyle and physiological health indicators; `Diabetic` holds diabetic status.
1253_diabetes_health_indicators:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1253_diabetes_health_indicators.csv"
  dataset_description: "Diabetes Health Indicators Dataset: Lifestyle and physiological indicators predicting diabetes."
  feature_descriptions:
    BMI: "Body Mass Index"
    Smoking: "Smoker (1 = yes, 0 = no)"
    AlcoholDrinking: "Heavy alcohol consumption (1 = yes, 0 = no)"
    Stroke: "History of stroke (1 = yes, 0 = no)"
    PhysicalHealth: "Days of poor physical health"
    MentalHealth: "Days of poor mental health"
    DiffWalking: "Difficulty walking (1 = yes, 0 = no)"
    Sex: "Gender (1 = male, 0 = female)"
    AgeCategory: "Age group category"
    Race: "Race/ethnicity"
    Diabetic: "Diabetic status"
    PhysicalActivity: "Engaged in physical activity (1 = yes, 0 = no)"
    GenHealth: "General health (ordinal)"
    SleepTime: "Average hours of sleep"
    Asthma: "Asthma diagnosis (1 = yes, 0 = no)"
    KidneyDisease: "Chronic kidney disease (1 = yes, 0 = no)"
    SkinCancer: "History of skin cancer (1 = yes, 0 = no)"
# Combined cycle power plant: four environmental inputs; `PE` is the target.
1254_power_plant:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1254_power_plant.csv"
  dataset_description: "Combined Cycle Power Plant Dataset: Environmental variables used to predict energy output."
  feature_descriptions:
    AT: "Ambient temperature (°C)"
    V: "Exhaust vacuum (cm Hg)"
    AP: "Ambient pressure (millibar)"
    RH: "Relative humidity (%)"
    # Target column.
    PE: "Net hourly electrical energy output (target, MW)"
# Spotify tracks: metadata followed by per-track audio features.
1255_spotify_songs:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1255_spotify_songs.csv"
  dataset_description: "Spotify Songs Dataset: Audio features and metadata for tracks available on Spotify."
  feature_descriptions:
    # Metadata.
    track_name: "Name of the track"
    artist_name: "Name of the artist"
    genre: "Genre classification"
    popularity: "Popularity score (0–100)"
    # Audio features.
    danceability: "Danceability score (0–1)"
    energy: "Energy level of the song (0–1)"
    loudness: "Loudness in decibels"
    speechiness: "Speech content (0–1)"
    acousticness: "Likelihood of being acoustic (0–1)"
    instrumentalness: "Likelihood of being instrumental (0–1)"
    liveness: "Presence of live audience (0–1)"
    valence: "Musical positivity (0–1)"
    tempo: "Tempo in BPM"
    duration_ms: "Duration in milliseconds"
    explicit: "Explicit content flag (1 = yes, 0 = no)"
# Student retention indicators; `Target` is Dropout/Graduate/Enrolled.
# A separate entry, 1243_student_dropout, describes a different CSV.
1256_student_dropout:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1256_student_dropout.csv"
  dataset_description: "Student Dropout Dataset: Socioeconomic and academic indicators predicting student retention."
  feature_descriptions:
    Marital_status: "Marital status of student"
    Application_mode: "Mode of application"
    Application_order: "Order of application"
    Course: "Course attended"
    Daytime_evening_attendance: "Type of attendance (day/evening)"
    Previous_qualification: "Previous qualification level"
    Mother's_qualification: "Mother's education level"
    Father's_qualification: "Father's education level"
    Mother's_occupation: "Mother's occupation"
    Father's_occupation: "Father's occupation"
    Admission_grade: "Grade at time of admission"
    Displaced: "Displacement status"
    Educational_special_needs: "Whether student has special needs"
    Debtor: "Student has outstanding debts"
    Tuition_fees_up_to_date: "Tuition fees paid on time"
    Gender: "Gender of student"
    Scholarship_holder: "Has scholarship"
    Age_at_enrollment: "Age at time of enrollment"
    International: "International student flag"
    # First-semester curricular units.
    Curricular_units_1st_sem_enrolled: "Units enrolled in 1st semester"
    Curricular_units_1st_sem_approved: "Units approved in 1st semester"
    Curricular_units_1st_sem_grade: "Average grade in 1st semester"
    # Second-semester curricular units.
    Curricular_units_2nd_sem_enrolled: "Units enrolled in 2nd semester"
    Curricular_units_2nd_sem_approved: "Units approved in 2nd semester"
    Curricular_units_2nd_sem_grade: "Average grade in 2nd semester"
    # Macroeconomic context.
    Unemployment_rate: "Unemployment rate at enrollment"
    Inflation_rate: "Inflation rate at enrollment"
    GDP: "GDP at enrollment year"
    Target: "Dropout/Graduate/Enrolled"
# Nutrition facts per 100g of food product; units are given in each description.
1257_nutrition_values:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1257_nutrition_values.csv"
  dataset_description: "Nutrition Facts Dataset: Nutrient values per 100g of various food products."
  feature_descriptions:
    Product: "Food product name"
    Calories: "Energy in kilocalories"
    Total Fat: "Total fat content (g)"
    Saturated Fat: "Saturated fat content (g)"
    Trans Fat: "Trans fat content (g)"
    Cholesterol: "Cholesterol content (mg)"
    Sodium: "Sodium content (mg)"
    Potassium: "Potassium content (mg)"
    Total Carbohydrate: "Total carbohydrates (g)"
    Dietary Fiber: "Dietary fiber (g)"
    Sugars: "Sugar content (g)"
    Protein: "Protein content (g)"
    Vitamin A: "Vitamin A content (IU)"
    Vitamin C: "Vitamin C content (mg)"
    Calcium: "Calcium content (mg)"
    Iron: "Iron content (mg)"
# Forest fires (Portugal): spatial, temporal and weather inputs; `area` is the target.
1258_forest_fire:
  dataset_description: "Forest Fires Dataset: Meteorological and temporal features predicting the burned area of forest fires in Portugal."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1258_forest_fire.csv"
  feature_descriptions:
    # Quoted on purpose: a bare `Y` is a boolean literal under YAML 1.1, so
    # some loaders would turn the key into `true`. `X` is quoted to match.
    "X": "X-axis spatial coordinate (1 to 9)"
    "Y": "Y-axis spatial coordinate (2 to 9)"
    month: "Month of the year"
    day: "Day of the week"
    FFMC: "Fine Fuel Moisture Code"
    DMC: "Duff Moisture Code"
    DC: "Drought Code"
    ISI: "Initial Spread Index"
    temp: "Temperature in Celsius"
    RH: "Relative Humidity (%)"
    wind: "Wind speed (km/h)"
    rain: "Rainfall (mm)"
    # Target column.
    area: "Burned area (ha, target)"
# FIFA player records: profile/contract metadata followed by skill ratings.
1259_fifa_players:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1259_fifa_players.csv"
  dataset_description: "FIFA Players Dataset: Attributes and metadata for professional football players."
  feature_descriptions:
    # Profile and contract.
    Name: "Player name"
    Age: "Player age"
    Nationality: "Country of origin"
    Overall: "Overall rating"
    Potential: "Potential rating"
    Club: "Club affiliation"
    Value: "Market value"
    Wage: "Weekly wage"
    Preferred Foot: "Dominant foot"
    Position: "Primary playing position"
    Height: "Height (cm)"
    Weight: "Weight (kg)"
    # Skill ratings.
    Acceleration: "Acceleration rating"
    SprintSpeed: "Sprint speed rating"
    Finishing: "Finishing skill rating"
    Dribbling: "Dribbling skill rating"
    Strength: "Physical strength rating"
    Composure: "Composure rating"
    GKReflexes: "Goalkeeper reflexes"
# Bike sharing rentals with calendar and weather features; `cnt` is the target.
1260_bike_sharing:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1260_bike_sharing.csv"
  dataset_description: "Bike Sharing Dataset: Hourly and daily bike rental data with weather and time features."
  feature_descriptions:
    instant: "Record index"
    # Calendar features.
    dteday: "Date"
    season: "Season (1:spring, 2:summer, 3:fall, 4:winter)"
    yr: "Year (0: 2011, 1: 2012)"
    mnth: "Month (1 to 12)"
    hr: "Hour (0 to 23)"
    holiday: "Holiday flag"
    weekday: "Day of the week"
    workingday: "Working day flag"
    # Weather features.
    weathersit: "Weather situation (1 to 4)"
    temp: "Normalized temperature"
    atemp: "Normalized feeling temperature"
    hum: "Normalized humidity"
    windspeed: "Normalized wind speed"
    # User counts; `cnt` is the target.
    casual: "Count of casual users"
    registered: "Count of registered users"
    cnt: "Total count of users (target)"
# Bank customer churn; `Exited` is the churn flag.
# Defined exactly once: repeated top-level keys are invalid YAML and most
# loaders silently keep only the last occurrence.
1261_churn_modelling:
  dataset_description: "Bank Customer Churn Dataset: Customer demographics and banking activity to predict churn."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1261_churn_modelling.csv"
  feature_descriptions:
    RowNumber: "Index of customer"
    CustomerId: "Unique customer ID"
    Surname: "Customer surname"
    CreditScore: "Credit score"
    Geography: "Customer's country"
    Gender: "Customer gender"
    Age: "Customer age"
    Tenure: "Years of account tenure"
    Balance: "Account balance"
    NumOfProducts: "Number of bank products"
    HasCrCard: "Has credit card (1: yes, 0: no)"
    IsActiveMember: "Active account status"
    EstimatedSalary: "Estimated yearly salary"
    Exited: "Churn flag (1 = left, 0 = stayed)"
# Flight-delays metadata: dataset summary, CSV location, and per-feature
# descriptions. `Delay` carries the delayed / on-time label.
1262_flight_delays:
  dataset_description: "Flight Delays Dataset: Features describing commercial flights and their delay status."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1262_flight_delays.csv"
  feature_descriptions:
    Airline: "Name of the airline carrier"
    Flight: "Flight number"
    AirportFrom: "Origin airport code"
    AirportTo: "Destination airport code"
    DayOfWeek: "Day of the week (1–7)"
    Time: "Scheduled departure time"
    Length: "Flight duration"
    Delay: "Delay label (1 = delayed, 0 = on time)"
# Mall customers metadata, described as data for customer segmentation.
# Feature names that contain spaces or parentheses are quoted so they read
# unambiguously as single keys.
1263_mall_customers:
  dataset_description: "Mall Customers Dataset: Customer demographic and spending behavior for segmentation."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1263_mall_customers.csv"
  feature_descriptions:
    CustomerID: "Unique ID for each customer"
    Gender: "Customer gender"
    Age: "Customer age"
    "Annual Income (k$)": "Estimated annual income in thousand dollars"
    "Spending Score (1-100)": "Spending score based on income and behavior"
# Telecom churn metadata (subscription-style schema: services, contract and
# billing fields). `Churn` records the Yes/No outcome.
1264_telecom_churn:
  dataset_description: "Telecom Churn Dataset: Customer details for predicting churn in telecom services."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1264_telecom_churn.csv"
  feature_descriptions:
    customerID: "Customer identifier"
    gender: "Customer gender"
    SeniorCitizen: "Flag indicating if customer is senior (1=yes)"
    Partner: "Has a partner"
    Dependents: "Has dependents"
    tenure: "Number of months with the company"
    PhoneService: "Has phone service"
    MultipleLines: "Has multiple lines"
    InternetService: "Type of internet service"
    OnlineSecurity: "Online security service subscription"
    OnlineBackup: "Online backup service"
    DeviceProtection: "Device protection service"
    TechSupport: "Tech support service"
    StreamingTV: "Subscribed to streaming TV"
    StreamingMovies: "Subscribed to streaming movies"
    Contract: "Contract type"
    PaperlessBilling: "Paperless billing flag"
    PaymentMethod: "Payment method used"
    MonthlyCharges: "Monthly charge amount"
    TotalCharges: "Total charges accumulated"
    Churn: "Whether the customer churned (Yes/No)"
# Taiwan real-estate valuation metadata. The `Y ...` column is the target.
# Feature names contain spaces, so they are quoted. The target unit follows the
# UCI "Real estate valuation" documentation (price per ping, not per m^2).
1265_real_estate:
  dataset_description: "Real Estate Price Dataset: Housing features for predicting property price in Taiwan."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1265_real_estate.csv"
  feature_descriptions:
    "X1 transaction date": "Date of the transaction"
    "X2 house age": "Age of the house (years)"
    "X3 distance to the nearest MRT station": "Distance to the nearest public transit station (meters)"
    "X4 number of convenience stores": "Number of nearby convenience stores"
    "X5 latitude": "Latitude of the house location"
    "X6 longitude": "Longitude of the house location"
    "Y house price of unit area": "House price per unit area (10,000 New Taiwan Dollars per ping; 1 ping = 3.3 square meters) (target)"
# Titanic passenger metadata. `Survived` is the 0/1 survival indicator.
# NOTE(review): 1290_titanic_survival in this file describes the same set of
# column names — confirm the two CSVs are not duplicates.
1266_titanic:
  dataset_description: "Titanic Survival Dataset: Passenger information used to predict survival outcomes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1266_titanic.csv"
  feature_descriptions:
    PassengerId: "Unique identifier for each passenger"
    Survived: "Survival indicator (0 = No, 1 = Yes)"
    Pclass: "Ticket class (1 = 1st, 2 = 2nd, 3 = 3rd)"
    Name: "Full name of the passenger"
    Sex: "Gender"
    Age: "Age in years"
    SibSp: "Number of siblings/spouses aboard"
    Parch: "Number of parents/children aboard"
    Ticket: "Ticket number"
    Fare: "Passenger fare"
    Cabin: "Cabin number"
    Embarked: "Port of embarkation (C = Cherbourg; Q = Queenstown; S = Southampton)"
# Heart-disease metadata. `target` is the 1/0 disease label.
1267_heart_disease:
  dataset_description: "Heart Disease Dataset: Medical features used to predict the presence of heart disease."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1267_heart_disease.csv"
  feature_descriptions:
    age: "Age in years"
    sex: "Sex (1 = male, 0 = female)"
    cp: "Chest pain type (0 to 3)"
    trestbps: "Resting blood pressure (mm Hg)"
    chol: "Serum cholesterol (mg/dl)"
    fbs: "Fasting blood sugar > 120 mg/dl (1 = true; 0 = false)"
    restecg: "Resting electrocardiographic results (0 to 2)"
    thalach: "Maximum heart rate achieved"
    exang: "Exercise-induced angina (1 = yes; 0 = no)"
    oldpeak: "ST depression induced by exercise"
    slope: "Slope of the peak exercise ST segment"
    ca: "Number of major vessels (0–3) colored by fluoroscopy"
    # NOTE(review): thal is coded 3/6/7 here, while the 1284_heart_disease
    # entry in this file (same column names) uses 1/2/3 — confirm which coding
    # this CSV actually contains.
    thal: "Thalassemia status (3 = normal; 6 = fixed defect; 7 = reversible defect)"
    target: "Presence of heart disease (1 = yes; 0 = no)"
# Air-quality metadata: reference-analyzer concentrations (the `(GT)` columns)
# plus raw responses of the five PT08 metal-oxide sensors and weather readings.
# Sensor materials and the hourly cadence follow the UCI "Air Quality"
# documentation; records carry both a Date and a Time field.
1268_air_quality:
  dataset_description: "Air Quality Dataset: Hourly averaged measurements of air pollution indicators."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1268_air_quality.csv"
  feature_descriptions:
    Date: "Measurement date"
    Time: "Measurement time"
    CO(GT): "Carbon monoxide concentration (mg/m^3)"
    PT08.S1(CO): "Tin oxide sensor from micro gas sensor"
    NMHC(GT): "Non-methane hydrocarbons (ug/m^3)"
    C6H6(GT): "Benzene concentration (ug/m^3)"
    PT08.S2(NMHC): "Titania sensor from micro gas sensor"
    NOx(GT): "Nitrogen oxides concentration (ppb)"
    PT08.S3(NOx): "Tungsten oxide sensor from micro gas sensor"
    NO2(GT): "Nitrogen dioxide concentration (ug/m^3)"
    # S4 is also a tungsten oxide element (NO2 targeted); S5 is indium oxide.
    PT08.S4(NO2): "Tungsten oxide sensor from micro gas sensor (nominally NO2 targeted)"
    PT08.S5(O3): "Indium oxide sensor from micro gas sensor (nominally O3 targeted)"
    T: "Temperature (°C)"
    RH: "Relative humidity (%)"
    AH: "Absolute humidity"
# House-prices metadata (Kaggle competition data per the description).
# `SalePrice` is marked as the target.
# NOTE(review): the Kaggle source has many more columns than the 26 described
# here — confirm whether the CSV was reduced or descriptions are missing.
1269_house_prices:
  dataset_description: "House Prices Dataset: Detailed housing attributes for price prediction (Kaggle competition)."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1269_house_prices.csv"
  feature_descriptions:
    Id: "Unique identifier for each house"
    MSSubClass: "The building class"
    MSZoning: "The general zoning classification"
    LotFrontage: "Linear feet of street connected to property"
    LotArea: "Lot size in square feet"
    Street: "Type of road access"
    Alley: "Type of alley access"
    LotShape: "General shape of property"
    LandContour: "Flatness of the property"
    Utilities: "Type of utilities available"
    Neighborhood: "Physical locations within Ames city"
    OverallQual: "Overall material and finish quality"
    OverallCond: "Overall condition rating"
    YearBuilt: "Original construction date"
    YearRemodAdd: "Remodel date"
    RoofStyle: "Type of roof"
    Exterior1st: "Exterior covering on house"
    MasVnrType: "Masonry veneer type"
    BsmtQual: "Height of the basement"
    TotalBsmtSF: "Total square feet of basement area"
    GrLivArea: "Above grade (ground) living area square feet"
    FullBath: "Full bathrooms above grade"
    TotRmsAbvGrd: "Total rooms above grade"
    GarageCars: "Size of garage in car capacity"
    GarageArea: "Size of garage in square feet"
    SalePrice: "Property sale price (target)"
# Online-shopper session metadata: page-visit counts and durations, traffic
# attributes, and the purchase outcome. `Revenue` is marked as the target.
1270_online_shoppers_intention:
  dataset_description: "Online Shoppers Intention Dataset: Browsing session behavior to predict purchase intent."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1270_online_shoppers_intention.csv"
  feature_descriptions:
    Administrative: "Number of administrative pages visited"
    Administrative_Duration: "Time spent on administrative pages"
    Informational: "Number of informational pages visited"
    Informational_Duration: "Time spent on informational pages"
    ProductRelated: "Number of product-related pages visited"
    ProductRelated_Duration: "Time spent on product-related pages"
    BounceRates: "Average bounce rate per page"
    ExitRates: "Average exit rate per page"
    PageValues: "Page value (custom metric)"
    SpecialDay: "Proximity to special days"
    Month: "Month of the visit"
    OperatingSystems: "Visitor's OS"
    Browser: "Browser used"
    Region: "Geographical region"
    TrafficType: "Source of traffic"
    VisitorType: "New vs. returning visitor"
    Weekend: "Visit occurred on weekend"
    Revenue: "Whether a purchase occurred (target)"
# Student exam-score metadata: demographic attributes plus math, reading and
# writing scores. Multi-word feature names are quoted so each reads as one key.
1271_student_performance:
  dataset_description: "Student Performance Dataset: Academic and demographic features to predict exam scores."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1271_student_performance.csv"
  feature_descriptions:
    gender: "Gender of student"
    race/ethnicity: "Race/ethnicity group"
    "parental level of education": "Highest education level of parents"
    lunch: "Standard or free/reduced lunch"
    "test preparation course": "Completed test prep course"
    "math score": "Math exam score"
    "reading score": "Reading exam score"
    "writing score": "Writing exam score"
# Wine-quality metadata: physicochemical measurements plus the taster-assigned
# `quality` score. Multi-word feature names are quoted so each reads as one key.
1272_wine_quality:
  dataset_description: "Wine Quality Dataset: Physicochemical properties of wine used to predict quality ratings."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1272_wine_quality.csv"
  feature_descriptions:
    "fixed acidity": "Level of fixed acids like tartaric acid"
    "volatile acidity": "Level of acetic acid in wine"
    "citric acid": "Concentration of citric acid"
    "residual sugar": "Amount of sugar left after fermentation"
    chlorides: "Amount of salt in the wine"
    "free sulfur dioxide": "Free form of SO2 in wine"
    "total sulfur dioxide": "Total SO2 (free + bound) in wine"
    density: "Density of the wine"
    pH: "Acidity or basicity of the wine"
    sulphates: "Sulphate content acting as a preservative"
    alcohol: "Alcohol content percentage"
    quality: "Quality rating (score from human tasters)"
# Pima Indians diabetes metadata. `Outcome` is the 1/0 diagnosis label.
1273_pima_diabetes:
  dataset_description: "Pima Indians Diabetes Dataset: Health metrics used to diagnose diabetes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1273_pima_diabetes.csv"
  feature_descriptions:
    Pregnancies: "Number of times pregnant"
    Glucose: "Plasma glucose concentration"
    BloodPressure: "Diastolic blood pressure (mm Hg)"
    SkinThickness: "Triceps skin fold thickness (mm)"
    Insulin: "2-Hour serum insulin (mu U/ml)"
    BMI: "Body mass index (weight in kg/(height in m)^2)"
    DiabetesPedigreeFunction: "Diabetes pedigree function"
    Age: "Age in years"
    Outcome: "Diabetes diagnosis (1 = positive, 0 = negative)"
# Credit-card default metadata. `default.payment.next.month` is the label.
# Each monthly column has its own key: a range such as "PAY_0 to PAY_6" is not
# a column name and can never be matched against the CSV header.
# NOTE(review): keys assume the UCI/Kaggle naming (PAY_0, PAY_2..PAY_6 with no
# PAY_1; index 1 = September, 6 = April) — confirm against the CSV header.
1274_credit_card_default:
  dataset_description: "Credit Card Default Dataset: Predicting probability of default using payment history."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1274_credit_card_default.csv"
  feature_descriptions:
    ID: "Customer ID"
    LIMIT_BAL: "Amount of given credit (NT dollar)"
    SEX: "Gender (1 = male, 2 = female)"
    EDUCATION: "Education level (1 = graduate school, etc.)"
    MARRIAGE: "Marital status (1 = married, etc.)"
    AGE: "Age in years"
    PAY_0: "Repayment status in September"
    PAY_2: "Repayment status in August"
    PAY_3: "Repayment status in July"
    PAY_4: "Repayment status in June"
    PAY_5: "Repayment status in May"
    PAY_6: "Repayment status in April"
    BILL_AMT1: "Bill statement amount in September"
    BILL_AMT2: "Bill statement amount in August"
    BILL_AMT3: "Bill statement amount in July"
    BILL_AMT4: "Bill statement amount in June"
    BILL_AMT5: "Bill statement amount in May"
    BILL_AMT6: "Bill statement amount in April"
    PAY_AMT1: "Amount paid in September"
    PAY_AMT2: "Amount paid in August"
    PAY_AMT3: "Amount paid in July"
    PAY_AMT4: "Amount paid in June"
    PAY_AMT5: "Amount paid in May"
    PAY_AMT6: "Amount paid in April"
    default.payment.next.month: "Whether the customer defaulted"
# Forest-fire metadata: spatial grid position, date fields, fire-weather
# indices and weather readings. `area` is the burned area the description says
# is estimated.
1275_forest_fire:
  dataset_description: "Forest Fire Dataset: Meteorological conditions used to estimate area affected by fire."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1275_forest_fire.csv"
  feature_descriptions:
    # X and Y are quoted as a pair: a bare `Y` is a boolean under the YAML 1.1
    # bool type, so spec-following loaders would turn that key into `true`.
    "X": "X-axis spatial coordinate within the park"
    "Y": "Y-axis spatial coordinate within the park"
    month: "Month of the year"
    day: "Day of the week"
    FFMC: "Fine Fuel Moisture Code"
    DMC: "Duff Moisture Code"
    DC: "Drought Code"
    ISI: "Initial Spread Index"
    temp: "Temperature in Celsius"
    RH: "Relative humidity (%)"
    wind: "Wind speed (km/h)"
    rain: "Rainfall (mm/m^2)"
    area: "Area affected by fire (hectares)"
# Abalone metadata. `Rings` is the quantity used to derive age.
# NOTE(review): the UCI distribution of this data stores the continuous
# measurements pre-scaled, so the mm / g units below may not match the raw
# numbers in the CSV — confirm.
1276_abalone:
  dataset_description: "Abalone Dataset: Physical measurements used to predict age of abalone (via number of rings)."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1276_abalone.csv"
  feature_descriptions:
    Sex: "Gender of abalone (M, F, or I)"
    Length: "Longest shell measurement (mm)"
    Diameter: "Perpendicular to length (mm)"
    Height: "With meat in shell (mm)"
    WholeWeight: "Weight of whole abalone (g)"
    ShuckedWeight: "Weight of meat (g)"
    VisceraWeight: "Gut weight (g)"
    ShellWeight: "After drying (g)"
    Rings: "Number of rings (age = Rings + 1.5)"
# Building energy-efficiency metadata. Both `Heating_Load` and `Cooling_Load`
# are marked as targets.
# NOTE(review): 1292_energy_efficiency in this file describes the same column
# names — confirm the two CSVs are distinct.
1277_energy_efficiency:
  dataset_description: "Energy Efficiency Dataset: Predicting heating and cooling loads from building characteristics."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1277_energy_efficiency.csv"
  feature_descriptions:
    Relative_Compactness: "Compactness ratio of the building"
    Surface_Area: "Total surface area of the building"
    Wall_Area: "Area of walls"
    Roof_Area: "Area of the roof"
    Overall_Height: "Building height"
    Orientation: "Orientation of the building"
    Glazing_Area: "Area of glazed surfaces (windows)"
    Glazing_Area_Distribution: "Distribution of glazing areas"
    Heating_Load: "Heating energy load (target)"
    Cooling_Load: "Cooling energy load (target)"
# Taiwanese credit-card clients metadata. `default_payment_next_month` is the
# default indicator.
# Each monthly column has its own key: a range such as "PAY_0 to PAY_6" is not
# a column name and can never be matched against the CSV header.
# NOTE(review): keys assume the UCI naming (PAY_0, PAY_2..PAY_6 with no PAY_1;
# index 1 = September, 6 = April) — confirm against the CSV header.
1278_taiwanese_credit:
  dataset_description: "Taiwanese Credit Card Clients: Demographics and financial behavior for default prediction."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1278_taiwanese_credit.csv"
  feature_descriptions:
    LIMIT_BAL: "Amount of credit provided"
    SEX: "Gender (1 = male, 2 = female)"
    EDUCATION: "Education level (1 = graduate school, etc.)"
    MARRIAGE: "Marital status (1 = married, etc.)"
    AGE: "Age in years"
    PAY_0: "Past monthly payment status for September"
    PAY_2: "Past monthly payment status for August"
    PAY_3: "Past monthly payment status for July"
    PAY_4: "Past monthly payment status for June"
    PAY_5: "Past monthly payment status for May"
    PAY_6: "Past monthly payment status for April"
    BILL_AMT1: "Bill amount for September"
    BILL_AMT2: "Bill amount for August"
    BILL_AMT3: "Bill amount for July"
    BILL_AMT4: "Bill amount for June"
    BILL_AMT5: "Bill amount for May"
    BILL_AMT6: "Bill amount for April"
    PAY_AMT1: "Payment amount for September"
    PAY_AMT2: "Payment amount for August"
    PAY_AMT3: "Payment amount for July"
    PAY_AMT4: "Payment amount for June"
    PAY_AMT5: "Payment amount for May"
    PAY_AMT6: "Payment amount for April"
    default_payment_next_month: "Default indicator"
# Fatal police shootings metadata: incident-level fields covering the
# individual's demographics, location, and circumstances of the shooting.
1279_fatal_police_shootings:
  dataset_description: "Fatal Police Shootings: Records of individuals shot by police with demographic and situational data."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1279_fatal_police_shootings.csv"
  feature_descriptions:
    id: "Unique shooting incident ID"
    name: "Name of individual"
    date: "Date of incident"
    manner_of_death: "Whether person was shot or shot and Tasered"
    armed: "Whether the person was armed"
    age: "Age of the individual"
    gender: "Gender"
    race: "Race"
    city: "City where incident occurred"
    state: "State where incident occurred"
    signs_of_mental_illness: "Whether signs of mental illness were present"
    threat_level: "Level of threat perceived"
    flee: "Whether the person tried to flee"
    body_camera: "Was a body camera in use"
# Telecom churn metadata (call-usage schema: minutes, calls and charges per
# time of day). `churn` records whether the customer churned.
1280_telecom_churn:
  dataset_description: "Telecom Churn Dataset: Customer usage patterns and service metrics to predict churn."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1280_telecom_churn.csv"
  feature_descriptions:
    state: "Customer's U.S. state"
    account_length: "Number of days since account opened"
    area_code: "Area code"
    international_plan: "Whether the customer has an international plan"
    voice_mail_plan: "Whether the customer has a voicemail plan"
    number_vmail_messages: "Number of voicemail messages"
    total_day_minutes: "Total daytime call minutes"
    total_day_calls: "Total number of daytime calls"
    total_day_charge: "Total charge for daytime calls"
    total_eve_minutes: "Total evening call minutes"
    total_eve_calls: "Total number of evening calls"
    total_eve_charge: "Total charge for evening calls"
    total_night_minutes: "Total night call minutes"
    total_night_calls: "Total number of night calls"
    total_night_charge: "Total charge for night calls"
    total_intl_minutes: "Total international call minutes"
    total_intl_calls: "Total number of international calls"
    total_intl_charge: "Total charge for international calls"
    number_customer_service_calls: "Customer service interactions"
    churn: "Whether customer churned"
# Car evaluation metadata. `class` is the acceptability label
# (unacc, acc, good, vgood).
1281_car_evaluation:
  dataset_description: "Car Evaluation Dataset: Features like price, maintenance, and safety to classify car acceptability."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1281_car_evaluation.csv"
  feature_descriptions:
    buying: "Price of the car"
    maint: "Maintenance cost"
    doors: "Number of doors"
    persons: "Seating capacity"
    lug_boot: "Size of luggage boot"
    safety: "Safety level"
    class: "Overall car acceptability (unacc, acc, good, vgood)"
# Bike-sharing metadata: calendar, weather and rental-count fields.
# `cnt` is marked as the target.
# NOTE(review): if cnt is the sum of casual and registered (as in the UCI
# source data), those two columns leak the target — confirm how they are used.
1282_bike_sharing:
  dataset_description: "Bike Sharing Dataset: Hourly and daily bike rental data with weather and seasonal attributes."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1282_bike_sharing.csv"
  feature_descriptions:
    season: "Season (1:spring, 2:summer, etc.)"
    yr: "Year (0 = 2011, 1 = 2012)"
    mnth: "Month (1 to 12)"
    hr: "Hour (0 to 23)"
    holiday: "Whether that day is a holiday"
    weekday: "Day of the week"
    workingday: "Is it a working day"
    weathersit: "Categorical weather situation"
    temp: "Normalized temperature"
    atemp: "Normalized feeling temperature"
    hum: "Normalized humidity"
    windspeed: "Normalized wind speed"
    casual: "Number of casual users"
    registered: "Number of registered users"
    cnt: "Total count of users (target)"
# Adult income metadata. `income` holds the >50K / <=50K bracket that the
# description says is predicted.
1283_adult_income:
  dataset_description: "Adult Income Dataset: Demographic attributes to predict whether income exceeds $50K/year."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1283_adult_income.csv"
  feature_descriptions:
    age: "Age of individual"
    workclass: "Type of employment"
    fnlwgt: "Final weight (census sampling)"
    education: "Education level"
    education-num: "Numeric representation of education"
    marital-status: "Marital status"
    occupation: "Occupation type"
    relationship: "Relationship status"
    race: "Race"
    sex: "Gender"
    capital-gain: "Capital gain"
    capital-loss: "Capital loss"
    hours-per-week: "Work hours per week"
    native-country: "Country of origin"
    income: "Income bracket (>50K or <=50K)"
# Heart-disease metadata. `target` is the 1/0 disease label.
1284_heart_disease:
  dataset_description: "Heart Disease Dataset: Medical measurements used to diagnose presence of heart disease."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1284_heart_disease.csv"
  feature_descriptions:
    age: "Age in years"
    sex: "Sex (1 = male; 0 = female)"
    cp: "Chest pain type (0-3)"
    trestbps: "Resting blood pressure (mm Hg)"
    chol: "Serum cholesterol (mg/dl)"
    fbs: "Fasting blood sugar > 120 mg/dl (1 = true; 0 = false)"
    restecg: "Resting electrocardiographic results (0-2)"
    thalach: "Maximum heart rate achieved"
    exang: "Exercise-induced angina (1 = yes; 0 = no)"
    oldpeak: "ST depression induced by exercise"
    slope: "Slope of the peak exercise ST segment"
    ca: "Number of major vessels (0-3) colored by fluoroscopy"
    # NOTE(review): thal is coded 1/2/3 here, while the 1267_heart_disease
    # entry in this file (same column names) uses 3/6/7 — confirm which coding
    # this CSV actually contains.
    thal: "Thalassemia status (1 = normal; 2 = fixed defect; 3 = reversible defect)"
    target: "Presence of heart disease (1 = yes; 0 = no)"
# Customer personality metadata: demographics, spend per product category,
# purchase channels and campaign responses.
# Each campaign column has its own key: the range "AcceptedCmp1–5" is not a
# column name and can never be matched against the CSV header.
# NOTE(review): assumes the CSV columns are AcceptedCmp1..AcceptedCmp5 — confirm.
1285_customer_personality:
  dataset_description: "Customer Personality Analysis: Demographics and spending behavior for segmentation."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1285_customer_personality.csv"
  feature_descriptions:
    ID: "Customer ID"
    Year_Birth: "Year of birth"
    Education: "Level of education"
    Marital_Status: "Marital status"
    Income: "Household income"
    Kidhome: "Number of children at home"
    Teenhome: "Number of teenagers at home"
    Dt_Customer: "Customer enrollment date"
    Recency: "Days since last purchase"
    MntWines: "Amount spent on wine"
    MntFruits: "Amount spent on fruits"
    MntMeatProducts: "Amount spent on meat"
    MntFishProducts: "Amount spent on fish"
    MntSweetProducts: "Amount spent on sweets"
    MntGoldProds: "Amount spent on gold products"
    NumDealsPurchases: "Number of purchases made with discount"
    NumWebPurchases: "Number of purchases through website"
    NumCatalogPurchases: "Number of purchases made using catalog"
    NumStorePurchases: "Number of purchases made in store"
    NumWebVisitsMonth: "Number of website visits in last month"
    AcceptedCmp1: "Response to previous campaign 1"
    AcceptedCmp2: "Response to previous campaign 2"
    AcceptedCmp3: "Response to previous campaign 3"
    AcceptedCmp4: "Response to previous campaign 4"
    AcceptedCmp5: "Response to previous campaign 5"
    Complain: "Customer complained (1 = yes)"
    Response: "Response to latest campaign"
# Boston housing metadata: town-level attributes. `MEDV` is marked as the
# target.
1286_boston_housing:
  dataset_description: "Boston Housing Dataset: Various housing attributes to predict median house value."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1286_boston_housing.csv"
  feature_descriptions:
    CRIM: "Per capita crime rate by town"
    ZN: "Proportion of residential land zoned for lots over 25,000 sq.ft."
    INDUS: "Proportion of non-retail business acres per town"
    CHAS: "Charles River dummy variable (= 1 if tract bounds river)"
    NOX: "Nitric oxides concentration (parts per 10 million)"
    RM: "Average number of rooms per dwelling"
    AGE: "Proportion of owner-occupied units built prior to 1940"
    DIS: "Weighted distances to five Boston employment centers"
    RAD: "Index of accessibility to radial highways"
    TAX: "Full-value property tax rate per $10,000"
    PTRATIO: "Pupil-teacher ratio by town"
    B: "1000(Bk - 0.63)^2 where Bk is the proportion of Black residents"
    LSTAT: "% lower status of the population"
    MEDV: "Median value of owner-occupied homes in $1000s (target)"
# Marketing campaign metadata: client attributes, contact history and economic
# indicators. `y` records whether the client subscribed to a term deposit.
1287_marketing_campaign:
  dataset_description: "Marketing Campaign Dataset: Customer features and historical responses to previous campaigns."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1287_marketing_campaign.csv"
  feature_descriptions:
    age: "Age of customer"
    job: "Type of job (e.g., admin, technician)"
    marital: "Marital status (married, single, divorced)"
    education: "Education level"
    default: "Has credit in default (yes/no)"
    housing: "Has housing loan (yes/no)"
    loan: "Has personal loan (yes/no)"
    contact: "Contact communication type (cellular, telephone)"
    month: "Last contact month of year"
    day_of_week: "Last contact day of the week"
    duration: "Last contact duration in seconds"
    campaign: "Number of contacts performed during this campaign"
    pdays: "Days since last contact (999 means client was not previously contacted)"
    previous: "Number of contacts performed before this campaign"
    poutcome: "Outcome of the previous marketing campaign"
    emp_var_rate: "Employment variation rate"
    cons_price_idx: "Consumer price index"
    cons_conf_idx: "Consumer confidence index"
    euribor3m: "Euribor 3-month rate"
    nr_employed: "Number of employees"
    # Quoted: a bare `y` is a boolean under the YAML 1.1 bool type, so
    # spec-following loaders would turn this key into `true`.
    "y": "Has the client subscribed to a term deposit?"
# Clinical-trial metadata: subject demographics, vital and lab measurements,
# assigned medication, and the recorded trial `outcome`.
1288_clinical_trial_data:
  dataset_description: "Clinical Trial Dataset: Demographic and health indicators from a clinical study."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1288_clinical_trial_data.csv"
  feature_descriptions:
    subject_id: "Unique subject ID"
    age: "Age of subject"
    sex: "Biological sex of subject"
    bmi: "Body Mass Index"
    systolic_bp: "Systolic blood pressure"
    diastolic_bp: "Diastolic blood pressure"
    cholesterol: "Cholesterol level"
    glucose: "Fasting glucose level"
    smoking_status: "Current smoker status"
    medication: "Medication assigned during trial"
    outcome: "Trial outcome (e.g., success, failure)"
# Flight delay prediction metadata: schedule, actual times, distance and
# weather fields. `delay_status` is the Yes/No delay label.
1289_flight_delay_prediction:
  dataset_description: "Flight Delay Dataset: Flight and weather features used to predict delays."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1289_flight_delay_prediction.csv"
  feature_descriptions:
    flight_number: "Unique flight identifier"
    airline: "Airline carrier code"
    origin_airport: "Origin airport"
    destination_airport: "Destination airport"
    scheduled_departure: "Scheduled departure time"
    actual_departure: "Actual departure time"
    departure_delay: "Departure delay in minutes"
    scheduled_arrival: "Scheduled arrival time"
    actual_arrival: "Actual arrival time"
    arrival_delay: "Arrival delay in minutes"
    distance: "Flight distance in miles"
    weather_conditions: "Weather status at origin airport"
    delay_status: "Was the flight delayed? (Yes/No)"
# Titanic passenger metadata. `Survived` is the 0/1 survival status.
# NOTE(review): 1266_titanic in this file describes the same set of column
# names — confirm the two CSVs are not duplicates.
1290_titanic_survival:
  dataset_description: "Titanic Survival Dataset: Passenger details used to predict survival outcome."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1290_titanic_survival.csv"
  feature_descriptions:
    PassengerId: "Unique passenger identifier"
    Pclass: "Passenger class (1st, 2nd, 3rd)"
    Name: "Full name of passenger"
    Sex: "Gender"
    Age: "Age of passenger"
    SibSp: "Number of siblings/spouses aboard"
    Parch: "Number of parents/children aboard"
    Ticket: "Ticket number"
    Fare: "Fare paid"
    Cabin: "Cabin number"
    Embarked: "Port of embarkation (C = Cherbourg, Q = Queenstown, S = Southampton)"
    Survived: "Survival status (0 = No, 1 = Yes)"
# Employee attrition metadata: HR attributes per employee. `Attrition` records
# whether the employee left the company.
1291_employee_attrition:
  dataset_description: "Employee Attrition Dataset: HR metrics used to predict employee turnover."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1291_employee_attrition.csv"
  feature_descriptions:
    Age: "Age of the employee"
    Attrition: "Whether the employee left the company"
    BusinessTravel: "Frequency of business travel"
    DailyRate: "Daily rate of pay"
    Department: "Department name"
    DistanceFromHome: "Distance from home to work (in km)"
    Education: "Education level"
    EducationField: "Field of education"
    # Described as constant ("always 1").
    EmployeeCount: "Number of employees (always 1)"
    EmployeeNumber: "Employee ID"
    EnvironmentSatisfaction: "Satisfaction with work environment"
    Gender: "Gender of the employee"
    HourlyRate: "Hourly wage"
    JobInvolvement: "Level of job involvement"
    JobLevel: "Job level (seniority)"
    JobRole: "Job position"
    JobSatisfaction: "Satisfaction with job"
    MaritalStatus: "Marital status"
    MonthlyIncome: "Monthly income"
    MonthlyRate: "Monthly rate of pay"
    NumCompaniesWorked: "Number of companies worked at"
    OverTime: "Whether the employee works overtime"
    PercentSalaryHike: "Percentage salary hike"
    PerformanceRating: "Performance rating"
    RelationshipSatisfaction: "Satisfaction with relationships at work"
    # Described as constant ("always 80").
    StandardHours: "Standard working hours (always 80)"
    StockOptionLevel: "Stock option level"
    TotalWorkingYears: "Total years of work experience"
    TrainingTimesLastYear: "Number of training attended in the last year"
    WorkLifeBalance: "Work-life balance score"
    YearsAtCompany: "Years at the current company"
    YearsInCurrentRole: "Years in current job role"
    YearsSinceLastPromotion: "Years since last promotion"
    YearsWithCurrManager: "Years with current manager"
# Building energy-efficiency metadata. `Heating_Load` and `Cooling_Load` are
# the quantities the description says are predicted.
# NOTE(review): 1277_energy_efficiency in this file describes the same column
# names — confirm the two CSVs are distinct.
1292_energy_efficiency:
  dataset_description: "Energy Efficiency Dataset: Architectural features for predicting heating and cooling load."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1292_energy_efficiency.csv"
  feature_descriptions:
    Relative_Compactness: "Compactness of building shape"
    Surface_Area: "Total surface area of the building"
    Wall_Area: "Total wall area"
    Roof_Area: "Total roof area"
    Overall_Height: "Building height"
    Orientation: "Building orientation (categorical)"
    Glazing_Area: "Total area of glazing (windows)"
    Glazing_Area_Distribution: "Glazing distribution"
    Heating_Load: "Energy needed for heating"
    Cooling_Load: "Energy needed for cooling"
# Student performance metadata: school, family and lifestyle attributes plus
# three grades (G1 and G2 are period grades, G3 is the final grade).
1293_student_performance:
  dataset_description: "Student Performance Dataset: Academic and demographic attributes for performance prediction."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1293_student_performance.csv"
  feature_descriptions:
    school: "School attended (GP or MS)"
    sex: "Gender of the student"
    age: "Age"
    address: "Home address type (urban or rural)"
    famsize: "Family size (GT3 = >3, LE3 = <=3)"
    Pstatus: "Parent's cohabitation status"
    Medu: "Mother’s education (0 to 4)"
    Fedu: "Father’s education (0 to 4)"
    Mjob: "Mother’s job"
    Fjob: "Father’s job"
    reason: "Reason to choose the school"
    guardian: "Student’s guardian"
    traveltime: "Travel time to school"
    studytime: "Weekly study time"
    failures: "Number of past class failures"
    schoolsup: "Extra educational support"
    famsup: "Family educational support"
    paid: "Extra paid classes"
    activities: "Extra-curricular activities"
    nursery: "Attended nursery school"
    higher: "Wants higher education"
    internet: "Internet access at home"
    romantic: "In a romantic relationship"
    famrel: "Family relationship quality"
    freetime: "Free time after school"
    goout: "Going out with friends"
    Dalc: "Workday alcohol consumption"
    Walc: "Weekend alcohol consumption"
    health: "Current health status"
    absences: "Number of school absences"
    G1: "First period grade"
    G2: "Second period grade"
    G3: "Final grade"
# Weather forecasting metadata: per-observation readings, mostly taken at 9am
# and 3pm. `RainTomorrow` is labelled as the target.
1294_weather_forecasting:
  dataset_description: "Weather Forecasting Dataset: Meteorological attributes for predicting weather conditions."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1294_weather_forecasting.csv"
  feature_descriptions:
    Date: "Date of observation"
    Location: "Geographic location of measurement"
    MinTemp: "Minimum temperature (°C)"
    MaxTemp: "Maximum temperature (°C)"
    Rainfall: "Amount of rainfall (mm)"
    Evaporation: "Amount of water evaporation (mm)"
    Sunshine: "Sunshine duration (hours)"
    WindGustDir: "Direction of strongest wind gust"
    WindGustSpeed: "Speed of strongest wind gust (km/h)"
    WindDir9am: "Wind direction at 9am"
    WindDir3pm: "Wind direction at 3pm"
    WindSpeed9am: "Wind speed at 9am (km/h)"
    WindSpeed3pm: "Wind speed at 3pm (km/h)"
    Humidity9am: "Humidity at 9am (%)"
    Humidity3pm: "Humidity at 3pm (%)"
    Pressure9am: "Atmospheric pressure at 9am (hPa)"
    Pressure3pm: "Atmospheric pressure at 3pm (hPa)"
    Cloud9am: "Cloud cover at 9am (0-8 scale)"
    Cloud3pm: "Cloud cover at 3pm (0-8 scale)"
    Temp9am: "Temperature at 9am (°C)"
    Temp3pm: "Temperature at 3pm (°C)"
    RainToday: "Rainfall occurred today (Yes/No)"
    RainTomorrow: "Target: Will it rain tomorrow? (Yes/No)"
# Hospital readmissions metadata: patient demographics, admission details,
# visit counts, diagnoses and medication fields. `readmitted` holds the
# readmission status (<30, >30, NO).
1295_hospital_readmissions:
  dataset_description: "Hospital Readmissions Dataset: Patient information used to predict readmission risk."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1295_hospital_readmissions.csv"
  feature_descriptions:
    patient_id: "Unique identifier for patient"
    race: "Patient's race"
    gender: "Patient's gender"
    age: "Age range"
    admission_type_id: "Type of admission (e.g., emergency)"
    discharge_disposition_id: "Discharge status code"
    admission_source_id: "Source of admission (e.g., referral)"
    time_in_hospital: "Length of stay (in days)"
    num_lab_procedures: "Number of lab procedures"
    num_procedures: "Number of other procedures"
    num_medications: "Number of medications prescribed"
    number_outpatient: "Number of outpatient visits"
    number_emergency: "Number of emergency visits"
    number_inpatient: "Number of inpatient visits"
    diag_1: "Primary diagnosis code"
    diag_2: "Secondary diagnosis code"
    diag_3: "Tertiary diagnosis code"
    number_diagnoses: "Number of diagnoses"
    max_glu_serum: "Max glucose serum result"
    A1Cresult: "A1C test result"
    insulin: "Insulin usage"
    change: "Change in medication (Yes/No)"
    diabetesMed: "Is patient on diabetes medication?"
    readmitted: "Readmission status (<30, >30, NO)"
# Catalog entry for the credit card fraud CSV; `Class` is tagged as the target.
1296_fraud_detection:
  dataset_description: "Credit Card Fraud Detection Dataset: Transaction-level features labeled by fraud status."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1296_fraud_detection.csv"
  feature_descriptions:
    Time: "Seconds elapsed between this transaction and the first in the dataset"
    # NOTE(review): `V1-V28` looks like range shorthand rather than a literal
    # column name — confirm the consumer does not look keys up per column.
    V1-V28: "PCA-transformed features to anonymize original data"
    Amount: "Transaction amount"
    Class: "Target class (1 = fraud, 0 = not fraud)"
# Catalog entry for the global terrorism incidents CSV.
# No column is tagged as a target in this entry.
1297_global_terrorism:
  dataset_description: "Global Terrorism Database: Historical data on terrorism incidents worldwide."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1297_global_terrorism.csv"
  feature_descriptions:
    eventid: "Unique identifier for the incident"
    iyear: "Year of incident"
    imonth: "Month of incident"
    iday: "Day of incident"
    country_txt: "Country where the incident occurred"
    region_txt: "Region of the world"
    city: "City of the incident"
    latitude: "Latitude coordinate"
    longitude: "Longitude coordinate"
    attacktype1_txt: "Primary type of attack"
    targtype1_txt: "Primary target type"
    weaptype1_txt: "Primary weapon type"
    nkill: "Number of people killed"
    nwound: "Number of people wounded"
    success: "Was the attack successful?"
    suicide: "Was it a suicide attack?"
    summary: "Text summary of the incident"
# Catalog entry for the salary prediction CSV; `salary` is tagged as the target.
1298_salary_prediction:
  dataset_description: "Salary Prediction Dataset: Features describing education and experience used to predict salary."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1298_salary_prediction.csv"
  feature_descriptions:
    years_experience: "Years of professional experience"
    education_level: "Highest level of education (e.g., Bachelor, Master)"
    industry: "Industry of employment"
    job_title: "Job role/title"
    location: "Location of the job"
    company_size: "Number of employees at the company"
    remote_ratio: "Percentage of remote work allowed"
    contract_type: "Type of contract (e.g., full-time, part-time)"
    salary: "Target variable: Annual salary"
# Catalog entry for the movie recommendation CSV (user ratings plus movie
# metadata). No column is tagged as a target in this entry.
1300_movie_recommendation:
  dataset_description: "Movie Recommendation Dataset: User-item matrix and metadata for building recommendation engines."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1300_movie_recommendation.csv"
  feature_descriptions:
    user_id: "Unique identifier for a user"
    movie_id: "Unique identifier for a movie"
    rating: "User-assigned rating (1-5)"
    timestamp: "Time when the rating was submitted"
    genre: "Movie genre(s)"
    title: "Movie title"
    release_year: "Year the movie was released"
# Catalog entry for the crop yield CSV; `Yield` is tagged as the target.
1301_crop_yield_prediction:
  dataset_description: "Crop Yield Prediction Dataset: Environmental and agricultural parameters for estimating crop yields."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1301_crop_yield_prediction.csv"
  feature_descriptions:
    Soil_Type: "Type of soil on the farmland"
    Rainfall: "Total rainfall during growing season (mm)"
    Temperature: "Average temperature during growing season (°C)"
    pH: "Soil pH level"
    Fertilizer_Used: "Quantity of fertilizer applied (kg/hectare)"
    Crop_Type: "Type of crop grown"
    Irrigation: "Irrigation method used"
    Yield: "Target: Crop yield (tons/hectare)"
# Catalog entry for the credit score CSV; `credit_score` is described as the
# target.
1302_credit_score_prediction:
  dataset_description: "Credit Score Prediction Dataset: Financial and demographic features used to assess creditworthiness."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1302_credit_score_prediction.csv"
  feature_descriptions:
    age: "Age of the individual"
    income: "Annual income"
    employment_status: "Current employment status (e.g., employed, self-employed)"
    credit_history_length: "Number of years of credit history"
    number_of_credit_lines: "Total number of active credit accounts"
    debt_to_income_ratio: "Ratio of total monthly debt payments to income"
    delinquency_count: "Number of past delinquencies"
    loan_amount: "Loan amount requested"
    credit_score: "Target credit score (numerical)"
# Catalog entry for the medical image metadata CSV; `diagnosis` is described as
# the target label.
1303_medical_image_classification:
  dataset_description: "Medical Image Classification Dataset: Metadata features from medical images for diagnostic classification."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1303_medical_image_classification.csv"
  feature_descriptions:
    image_id: "Unique ID for the medical image"
    modality: "Type of scan (e.g., MRI, CT)"
    patient_age: "Age of the patient"
    patient_sex: "Sex of the patient"
    region_of_interest: "Annotated region of concern"
    diagnosis: "Target diagnosis label"
# Catalog entry for the tweet sentiment CSV; `sentiment` holds the sentiment
# label (it is not prefixed with "Target:" like most other entries).
1304_tweet_sentiment:
  dataset_description: "Tweet Sentiment Dataset: Text and metadata of tweets labeled for sentiment analysis."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1304_tweet_sentiment.csv"
  feature_descriptions:
    tweet_id: "Unique tweet identifier"
    text: "Tweet content"
    user_id: "User identifier"
    followers_count: "Number of followers"
    location: "User-reported location"
    sentiment: "Sentiment label (positive, negative, neutral)"
# Catalog entry for the electricity usage CSV; `power_usage` is tagged as the
# target. Separate from `1322_energy_consumption`, which points at another file.
1305_energy_consumption:
  dataset_description: "Energy Consumption Dataset: Time series of electricity usage and associated weather features."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1305_energy_consumption.csv"
  feature_descriptions:
    timestamp: "Date and time of measurement"
    temperature: "Ambient temperature (°C)"
    humidity: "Humidity (%)"
    wind_speed: "Wind speed (m/s)"
    power_usage: "Target: Energy consumption (kWh)"
# Catalog entry for the daily stock CSV; `next_day_price` is tagged as the
# target.
1306_stock_market_prediction:
  dataset_description: "Stock Market Prediction Dataset: Daily stock features for modeling future stock prices."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1306_stock_market_prediction.csv"
  feature_descriptions:
    date: "Trading date"
    open: "Opening price"
    high: "Highest price of the day"
    low: "Lowest price of the day"
    close: "Closing price"
    volume: "Number of shares traded"
    ticker: "Stock ticker symbol"
    next_day_price: "Target: Next day’s closing price"
# Catalog entry for the patient survival CSV; `survival_status` is tagged as the
# target.
1307_patient_survival:
  dataset_description: "Patient Survival Prediction Dataset: Clinical features used to model post-treatment survival likelihood."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1307_patient_survival.csv"
  feature_descriptions:
    patient_id: "Unique identifier for the patient"
    age: "Patient age"
    sex: "Sex of the patient"
    diagnosis: "Primary diagnosis"
    treatment: "Type of treatment administered"
    blood_pressure: "Blood pressure reading"
    heart_rate: "Heart rate measurement"
    tumor_size: "Size of tumor (if applicable)"
    survival_status: "Target: Survival outcome (0 = deceased, 1 = survived)"
# Catalog entry for the air quality CSV; `aqi` is tagged as the target.
# Concentration units are not stated in this entry.
1308_air_quality_index:
  dataset_description: "Air Quality Index Dataset: Environmental measurements for predicting air quality levels."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1308_air_quality_index.csv"
  feature_descriptions:
    date: "Date of measurement"
    location: "Measurement site or region"
    pm2_5: "Particulate matter <2.5μm"
    pm10: "Particulate matter <10μm"
    no2: "Nitrogen dioxide concentration"
    so2: "Sulfur dioxide concentration"
    co: "Carbon monoxide concentration"
    o3: "Ozone concentration"
    aqi: "Target: Air Quality Index (AQI)"
# Catalog entry for the HR attrition CSV; `attrition` is tagged as the target.
1309_human_resources_attrition:
  dataset_description: "HR Attrition Dataset: Employee records for predicting job attrition."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1309_human_resources_attrition.csv"
  feature_descriptions:
    employee_id: "Unique ID for employee"
    age: "Age of the employee"
    department: "Department in the company"
    job_role: "Specific role/job title"
    monthly_income: "Monthly salary"
    years_at_company: "Years spent in the company"
    job_satisfaction: "Satisfaction level (1–4)"
    work_life_balance: "Work-life balance score"
    overtime: "Whether employee works overtime"
    attrition: "Target: Employee left the company (Yes/No)"
# Catalog entry for the telecom customer churn CSV; `Churn` is tagged as the
# target.
1310_customer_churn:
  dataset_description: "Customer Churn Dataset: Telecom customer data for modeling churn probability."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1310_customer_churn.csv"
  feature_descriptions:
    customerID: "Unique customer identifier"
    gender: "Customer gender"
    SeniorCitizen: "Whether the customer is a senior citizen (0 or 1)"
    Partner: "Whether the customer has a partner"
    Dependents: "Whether the customer has dependents"
    tenure: "Number of months the customer has stayed"
    PhoneService: "Whether the customer has phone service"
    MultipleLines: "Whether the customer has multiple lines"
    InternetService: "Type of internet service"
    OnlineSecurity: "Whether online security is enabled"
    OnlineBackup: "Whether online backup is enabled"
    DeviceProtection: "Whether device protection is enabled"
    TechSupport: "Whether technical support is enabled"
    StreamingTV: "Whether streaming TV is subscribed"
    StreamingMovies: "Whether streaming movies is subscribed"
    Contract: "Type of contract"
    PaperlessBilling: "Whether billing is paperless"
    PaymentMethod: "Payment method used"
    MonthlyCharges: "Monthly charges"
    TotalCharges: "Total amount charged"
    Churn: "Target: Whether the customer churned"
# Catalog entry for the e-commerce session CSV; `purchase` is tagged as the
# target.
1311_ecommerce_behavior:
  dataset_description: "E-commerce User Behavior Dataset: Clickstream and user session features for purchase prediction."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1311_ecommerce_behavior.csv"
  feature_descriptions:
    session_id: "Unique ID for user session"
    user_id: "User identifier"
    page_views: "Number of pages viewed in session"
    time_spent_seconds: "Total time spent in session (seconds)"
    product_category: "Category of product viewed"
    referral_source: "How the user landed on the website"
    device_type: "Device used (e.g., mobile, desktop)"
    previous_purchases: "Number of purchases before this session"
    cart_additions: "Items added to cart during session"
    purchase: "Target: Whether a purchase was made (1 or 0)"
# Catalog entry for the housing prices CSV; `SalePrice` is tagged as the target.
1312_housing_prices:
  dataset_description: "Housing Prices Dataset: Real estate attributes for predicting house sale prices."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1312_housing_prices.csv"
  feature_descriptions:
    Id: "Listing identifier"
    LotArea: "Lot size in square feet"
    YearBuilt: "Year the house was built"
    Neighborhood: "Neighborhood name"
    OverallQual: "Overall material and finish quality"
    OverallCond: "Overall condition rating"
    TotalBsmtSF: "Total basement area (sq ft)"
    GrLivArea: "Above ground living area (sq ft)"
    GarageCars: "Number of cars that fit in garage"
    GarageArea: "Garage area (sq ft)"
    FullBath: "Number of full bathrooms"
    BedroomAbvGr: "Number of bedrooms above ground"
    SalePrice: "Target: House sale price"
# Catalog entry for the machine failure CSV; `failure` is tagged as the target.
1313_machine_failure:
  dataset_description: "Machine Failure Prediction Dataset: Sensor data and usage logs for predicting failure events."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1313_machine_failure.csv"
  feature_descriptions:
    machine_id: "Identifier for the machine"
    temperature: "Temperature reading (°C)"
    pressure: "Pressure level (psi)"
    vibration: "Vibration intensity"
    torque: "Applied torque (Nm)"
    power: "Power usage (kW)"
    runtime: "Total operational time"
    error_code: "Error code logged (if any)"
    failure: "Target: Whether machine failed (1 = Yes, 0 = No)"
# Catalog entry for the weather forecasting CSV; `next_day_condition` is tagged
# as the target.
1314_weather_forecasting:
  dataset_description: "Weather Forecasting Dataset: Historical meteorological data for predicting future weather conditions."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1314_weather_forecasting.csv"
  feature_descriptions:
    date: "Date of observation"
    location: "Geographic location"
    temperature: "Daily average temperature (°C)"
    humidity: "Relative humidity (%)"
    wind_speed: "Average wind speed (km/h)"
    precipitation: "Total precipitation (mm)"
    pressure: "Atmospheric pressure (hPa)"
    weather_condition: "Categorical weather condition (e.g., sunny, rainy)"
    next_day_condition: "Target: Forecasted weather condition"
# Catalog entry for the loan default CSV; `loan_status` is tagged as the target.
1315_loan_default:
  dataset_description: "Loan Default Prediction Dataset: Financial and demographic data for identifying default risk."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1315_loan_default.csv"
  feature_descriptions:
    loan_id: "Unique loan identifier"
    applicant_income: "Income of the applicant"
    coapplicant_income: "Income of the co-applicant"
    loan_amount: "Amount of loan requested"
    loan_term: "Duration of loan in months"
    credit_history: "Credit history flag (1 = good, 0 = poor)"
    gender: "Applicant gender"
    marital_status: "Marital status"
    dependents: "Number of dependents"
    education: "Education level of applicant"
    self_employed: "Whether self-employed"
    property_area: "Type of property area (Urban/Rural)"
    loan_status: "Target: Loan default (Yes/No)"
# Catalog entry for the mobile phone specification CSV; `price_range` is tagged
# as the target.
1316_mobile_price_classification:
  dataset_description: "Mobile Price Classification Dataset: Phone specifications used to predict price segment."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1316_mobile_price_classification.csv"
  feature_descriptions:
    battery_power: "Battery capacity (mAh)"
    blue: "Has Bluetooth (1 = Yes, 0 = No)"
    clock_speed: "Processor speed (GHz)"
    dual_sim: "Supports dual SIM (1 = Yes, 0 = No)"
    fc: "Front camera resolution (MP)"
    four_g: "4G enabled (1 = Yes, 0 = No)"
    int_memory: "Internal memory (GB)"
    m_dep: "Mobile depth (cm)"
    mobile_wt: "Weight of mobile (grams)"
    n_cores: "Number of processor cores"
    pc: "Primary camera resolution (MP)"
    ram: "RAM capacity (MB)"
    sc_h: "Screen height (pixels)"
    sc_w: "Screen width (pixels)"
    talk_time: "Talk time (hours)"
    three_g: "3G enabled (1 = Yes, 0 = No)"
    touch_screen: "Has touchscreen (1 = Yes, 0 = No)"
    wifi: "Wi-Fi capability (1 = Yes, 0 = No)"
    price_range: "Target: Price category (0–3)"
# Catalog entry for the flight delay CSV; `delay_minutes` is tagged as the
# target.
1317_flight_delay_prediction:
  dataset_description: "Flight Delay Prediction Dataset: Scheduled and actual flight times with weather and location data."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1317_flight_delay_prediction.csv"
  feature_descriptions:
    flight_number: "Flight ID"
    airline: "Airline code"
    origin_airport: "Departure airport code"
    destination_airport: "Arrival airport code"
    scheduled_departure: "Scheduled departure time"
    scheduled_arrival: "Scheduled arrival time"
    actual_departure: "Actual departure time"
    actual_arrival: "Actual arrival time"
    distance: "Flight distance (miles)"
    delay_minutes: "Target: Delay in minutes"
# Catalog entry for the marketing campaign CSV; `responded` is tagged as the
# target.
1318_marketing_campaign:
  dataset_description: "Marketing Campaign Dataset: Customer features used to predict response to a marketing campaign."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1318_marketing_campaign.csv"
  feature_descriptions:
    customer_id: "Unique customer identifier"
    age: "Age of the customer"
    income: "Annual income"
    marital_status: "Marital status (e.g., single, married)"
    education: "Education level"
    product_views: "Number of products viewed"
    previous_campaigns: "Number of previous marketing contacts"
    last_response: "Response to previous campaign (yes/no)"
    days_since_last_contact: "Days since last campaign contact"
    responded: "Target: Response to current campaign (yes/no)"
# Catalog entry for the room occupancy sensor CSV; `occupancy` is tagged as the
# target.
1319_occupancy_detection:
  dataset_description: "Occupancy Detection Dataset: Environmental sensor data for detecting occupancy in a room."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1319_occupancy_detection.csv"
  feature_descriptions:
    date: "Timestamp of reading"
    temperature: "Temperature in the room (°C)"
    humidity: "Relative humidity (%)"
    light: "Light intensity (lux)"
    CO2: "Carbon dioxide level (ppm)"
    humidity_ratio: "Humidity ratio"
    occupancy: "Target: Whether room is occupied (1 = Yes, 0 = No)"
# Catalog entry for the student performance CSV; `G3` (final grade) is tagged as
# the target, with `G1` and `G2` as the earlier term grades.
1320_student_performance:
  dataset_description: "Student Performance Dataset: Academic and social attributes used to predict student grades."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1320_student_performance.csv"
  feature_descriptions:
    student_id: "Unique student ID"
    gender: "Student gender"
    age: "Age of student"
    study_time: "Weekly study time (hours)"
    failures: "Number of past class failures"
    absences: "Total number of school absences"
    internet: "Internet access at home (yes/no)"
    family_support: "Family educational support (yes/no)"
    health: "Current health status (1 = very bad to 5 = very good)"
    G1: "Grade in first term"
    G2: "Grade in second term"
    G3: "Target: Final grade"
# Catalog entry for the traffic volume CSV; `traffic_volume` is tagged as the
# target. Note that `temp` is described in Kelvin, unlike the °C used elsewhere.
1321_traffic_volume:
  dataset_description: "Traffic Volume Dataset: Traffic flow and weather data for forecasting traffic volume."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1321_traffic_volume.csv"
  feature_descriptions:
    date_time: "Timestamp"
    temp: "Temperature (K)"
    rain_1h: "Rainfall in last hour (mm)"
    snow_1h: "Snowfall in last hour (mm)"
    clouds_all: "Cloudiness (%)"
    weather_main: "Main weather condition"
    weather_description: "Detailed weather description"
    holiday: "Whether the day is a holiday"
    traffic_volume: "Target: Number of vehicles"
# Catalog entry for the building energy use CSV; `energy_use` is tagged as the
# target. Separate from `1305_energy_consumption`, which points at another file.
1322_energy_consumption:
  dataset_description: "Energy Consumption Dataset: Measurements of energy use in a building to forecast future consumption."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1322_energy_consumption.csv"
  feature_descriptions:
    timestamp: "Date and time of measurement"
    temperature: "Outside temperature (°C)"
    humidity: "Relative humidity (%)"
    wind_speed: "Wind speed (m/s)"
    dew_point: "Dew point temperature (°C)"
    energy_use: "Target: Energy consumed (kWh)"
# Catalog entry for the COVID symptoms CSV; `test_result` is tagged as the
# target.
1323_covid_symptoms:
  dataset_description: "COVID Symptoms Dataset: Patient symptoms and demographics for COVID-19 prediction."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1323_covid_symptoms.csv"
  feature_descriptions:
    age: "Age of the patient"
    gender: "Gender"
    fever: "Fever symptom (yes/no)"
    cough: "Cough symptom (yes/no)"
    fatigue: "Fatigue symptom (yes/no)"
    sore_throat: "Sore throat symptom (yes/no)"
    shortness_of_breath: "Shortness of breath (yes/no)"
    headache: "Headache (yes/no)"
    test_result: "Target: COVID test result (positive/negative)"
# Catalog entry for the symptom-based disease prediction CSV; `target_disease`
# is tagged as the target.
1324_disease_prediction:
  dataset_description: "Disease Prediction Dataset: Symptoms used to predict potential diseases."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1324_disease_prediction.csv"
  feature_descriptions:
    itching: "Itching symptom"
    skin_rash: "Skin rash symptom"
    nodal_skin_eruptions: "Nodal skin eruptions"
    continuous_sneezing: "Sneezing symptom"
    chills: "Feeling cold or chills"
    joint_pain: "Joint pain symptom"
    vomiting: "Vomiting occurrence"
    fatigue: "Tiredness or fatigue"
    weight_loss: "Recent weight loss"
    target_disease: "Target: Disease diagnosis"
# Catalog entry for the power plant CSV; `electrical_energy_output` is tagged as
# the target.
1325_power_plant_output:
  dataset_description: "Power Plant Dataset: Environmental conditions used to predict power output."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1325_power_plant_output.csv"
  feature_descriptions:
    ambient_temperature: "Ambient temperature (°C)"
    exhaust_vacuum: "Vacuum pressure (cm Hg)"
    ambient_pressure: "Ambient pressure (millibar)"
    relative_humidity: "Relative humidity (%)"
    electrical_energy_output: "Target: Net power output (MW)"
# Catalog entry for the healthcare cost CSV; `charges` is tagged as the target.
1326_healthcare_cost:
  dataset_description: "Healthcare Cost Dataset: Patient demographics and medical history to predict healthcare expenses."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1326_healthcare_cost.csv"
  feature_descriptions:
    age: "Age of the patient"
    sex: "Gender of the patient"
    bmi: "Body mass index"
    children: "Number of children covered by health insurance"
    smoker: "Smoking status (yes/no)"
    region: "Residential region"
    charges: "Target: Medical charges billed to the insurer"
# Catalog entry for the digital ad click CSV; `clicked_on_ad` is tagged as the
# target.
1327_digital_ad_click:
  dataset_description: "Digital Ad Click Dataset: User features and activity used to predict ad click behavior."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1327_digital_ad_click.csv"
  feature_descriptions:
    user_id: "Unique user identifier"
    age: "User age"
    gender: "Gender of user"
    estimated_salary: "Estimated annual salary"
    time_spent_on_site: "Time spent on site (minutes)"
    pages_visited: "Number of pages visited"
    clicked_on_ad: "Target: Whether the ad was clicked (1/0)"
# Catalog entry for the employee attrition CSV; `attrition` is tagged as the
# target. Separate from `1309_human_resources_attrition`, which points at
# another file and uses `overtime` rather than `over_time`.
1328_employee_attrition:
  dataset_description: "Employee Attrition Dataset: HR data used to predict employee attrition (leaving the company)."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1328_employee_attrition.csv"
  feature_descriptions:
    age: "Employee age"
    department: "Department of the employee"
    job_role: "Role/designation"
    job_satisfaction: "Job satisfaction rating (1-4)"
    monthly_income: "Monthly income"
    years_at_company: "Years at the current company"
    over_time: "Works overtime (yes/no)"
    attrition: "Target: Employee attrition status (yes/no)"
# Catalog entry for the mall customer segmentation CSV.
# No column is tagged as a target in this entry.
1329_mall_customer_segmentation:
  dataset_description: "Mall Customer Segmentation Dataset: Customer details for segmentation analysis."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1329_mall_customer_segmentation.csv"
  feature_descriptions:
    customer_id: "Unique customer identifier"
    gender: "Gender of the customer"
    age: "Customer age"
    annual_income: "Annual income in thousands"
    spending_score: "Spending score (1–100)"
# Catalog entry for the transaction fraud CSV; `is_fraud` is tagged as the
# target. Separate from `1296_fraud_detection`, which points at another file.
1330_fraud_detection:
  dataset_description: "Fraud Detection Dataset: Transaction details used to classify fraudulent vs. genuine behavior."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1330_fraud_detection.csv"
  feature_descriptions:
    transaction_id: "Unique transaction identifier"
    amount: "Transaction amount"
    transaction_type: "Type of transaction (e.g., transfer, cash_out)"
    oldbalance_org: "Initial balance of origin account"
    newbalance_org: "New balance of origin account"
    oldbalance_dest: "Initial balance of destination account"
    newbalance_dest: "New balance of destination account"
    is_fraud: "Target: Fraudulent transaction flag (1 = fraud, 0 = genuine)"
# Catalog entry for the weather/energy CSV; `energy_usage` is tagged as the
# target. The unit of `dew_point` is not stated in this entry.
1331_weather_energy:
  dataset_description: "Weather-Energy Dataset: Weather measurements and corresponding energy consumption."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1331_weather_energy.csv"
  feature_descriptions:
    timestamp: "Timestamp of observation"
    temperature: "Temperature in degrees Celsius"
    wind_speed: "Wind speed in m/s"
    dew_point: "Dew point temperature"
    humidity: "Humidity percentage"
    solar_radiation: "Solar radiation (W/m²)"
    energy_usage: "Target: Total energy consumption (kWh)"
# Catalog entry for the census income CSV; `income` is tagged as the target.
1332_income_prediction:
  dataset_description: "Income Prediction Dataset: Census and employment data used to predict income bracket."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1332_income_prediction.csv"
  feature_descriptions:
    age: "Age of the individual"
    workclass: "Employment sector"
    education: "Level of education"
    education_num: "Education level (numeric)"
    marital_status: "Marital status"
    occupation: "Occupation type"
    relationship: "Family role"
    race: "Race"
    sex: "Gender"
    capital_gain: "Capital gain"
    capital_loss: "Capital loss"
    hours_per_week: "Working hours per week"
    native_country: "Country of origin"
    income: "Target: Income bracket (>50K or <=50K)"
# Catalog entry for the Titanic passenger CSV; `survived` is tagged as the
# target.
1333_titanic_survival:
  dataset_description: "Titanic Survival Dataset: Passenger data used to predict survival outcome."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1333_titanic_survival.csv"
  feature_descriptions:
    passenger_id: "Unique identifier for the passenger"
    pclass: "Passenger class (1st, 2nd, 3rd)"
    name: "Name of the passenger"
    sex: "Gender"
    age: "Age"
    sibsp: "Number of siblings/spouses aboard"
    parch: "Number of parents/children aboard"
    ticket: "Ticket number"
    fare: "Fare paid"
    cabin: "Cabin number"
    embarked: "Port of embarkation"
    survived: "Target: Survival status (1 = survived, 0 = died)"
# Catalog entry for the bank direct-marketing CSV; `y` is tagged as the target.
1334_bank_marketing:
  dataset_description: "Bank Marketing Dataset: Customer data from a direct marketing campaign to predict term deposit subscription."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1334_bank_marketing.csv"
  feature_descriptions:
    age: "Customer's age"
    job: "Type of job"
    marital: "Marital status"
    education: "Level of education"
    default: "Has credit in default? (yes/no)"
    balance: "Average yearly balance in euros"
    housing: "Has housing loan? (yes/no)"
    loan: "Has personal loan? (yes/no)"
    contact: "Contact communication type"
    day: "Last contact day of the month"
    month: "Last contact month of the year"
    duration: "Last contact duration, in seconds"
    campaign: "Number of contacts during this campaign"
    pdays: "Days since last contact from previous campaign"
    previous: "Number of contacts before this campaign"
    poutcome: "Outcome of the previous marketing campaign"
    # Key is quoted on purpose: a bare `y` is a boolean under the YAML 1.1 bool
    # type, so some loaders would turn this key into `true` instead of "y".
    "y": "Target: Whether the client subscribed to a term deposit (yes/no)"
# Catalog entry for the epidemic simulation CSV (simulation parameters and
# outcomes). No column is tagged as a target in this entry.
1335_epidemic_simulation:
  dataset_description: "Epidemic Simulation Dataset: Parameters and outcomes from disease spread simulations."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1335_epidemic_simulation.csv"
  feature_descriptions:
    simulation_id: "Unique ID for simulation run"
    beta: "Infection rate"
    gamma: "Recovery rate"
    population: "Total population in the simulation"
    initial_infected: "Initial number of infected individuals"
    peak_infected: "Maximum number of infected during simulation"
    total_infected: "Total number of infected individuals over time"
    duration: "Total duration of the epidemic"
    intervention_effectiveness: "Effectiveness of interventions (0-1)"
# Catalog entry for the meteorological disasters CSV; `disaster_severity` is
# tagged as the target. The unit of `pressure` is not stated in this entry.
1336_meteorological_disasters:
  dataset_description: "Meteorological Disasters Dataset: Event characteristics used to classify severity of impact."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1336_meteorological_disasters.csv"
  feature_descriptions:
    event_id: "Unique identifier for the event"
    event_type: "Type of weather event (e.g., flood, hurricane)"
    location: "Geographic location of the event"
    wind_speed: "Maximum recorded wind speed (km/h)"
    rainfall: "Total rainfall (mm)"
    pressure: "Minimum atmospheric pressure recorded"
    duration_hours: "Duration of event in hours"
    fatalities: "Number of deaths"
    damage_usd: "Estimated monetary damage (USD)"
    disaster_severity: "Target: Severity classification"
# Catalog entry for the IoT sensor network CSV; `anomaly_flag` is tagged as the
# target. Reading units are not stated in this entry.
1337_iot_sensor_network:
  dataset_description: "IoT Sensor Network Dataset: Sensor readings for anomaly detection in industrial IoT setups."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1337_iot_sensor_network.csv"
  feature_descriptions:
    sensor_id: "Unique sensor identifier"
    timestamp: "Time of the reading"
    temperature: "Temperature reading"
    humidity: "Humidity reading"
    pressure: "Pressure reading"
    vibration: "Vibration level"
    device_status: "Device operational status"
    anomaly_flag: "Target: Indicates if the reading is an anomaly (1/0)"
# Catalog entry for the zoo animals CSV. No column is tagged "Target:";
# `type` is presumably the label — confirm.
# NOTE(review): the numeric prefix 1334 is also used by `1334_bank_marketing`;
# the keys and paths differ, so nothing is overwritten — confirm the numbering
# is intentional.
1334_ZOO:
  dataset_description: "Zoo Dataset: Animal attributes (e.g., hair, feathers, number of legs) with an animal type category."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1334_ZOO.csv"
  feature_descriptions:
    animal: "Animal identifier"
    hair: "Presence of hair (1 = yes, 0 = no)"
    feathers: "Presence of feathers"
    eggs: "Lays eggs"
    milk: "Produces milk"
    airborne: "Can fly"
    aquatic: "Lives in water"
    predator: "Is a predator"
    toothed: "Has teeth"
    backbone: "Has a backbone"
    breathes: "Can breathe"
    venomous: "Is venomous"
    fins: "Has fins"
    legs: "Number of legs"
    tail: "Has a tail"
    domestic: "Is domesticated"
    catsize: "Size similar to a cat"
    type: "Animal type category"

# Catalog entry for the math course student performance CSV. No column is
# tagged "Target:"; `G3` (final grade) is presumably the label — confirm.
# NOTE(review): the numeric prefix 1335 is also used by
# `1335_epidemic_simulation`; the keys and paths differ.
1335_Math-Students:
  dataset_description: "Student performance data for math course"
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1335_Math-Students.csv"
  feature_descriptions:
    school: "Student's school (binary: 'GP' or 'MS')"
    sex: "Gender"
    age: "Age (numeric)"
    address: "Type of home address (urban/rural)"
    famsize: "Family size"
    Pstatus: "Parent's cohabitation status"
    Medu: "Mother's education level"
    Fedu: "Father's education level"
    Mjob: "Mother's job"
    Fjob: "Father's job"
    reason: "Reason to choose this school"
    guardian: "Student's guardian"
    traveltime: "Travel time to school"
    studytime: "Weekly study time"
    failures: "Number of past class failures"
    schoolsup: "Extra educational support"
    famsup: "Family educational support"
    paid: "Extra paid classes"
    activities: "Extra-curricular activities"
    nursery: "Attended nursery school"
    higher: "Wants to take higher education"
    internet: "Internet access at home"
    romantic: "In a romantic relationship"
    famrel: "Family relationship quality"
    freetime: "Free time after school"
    goout: "Going out with friends"
    Dalc: "Workday alcohol consumption"
    Walc: "Weekend alcohol consumption"
    health: "Current health status"
    absences: "Number of school absences"
    G1: "First period grade"
    G2: "Second period grade"
    G3: "Final grade"

# Catalog entry for the Wingstop daily stock price CSV. No column is tagged as
# a target, and no date column is described here.
# NOTE(review): the numeric prefix 1336 is also used by
# `1336_meteorological_disasters`; the keys and paths differ.
1336_wingstop_stock_prices:
  dataset_description: "Wingstop daily stock market performance"
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1336_wingstop_stock_prices.csv"
  feature_descriptions:
    open: "Opening price of the stock"
    high: "Highest price during the day"
    low: "Lowest price during the day"
    close: "Closing price"
    adjclose: "Adjusted closing price"
    volume: "Volume of stocks traded"

# Catalog entry for the Istanbul daily weather CSV (2009-2019). No column is
# tagged as a target, and measurement units are not stated in this entry.
1339_Weather-Istanbul-Data-2009-2019:
  dataset_description: "Daily weather conditions in Istanbul (2009-2019)"
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1339_Weather-Istanbul-Data-2009-2019.csv"
  feature_descriptions:
    Condition: "General weather condition (e.g., sunny, rainy)"
    Rain: "Rain amount or presence"
    MaxTemp: "Maximum temperature of the day"
    MinTemp: "Minimum temperature of the day"
    SunRise: "Sunrise time"
    SunSet: "Sunset time"
    MoonRise: "Moonrise time"
    MoonSet: "Moonset time"
    AvgWind: "Average wind speed"
    AvgHumidity: "Average humidity level"
    AvgPressure: "Average atmospheric pressure"
# Dataset 1340: NBA team and player game statistics, 1996-2019.
# NOTE(review): 'Player_1 … Player_5' and the '*_playerX' keys look like
# placeholders for groups of columns rather than literal CSV headers —
# confirm against the file before relying on exact key lookups.
1340_1996-2019-NBA-Stats-Complete-With-Player-Stats:
  dataset_description: 'Complete NBA game and player statistics (1996–2019)'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1340_1996-2019-NBA-Stats-Complete-With-Player-Stats.csv'
  feature_descriptions:
    Season: 'NBA season year'
    Date: 'Date of the game'
    Season_Half: 'First or second half of the season'
    Game: 'Game number or ID'
    Team: 'Team name'
    Q1: 'Points scored in 1st quarter'
    Q2: 'Points scored in 2nd quarter'
    Q3: 'Points scored in 3rd quarter'
    Q4: 'Points scored in 4th quarter'
    Total: 'Total team score'
    Home/Away: 'Game location indicator'
    Won: 'Whether the team won'
    WonQ1: 'Won 1st quarter'
    WonH1: 'Won 1st half'
    FGM_team: 'Field goals made by team'
    FGA_team: 'Field goals attempted'
    3PM_team: '3-point shots made'
    3PA_team: '3-point attempts'
    FT%_team: 'Free throw percentage'
    Player_1 … Player_5: 'Name of player 1–5'
    Min_playerX: 'Minutes played'
    FGM_playerX: 'Field goals made'
    FG%_playerX: 'Field goal percentage'
    PTS_playerX: 'Points scored'
    Height_(in)_playerX: 'Player height in inches'

# Dataset 1341: Lending Club loans; 'not.fully.paid' is marked as the target.
1341_Lending-Club-Loan-Data:
  dataset_description: 'Lending Club loan data for default risk prediction'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1341_Lending-Club-Loan-Data.csv'
  feature_descriptions:
    credit.policy: 'Meets credit underwriting criteria'
    purpose: 'Loan purpose'
    int.rate: 'Interest rate'
    installment: 'Monthly installment amount'
    log.annual.inc: 'Log of annual income'
    dti: 'Debt-to-income ratio'
    fico: 'FICO credit score'
    days.with.cr.line: 'Number of days credit line has been open'
    revol.bal: 'Revolving balance'
    revol.util: 'Revolving line utilization rate'
    inq.last.6mths: 'Credit inquiries in the last 6 months'
    delinq.2yrs: 'Delinquencies in the past 2 years'
    pub.rec: 'Public derogatory records'
    not.fully.paid: 'Loan not fully paid indicator (target)'

# Dataset 1347: BNG vehicle silhouettes — shape descriptors plus a class label.
1347_BNG(vehicle):
  dataset_description: 'BNG Vehicle Silhouettes: Shape descriptors for vehicle classification'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1347_BNG(vehicle).csv'
  feature_descriptions:
    COMPACTNESS: 'Compactness of the shape'
    CIRCULARITY: 'Circularity ratio'
    DISTANCE_CIRCULARITY: 'Distance from ideal circularity'
    RADIUS_RATIO: 'Ratio of min to max radius'
    PR.AXIS_ASPECT_RATIO: 'Aspect ratio of the principal axis'
    MAX.LENGTH_ASPECT_RATIO: 'Max length aspect ratio'
    SCATTER_RATIO: 'Scatter of pixel points'
    ELONGATEDNESS: 'Shape elongation metric'
    PR.AXIS_RECTANGULARITY: 'Rectangularity based on principal axis'
    MAX.LENGTH_RECTANGULARITY: 'Rectangularity based on max length'
    SCALED_VARIANCE_MAJOR: 'Variance along major axis'
    SCALED_VARIANCE_MINOR: 'Variance along minor axis'
    SCALED_RADIUS_OF_GYRATION: 'Normalized radius of gyration'
    SKEWNESS_ABOUT_MAJOR: 'Skewness along major axis'
    SKEWNESS_ABOUT_MINOR: 'Skewness along minor axis'
    KURTOSIS_ABOUT_MAJOR: 'Kurtosis along major axis'
    KURTOSIS_ABOUT_MINOR: 'Kurtosis along minor axis'
    HOLLOWS_RATIO: 'Ratio of hollow areas'
    Class: 'Vehicle class label'
# Dataset 1352: country-level socioeconomic status scores and region codes.
1352_Country-Socioeconomic-Status-Scores-Part-II:
  dataset_description: 'Country Socioeconomic Indicators: Includes GDP, education, SES, and region classification.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1352_Country-Socioeconomic-Status-Scores-Part-II.csv'
  feature_descriptions:
    unid: 'Unique country identifier'
    wbid: 'World Bank identifier'
    country: 'Country name'
    year: 'Year of observation'
    ses: 'Socioeconomic status score'
    class: 'SES class category'
    gdppc: 'GDP per capita'
    yrseduc: 'Average years of education'
    region5: 'Five-region classification'
    regionUN: 'UN region classification'

# Dataset 1355: Porto Seguro insurance claim prediction.
# NOTE(review): every key of the form 'a to b' describes a range of columns,
# not a literal CSV header — confirm how the consumer resolves these.
1355_porto-seguro:
  dataset_description: 'Porto Seguro Insurance: Features for predicting insurance claim risk.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1355_porto-seguro.csv'
  feature_descriptions:
    target: 'Target: claim filed or not'
    ps_ind_01 to ps_ind_18_bin: 'Personal indicators (categorical/binary)'
    ps_reg_01 to ps_reg_03: 'Regional features'
    ps_car_01_cat to ps_car_11_cat: 'Car-related categorical features'
    ps_car_11 to ps_car_15: 'Car-related continuous features'
    ps_calc_01 to ps_calc_14: 'Calculated features'
    ps_calc_15_bin to ps_calc_20_bin: 'Binary calculated features'

# Dataset 1356: KDD census income; 'income_50k' is marked as the target.
# Strings containing an apostrophe stay double-quoted to avoid '' escaping.
1356_Census-Income-KDD:
  dataset_description: 'KDD Census Income Dataset: Demographic and employment details for income prediction.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1356_Census-Income-KDD.csv'
  feature_descriptions:
    age: 'Age of individual'
    class_worker: 'Class of worker'
    det_ind_code: 'Industry code'
    det_occ_code: 'Occupation code'
    education: 'Education level'
    wage_per_hour: 'Wage per hour'
    hs_college: 'High school or college indicator'
    marital_stat: 'Marital status'
    major_ind_code: 'Major industry'
    major_occ_code: 'Major occupation'
    race: 'Race of individual'
    hisp_origin: 'Hispanic origin'
    sex: 'Sex of individual'
    union_member: 'Union membership status'
    unemp_reason: 'Reason for unemployment'
    full_or_part_emp: 'Full/part-time employment'
    capital_gains: 'Capital gains amount'
    capital_losses: 'Capital loss amount'
    stock_dividends: 'Stock dividends received'
    tax_filer_stat: 'Tax filing status'
    region_prev_res: 'Region of previous residence'
    state_prev_res: 'State of previous residence'
    det_hh_fam_stat: 'Detailed household/family status'
    det_hh_summ: 'Household summary'
    unknown: 'Unknown coded feature'
    mig_chg_msa: 'Migration status: MSA'
    mig_chg_reg: 'Migration status: region'
    mig_move_reg: 'Migration: moved within region'
    mig_same: 'Migration: same area'
    mig_prev_sunbelt: 'Migration: previous sunbelt area'
    num_emp: 'Number of employers'
    fam_under_18: 'Family members under 18'
    country_father: "Father's country of origin"
    country_mother: "Mother's country of origin"
    country_self: "Individual's country of origin"
    citizenship: 'Citizenship status'
    own_or_self: 'Own business/self-employed'
    vet_question: 'Veteran status question response'
    vet_benefits: 'Veteran benefits status'
    weeks_worked: 'Weeks worked in a year'
    year: 'Survey year'
    income_50k: 'Target: Income >50K (1) or <=50K (0)'
# Dataset 1359: Slovakia daily COVID-19 counts.
1359_SKdailyCOVID19:
  dataset_description: 'Slovakia Daily COVID-19 Stats: Daily confirmed cases, recoveries, active cases, and testing data.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1359_SKdailyCOVID19.csv'
  feature_descriptions:
    attribute_potvrdenych: 'Number of confirmed cases'
    attribute_vyliecenych: 'Number of recovered cases'
    attribute_aktivnych: 'Number of active cases'
    attribute_testov: 'Number of tests conducted'
    attribute_prirastkov: 'Number of new cases (daily increment)'
    attribute_umrti: 'Number of deaths'

# Dataset 1362: hourly PM2.5 air-quality measurements from Beijing.
1362_pm25dataset:
  dataset_description: 'PM2.5 Air Quality Dataset: Hourly air pollution measurements from Beijing.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1362_pm25dataset.csv'
  feature_descriptions:
    year: 'Year of observation'
    month: 'Month'
    day: 'Day of month'
    hour: 'Hour of day'
    pm2.5: 'PM2.5 concentration (µg/m³)'
    DEWP: 'Dew point (°C)'
    TEMP: 'Temperature (°C)'
    cbwd: 'Wind direction'
    Iws: 'Cumulative wind speed'

# Dataset 1364: survey on regional identity in the US Midwest.
# NOTE(review): 'Do_you_consider_*' is a wildcard covering several columns,
# not a literal header — confirm how the consumer matches it.
1364_Midwest_survey:
  dataset_description: 'Midwest Regional Survey: Identity and regional classification based on perception.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1364_Midwest_survey.csv'
  feature_descriptions:
    What_would_you_call_the_part_of_the_country_you_live_in_now: 'Respondent’s own naming for their region'
    How_much_do_you_personally_identify_as_a_Midwesterner: 'Self-identification as a Midwesterner'
    Do_you_consider_*: 'Binary response on whether a specific state is considered part of the Midwest'
    Gender: 'Gender of respondent'
    Age: 'Age of respondent'
    Household_Income: 'Household income bracket'
    Education: 'Highest education level'
    In_what_ZIP_code_is_your_home_located: 'ZIP code of respondent'
    Census_Region: 'US Census-defined region'

# Dataset 1365: NBA shot logs — per-shot context and outcome.
1365_nba-shot-logs:
  dataset_description: 'NBA Shot Logs: Details on shot context and outcomes for each possession.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1365_nba-shot-logs.csv'
  feature_descriptions:
    GAME_ID: 'Unique identifier for the game'
    MATCHUP: 'Teams involved in the game'
    LOCATION: 'Home or away indicator'
    W: 'Game win (W) or loss (L)'
    FINAL_MARGIN: 'Final score margin'
    SHOT_NUMBER: 'Index of shot within game'
    PERIOD: 'Quarter of the game'
    GAME_CLOCK: 'Time left in the quarter'
    SHOT_CLOCK: 'Time left on shot clock'
    DRIBBLES: 'Number of dribbles before shot'
    TOUCH_TIME: 'Time ball was held before shot'
    SHOT_DIST: 'Distance of the shot (feet)'
    PTS_TYPE: 'Points awarded (2 or 3)'
    SHOT_RESULT: 'Made or missed shot'
    CLOSEST_DEFENDER: 'Name of closest defender'
    CLOSE_DEF_DIST: 'Distance to closest defender (feet)'
    FGM: 'Field goals made'
    PTS: 'Points scored'
    player_name: 'Player name'
    player_id: 'Unique player identifier'

# Dataset 1366: bank marketing campaign; the column "y" is the target.
1366_bankmarketing:
  dataset_description: "Bank Marketing Campaign: Client data and response to marketing calls."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1366_bankmarketing.csv"
  feature_descriptions:
    age: "Age of the client"
    job: "Type of job"
    marital: "Marital status"
    education: "Education level"
    default: "Default credit status"
    housing: "Housing loan status"
    loan: "Personal loan status"
    contact: "Contact communication type"
    month: "Month of last contact"
    day_of_week: "Day of the week for contact"
    duration: "Duration of last contact (seconds)"
    campaign: "Number of contacts performed during this campaign"
    pdays: "Days since last contact"
    previous: "Number of contacts before this campaign"
    poutcome: "Outcome of the previous campaign"
    emp.var.rate: "Employment variation rate"
    cons.price.idx: "Consumer price index"
    cons.conf.idx: "Consumer confidence index"
    euribor3m: "Euribor 3 month rate"
    nr.employed: "Number of employees"
    # Quoted on purpose: a bare y is a boolean literal in the YAML 1.1 bool
    # type, so some loaders would turn this key into true instead of "y".
    "y": "Target: response to marketing (yes/no)"
# Dataset 1368: IMDb average ratings and vote counts per title.
1368_IMDb-Ratings:
  dataset_description: 'IMDb Ratings: Average ratings and vote counts for movies.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1368_IMDb-Ratings.csv'
  feature_descriptions:
    titleId: 'Unique IMDb identifier for the movie'
    title: 'Title of the movie'
    averageRating: 'Average IMDb rating'
    numVotes: 'Number of votes received'

# Dataset 1369: speed dating — participant/partner attributes and decisions.
# NOTE(review): 'sports to yoga' names a range of activity columns, not a
# literal header — confirm how the consumer resolves it.
1369_speeddating:
  dataset_description: 'Speed Dating Dataset: Personal preferences, self-perceptions, and decision outcomes.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1369_speeddating.csv'
  feature_descriptions:
    wave: 'Wave/session number'
    gender: 'Gender of the participant'
    age: 'Participant’s age'
    age_o: "Partner's age"
    race: 'Race of participant'
    race_o: 'Race of partner'
    samerace: 'Whether races match'
    field: 'Field of study'
    attractive: 'Self-rated attractiveness'
    sincere: 'Self-rated sincerity'
    intelligence: 'Self-rated intelligence'
    funny: 'Self-rated humor'
    ambition: 'Self-rated ambition'
    shared_interests_partner: 'Shared interests with partner'
    sports to yoga: 'Interest levels in various activities'
    expected_num_matches: 'Expected number of matches'
    like: 'Self-reported liking of partner'
    guess_prob_liked: 'Estimated probability that partner liked them'
    met: 'Whether the pair had previously met'
    decision: 'Decision made by the participant'
    decision_o: 'Decision made by the partner'
    match: 'Whether the pair matched'

# Dataset 1375: MAGIC gamma telescope; 'class' is marked as the target.
1375_MAGIC-Gamma-Telescope-Dataset:
  dataset_description: 'MAGIC Gamma Telescope: Signal classification for gamma vs hadron particles.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1375_MAGIC-Gamma-Telescope-Dataset.csv'
  feature_descriptions:
    fLength: 'Major axis length of the image'
    fWidth: 'Minor axis width of the image'
    fSize: 'Size (sum of content of pixels)'
    fConc: 'Concentration ratio'
    fConc1: 'Largest pixel concentration'
    fAsym: 'Asymmetry of the image'
    fM3Long: '3rd root of the 3rd moment along major axis'
    fM3Trans: '3rd root of the 3rd moment along minor axis'
    fAlpha: 'Angle of major axis with vector to origin'
    fDist: 'Distance from origin'
    class: 'Target: gamma or hadron'

# Dataset 1379: German credit data (German-language column names).
1379_German-Credit-Data:
  dataset_description: 'German Credit Dataset: Financial and personal data for credit risk assessment.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1379_German-Credit-Data.csv'
  feature_descriptions:
    laufkont: 'Status of existing checking account'
    laufzeit: 'Duration in months'
    moral: 'Credit history'
    verw: 'Purpose of the credit'
    hoehe: 'Credit amount'
    sparkont: 'Savings account'
    beszeit: 'Employment length'
    rate: 'Installment rate'
    famges: 'Personal status and sex'
    buerge: 'Guarantors'
    wohnzeit: 'Years at current residence'
    verm: 'Assets'
    alter: 'Age'
    weitkred: 'Other installment plans'
    wohn: 'Housing'
    bishkred: 'Number of existing credits'
    beruf: 'Job type'
    pers: 'Number of people liable'
    telef: 'Telephone ownership'
    gastarb: 'Foreign worker status'
    kredit: 'Credit risk classification'

# Dataset 1380: basketball player statistics with a performance target.
1380_Performance-Prediction:
  dataset_description: 'Basketball Player Stats: Performance stats to predict player effectiveness.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1380_Performance-Prediction.csv'
  feature_descriptions:
    Name: 'Player name'
    GamesPlayed: 'Number of games played'
    MinutesPlayed: 'Total minutes played'
    PointsPerGame: 'Average points per game'
    FieldGoalsMade: 'Field goals made'
    FieldGoalsAttempt: 'Field goals attempted'
    FieldGoalPercent: 'Field goal percentage'
    3PointMade: '3-point field goals made'
    3PointAttempt: '3-point field goals attempted'
    3PointPercent: '3-point shooting percentage'
    FreeThrowMade: 'Free throws made'
    FreeThrowAttempt: 'Free throws attempted'
    FreeThrowPercent: 'Free throw percentage'
    OffensiveRebounds: 'Offensive rebounds'
    DefensiveRebounds: 'Defensive rebounds'
    Rebounds: 'Total rebounds'
    Assists: 'Assists'
    Steals: 'Steals'
    Blocks: 'Blocks'
    Turnovers: 'Turnovers'
    Target: 'Performance classification or regression target'

# Dataset 1383: English Premier League match statistics, 2009-2019.
1383_English-Premier-League-Data-2009---2019:
  dataset_description: 'EPL Match Statistics: Team and referee information, full/half scores, cards, and fouls.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1383_English-Premier-League-Data-2009---2019.csv'
  feature_descriptions:
    Date: 'Match date'
    HomeTeam: 'Home team'
    AwayTeam: 'Away team'
    FTHG: 'Full time home goals'
    FTAG: 'Full time away goals'
    FTR: 'Full time result'
    HTHG: 'Half time home goals'
    HTAG: 'Half time away goals'
    HTR: 'Half time result'
    Referee: 'Referee name'
    HS: 'Home shots'
    AS: 'Away shots'
    HST: 'Home shots on target'
    AST: 'Away shots on target'
    HF: 'Home fouls'
    AF: 'Away fouls'
    HC: 'Home corners'
    AC: 'Away corners'
    HY: 'Home yellow cards'
    AY: 'Away yellow cards'
    HR: 'Home red cards'
    AR: 'Away red cards'

# Dataset 1394: classic iris measurements; 'Species' is the target class.
1394_IRIS-flower-dataset:
  dataset_description: 'Classic Iris Dataset: Morphological measurements for iris species classification.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1394_IRIS-flower-dataset.csv'
  feature_descriptions:
    SepalLengthCm: 'Sepal length in cm'
    SepalWidthCm: 'Sepal width in cm'
    PetalLengthCm: 'Petal length in cm'
    PetalWidthCm: 'Petal width in cm'
    Species: 'Target class: species name'

# Dataset 1400: iris measurements with dotted column names.
1400_iriiiiiis:
  dataset_description: 'Iris Flower Measurements (Alternate Format): Same data as classic iris dataset with different naming.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1400_iriiiiiis.csv'
  feature_descriptions:
    Sepal.Length: 'Sepal length in cm'
    Sepal.Width: 'Sepal width in cm'
    Petal.Length: 'Petal length in cm'
    Petal.Width: 'Petal width in cm'
    Species: 'Target class: iris species'
# Dataset 1402: iris test split; 'Species' is the target.
1402_iris_test:
  dataset_description: 'Test Iris Dataset: Morphological measurements for iris species classification.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1402_iris_test.csv'
  feature_descriptions:
    Sepal.Length: 'Sepal length in cm'
    Sepal.Width: 'Sepal width in cm'
    Petal.Length: 'Petal length in cm'
    Petal.Width: 'Petal width in cm'
    Species: 'Iris species (target)'

# Dataset 1406: law school admission (binary variant).
# NOTE(review): the path spells "bianry"; left untouched on the assumption
# that it matches the file name on disk — confirm before renaming either.
1406_law-school-admission-binary:
  dataset_description: "Law School Admission (Binary): Academic attributes and demographic data for bar-pass prediction."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1406_law-school-admission-bianry.csv"
  feature_descriptions:
    age: "Applicant age"
    decile1: "First-year law school GPA decile ranking"
    decile3: "Third-year GPA decile"
    fam_inc: "Family income bracket"
    lsat: "LSAT score"
    ugpa: "Undergraduate GPA"
    gender: "Applicant gender"
    race1: "Race classification"
    cluster: "Clustering segment identifier"
    fulltime: "Full-time attendance flag"
    bar: "Bar exam pass (1 = yes, 0 = no)"
    # "gt3" reads as "greater than 3", the same suffix convention as
    # income96gt17 in the national longitudinal survey entry.
    ugpagt3: "Indicator: undergraduate GPA greater than 3.0"

# Dataset 1407: adult autism screening.
# NOTE(review): "A1_Score to A10_Score" names a range of columns rather than
# a literal header; "jundice" and "austim" are kept as written because they
# are lookup keys — confirm against the CSV header.
1407_autism_adult:
  dataset_description: "Autism in Adults Screening Dataset: Adult screening questions and demographic predictors."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1407_autism-adult-data.csv"
  feature_descriptions:
    A1_Score to A10_Score: "Responses to screening questions"
    age: "Age of individual"
    gender: "Gender"
    ethnicity: "Ethnic background"
    jundice: "History of jaundice (yes/no)"
    austim: "Autism diagnosis history"
    country_of_res: "Country of residence"
    used_app_before: "Used screening app before (yes/no)"
    result: "Screening test result"
    age_desc: "Age category description"
    relation: "Relation to evaluator"
    Class/ASD: "Final classification (ASD or not)"

# Dataset 1408: National Longitudinal Survey with binary outcomes.
1408_national_longitudinal_binary:
  dataset_description: 'National Longitudinal Survey (binary outcomes): Youth demographic and health markers.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1408_national-longitudinal-survey-binary.csv'
  feature_descriptions:
    age: 'Age in years'
    race: 'Race'
    gender: 'Gender'
    grade90: '1990 grade level'
    income96: '1996 income'
    partner: 'Partner status'
    height: 'Height in cm'
    weight: 'Weight in kg'
    famsize: 'Family size'
    genhealth: 'General health rating'
    illegalact: 'Engaged in illegal acts (yes/no)'
    charged: 'Charged with crime (yes/no)'
    jobsnum90: 'Number of jobs in 1990'
    afqt89: 'AFQT score in 1989'
    typejob90: 'Job type in 1990'
    jobtrain90: 'Job training received in 1990'
    income96gt17: 'Income >17K in 1996 (yes/no)'

# Dataset 1409: contraceptive method choice; the last column is the target.
1409_CMC_data:
  dataset_description: 'Contraceptive Method Choice Data: Demographic details impacting contraceptive method use.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1409_CMC_Data.csv'
  feature_descriptions:
    Wife_age: 'Age of wife'
    Wifes_education: 'Education level of the wife'
    Husbands_education: 'Education level of the husband'
    Number_of_children_ever_born: 'Number of children born'
    Wifes_religion: 'Wife’s religion'
    Wifes_now_working: 'Wife currently working (yes/no)'
    Husbands_occupation: 'Occupation type'
    Standard-of-living_index: 'Living standard index'
    Media_exposure: 'Access to media'
    Contraceptive_method_used: 'Type of contraceptive used (target)'

# Dataset 1411: DGF public transit trips per line (French column names).
1411_dgf_test:
  dataset_description: 'DGF Public Transit Test: Daily passenger counts per transit line.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1411_dgf_test.csv'
  feature_descriptions:
    date: 'Date in YYYY-MM-DD'
    date_annees: 'Date with year format'
    type-de-ligne: 'Type of transit line'
    nom-de-la-ligne: 'Line name'
    nombre-de-voyages: 'Number of trips'

# Dataset 1412 (absenteeism): employee attributes and hours absent.
1412_absenteeism_at_work:
  dataset_description: 'Absenteeism at Work: Employee data and reasons for absenteeism.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1412_absenteeism-at-work.csv'
  feature_descriptions:
    ID: 'Employee ID'
    Reason_for_absence: 'Reason code for absence'
    Month_of_absence: 'Month number'
    Day_of_the_week: 'Day of the week'
    Seasons: 'Season of the year'
    Transportation_expense: 'Transportation cost'
    Distance_from_Residence_to_Work: 'Distance in km'
    Service_time: 'Length of employment'
    Age: 'Age in years'
    Work_load_Average/day_: 'Average daily workload'
    Hit_target: 'Hit performance target?'
    Disciplinary_failure: 'Disciplinary failure record'
    Education: 'Education level'
    Son: 'Number of sons'
    Social_drinker: 'Drinks socially?'
    Social_smoker: 'Smokes socially?'
    Pet: 'Has a pet?'
    Weight: 'Weight in kg'
    Height: 'Height in cm'
    Body_mass_index: 'BMI value'
    Absenteeism_time_in_hours: 'Hours absent'

# Dataset 1412 (IBM attrition): HR attributes with an attrition flag.
1412_ibm_employee_attrition:
  dataset_description: 'IBM Employee Attrition: HR-driven attrition assessment dataset.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1412_ibm-employee-attrition.csv'
  feature_descriptions:
    Age: 'Employee age'
    Attrition: 'Attrition flag (yes/no)'
    BusinessTravel: 'Business travel frequency'
    DailyRate: 'Daily rate (USD)'
    Department: 'Employee department'
    DistanceFromHome: 'Distance to home (km)'
    Education: 'Education level'
    EducationField: 'Field of education'
    EmployeeCount: 'Always 1'
    EmployeeNumber: 'Employee ID'
    EnvironmentSatisfaction: 'Satisfaction rating'
    Gender: 'Gender'
    HourlyRate: 'Hourly rate'
    JobInvolvement: 'Involvement level'
    JobLevel: 'Job level'
    JobRole: 'Job role designation'
    JobSatisfaction: 'Satisfaction rating'
    MaritalStatus: 'Marital status'
    MonthlyIncome: 'Monthly income'
    MonthlyRate: 'Monthly rate'
    NumCompaniesWorked: 'Number of companies worked for'
    Over18: 'Age over 18 flag'
    OverTime: 'Works overtime?'
    PercentSalaryHike: 'Salary hike percent'
    PerformanceRating: 'Performance rating'
    RelationshipSatisfaction: 'Satisfaction rating'
    StandardHours: 'Standard working hours'
    StockOptionLevel: 'Stock options level'
    TotalWorkingYears: 'Total work experience'
    TrainingTimesLastYear: 'Training frequency last year'
    WorkLifeBalance: 'Balance rating'
    YearsAtCompany: 'Years at company'
    YearsInCurrentRole: 'Years in current position'
    YearsSinceLastPromotion: 'Years since last promotion'
    YearsWithCurrManager: 'Years with current manager'

# Dataset 1413: auction shill-bidding indicators; 'Class' is the target.
1413_shill_bidding:
  dataset_description: 'Shill Bidding Data: Auction records with bid behavior indicators.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1413_shill-bidding.csv'
  feature_descriptions:
    Auction_ID: 'Auction identifier'
    Bidder_ID: 'Bidder identifier'
    Bidder_Tendency: 'Bidder’s tendency metric'
    Bidding_Ratio: 'Bidder ratio of bids'
    Successive_Outbidding: 'Number of successive outbids'
    Last_Bidding: 'Time of last bid'
    Auction_Bids: 'Total bids in auction'
    Starting_Price_Average: 'Average starting price'
    Early_Bidding: 'Proportion of early bids'
    Winning_Ratio: 'Bidder winning rate'
    Auction_Duration: 'Duration of auction'
    Class: 'Target: shill (1) or not (0)'

# Dataset 1414: AI4I 2020 predictive maintenance.
# Keys containing spaces and [unit] suffixes are kept as plain scalars, as before.
1414_AI4I2020:
  dataset_description: 'AI4I 2020 Predictive Maintenance: Sensor readings to predict machine failure.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1414_AI4I2020.csv'
  feature_descriptions:
    Product ID: 'Machine product identifier'
    Type: 'Machine type'
    Air temperature [K]: 'Air temperature input'
    Process temperature [K]: 'Process temperature input'
    Rotational speed [rpm]: 'Rotational speed reading'
    Torque [Nm]: 'Torque measurement'
    Tool wear [min]: 'Tool wear in minutes'
    Machine failure: 'Failure event flag'
    TWF: 'Failure reason: Tool wear failure'
    HDF: 'Failure reason: Heat dissipation failure'
    PWF: 'Power failure'
    OSF: 'Overstrain failure'
    RNF: 'Random failure'

# Dataset 1415: hourly Beijing PM2.5 with meteorological readings.
1415_beijing_pm25:
  dataset_description: 'Beijing PM2.5 Pollution: Meteorological and pollutant levels by hour.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1415_beijing-pm2.5.csv'
  feature_descriptions:
    year: 'Year'
    month: 'Month'
    day: 'Day'
    hour: 'Hour'
    pm2.5: 'PM2.5 concentration'
    DEWP: 'Dew point'
    TEMP: 'Temperature'
    PRES: 'Pressure'
    cbwd: 'Wind direction'
    Iws: 'Wind speed'
    Is: 'Snow presence indicator'
    Ir: 'Rain presence indicator'

# Dataset 1416: duplicate of the 1406 law school admission (binary) dataset.
# The columns are written out explicitly: the previous placeholder value
# "*same as 1406*" starts with "*", which YAML parses as an alias to an
# undefined anchor and therefore fails to load.
# NOTE(review): the path spells "bianry"; left untouched on the assumption
# that it matches the file name on disk.
1416_law_school_admission_binary_v2:
  dataset_description: "Duplicate Law School Admission binary dataset"
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1416_law-school-admission-bianry.csv"
  feature_descriptions:
    age: "Applicant age"
    decile1: "First-year law school GPA decile ranking"
    decile3: "Third-year GPA decile"
    fam_inc: "Family income bracket"
    lsat: "LSAT score"
    ugpa: "Undergraduate GPA"
    gender: "Applicant gender"
    race1: "Race classification"
    cluster: "Clustering segment identifier"
    fulltime: "Full-time attendance flag"
    bar: "Bar exam pass (1 = yes, 0 = no)"
    ugpagt3: "Indicator: undergraduate GPA greater than 3.0"

# Dataset 1417: IBM employee performance.
# The columns are written out explicitly: the previous placeholder value began
# with "*", which YAML parses as an alias to an undefined anchor and fails to
# load. Per that placeholder, the columns are those of
# 1412_ibm_employee_attrition without Attrition.
# NOTE(review): confirm the column list against the CSV header.
1417_ibm_employee_performance:
  dataset_description: "IBM Employee Performance: HR data used for predicting performance rating"
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1417_ibm-employee-performance.csv"
  feature_descriptions:
    Age: "Employee age"
    BusinessTravel: "Business travel frequency"
    DailyRate: "Daily rate (USD)"
    Department: "Employee department"
    DistanceFromHome: "Distance to home (km)"
    Education: "Education level"
    EducationField: "Field of education"
    EmployeeCount: "Always 1"
    EmployeeNumber: "Employee ID"
    EnvironmentSatisfaction: "Satisfaction rating"
    Gender: "Gender"
    HourlyRate: "Hourly rate"
    JobInvolvement: "Involvement level"
    JobLevel: "Job level"
    JobRole: "Job role designation"
    JobSatisfaction: "Satisfaction rating"
    MaritalStatus: "Marital status"
    MonthlyIncome: "Monthly income"
    MonthlyRate: "Monthly rate"
    NumCompaniesWorked: "Number of companies worked for"
    Over18: "Age over 18 flag"
    OverTime: "Works overtime?"
    PercentSalaryHike: "Salary hike percent"
    PerformanceRating: "Performance rating"
    RelationshipSatisfaction: "Satisfaction rating"
    StandardHours: "Standard working hours"
    StockOptionLevel: "Stock options level"
    TotalWorkingYears: "Total work experience"
    TrainingTimesLastYear: "Training frequency last year"
    WorkLifeBalance: "Balance rating"
    YearsAtCompany: "Years at company"
    YearsInCurrentRole: "Years in current position"
    YearsSinceLastPromotion: "Years since last promotion"
    YearsWithCurrManager: "Years with current manager"

# Dataset 1418: hourly urban traffic incident counts and slowdown percent.
1418_behavior_urban_traffic:
  dataset_description: 'Urban Traffic Incidents: Counts of specific incident types per hour'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1418_behavior-urban-traffic.csv'
  feature_descriptions:
    Hour: 'Hour of day'
    Immobilized_bus: 'Count of immobilized buses'
    Broken_Truck: 'Count of broken trucks'
    Vehicle_excess: 'Vehicles exceeding limit'
    Accident_victim: 'Accidents involving victims'
    Running_over: 'Instances of being run over'
    Fire_vehicles: 'Vehicle-related fires'
    Occurrence_involving_freight: 'Freight-related incidents'
    Incident_involving_dangerous_freight: 'Hazardous freight incidents'
    Lack_of_electricity: 'Power-outage occurrences'
    Fire: 'Fire incidents'
    Point_of_flooding: 'Flooding occurrences'
    Manifestations: 'Public demonstrations'
    Defect_in_network_of_trolleybuses: 'Trolleybus network defects'
    Tree_on_the_road: 'Trees blocking roads'
    Semaphore_off: 'Traffic light outages'
    Intermittent_Semaphore: 'Intermittent traffic signals'
    Slowness_in_traffic_percent: 'Percent slowdown due to incidents'

# Dataset 1419: temperature forecast bias correction.
# NOTE(review): 'Present_Tmax/Tmin', 'LDAPS_*', 'lat/lon' and 'Next_Tmax/Tmin'
# each appear to stand for more than one column — confirm against the CSV
# header before relying on exact key lookups.
1419_bias_correction:
  dataset_description: 'Bias Correction Dataset: Observed vs modelled daily Tmax/Tmin plus terrain and weather'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1419_bias-correction.csv'
  feature_descriptions:
    station: 'Station identifier'
    Date: 'Observation date'
    Present_Tmax/Tmin: 'Observed max/min temperature'
    LDAPS_*: 'Model forecasts for humidity, temperature, wind speed, cloud cover, precipitation'
    lat/lon: 'Station latitude/longitude'
    DEM: 'Digital elevation model'
    Slope: 'Terrain slope'
    Solar radiation: 'Incoming solar radiation'
    Next_Tmax/Tmin: 'Next-day observed max/min temp'

# Dataset 1420: network burst header packet metrics; 'Class' is the target.
# NOTE(review): keys joined with '/' and '10-Run-AVG-* metrics' appear to
# group several columns; key spellings are reproduced exactly as found —
# confirm against the CSV header.
1420_burst_header_packet:
  dataset_description: 'Network Packet Header Burst Dataset: Packet usage and loss metrics'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1420_burst-header-packet.csv'
  feature_descriptions:
    Node: 'Network node identifier'
    Utilised Bandwith Rate: 'Utilized bandwidth ratio'
    Packet Drop Rate: 'Packet drop ratio'
    Full_Bandwidth: 'Total available bandwidth'
    Average_Delay_Time_Per_Sec: 'Avg network delay (s)'
    Percentage_Of_Lost_Packet/Byte Rate: 'Packet/byte loss percentage'
    Packet Received/Transmitted/Lost: 'Counts of packet events'
    of Used_Bandwidth: 'Percentage of used bandwidth'
    Lost_Bandwidth: 'Lost bandwidth volume'
    Packet Size_Byte: 'Packet size in bytes'
    10-Run-AVG-* metrics: '10-sample running averages of drop rate, bandwidth usage, and delay'
    Node Status: 'Operational status of node'
    Flood Status: 'Flood condition indicator'
    Class: 'Target classification label'

# Dataset 1421: Breast Cancer Coimbra; 'Classification' is the target.
1421_breast_cancer_coimbra:
  dataset_description: 'Breast Cancer Coimbra: Clinical and biochemical markers for cancer classification'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1421_breast-cancer-coimbra.csv'
  feature_descriptions:
    Age: 'Age in years'
    BMI: 'Body Mass Index'
    Glucose: 'Blood glucose level'
    Insulin: 'Serum insulin'
    HOMA: 'Homeostatic Model Assessment'
    Leptin: 'Serum leptin level'
    Adiponectin: 'Serum adiponectin'
    Resistin: 'Serum resistin level'
    MCP.1: 'Monocyte chemoattractant protein-1'
    Classification: 'Target: Cancer presence (yes/no)'

# Dataset 1423: YouTube comment spam detection (PSY channel).
1423_youtube_spam_psy:
  dataset_description: 'YouTube Spam Detection (PSY channel): Comments and spam classification'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1423_youtube-spam-psy.csv'
  feature_descriptions:
    AUTHOR: 'Comment author'
    DATE: 'Date of comment'
    CONTENT: 'Text content of comment'
    CLASS: 'Spam (1) or not spam (0)'
# Dataset 1426 (airfoil): self-noise measurements; 'pressure' is the target.
1426_airfoil_self_noise:
  dataset_description: 'Airfoil Self-Noise Dataset: Predicts aerodynamic sound from airfoil shape and motion parameters.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1426_airfoil_self_noise.csv'
  feature_descriptions:
    frequency: 'Frequency in Hz at which sound was measured'
    angle: 'Angle of attack in degrees'
    length: 'Chord length of airfoil in meters'
    velocity: 'Free-stream velocity in m/s'
    thickness: 'Suction side displacement thickness in meters'
    pressure: 'Sound pressure level (target)'

# Dataset 1426 (YouTube): spam detection on Shakira video comments.
# The AUTHOR description contains an apostrophe, so it stays double-quoted.
1426_youtube_spam_shakira:
  dataset_description: 'YouTube Spam Comments (Shakira): Comment metadata for spam detection.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1426_youtube-spam-shakira.csv'
  feature_descriptions:
    COMMENT_ID: 'Unique comment ID'
    AUTHOR: "Comment author's username"
    DATE: 'Timestamp of the comment'
    CONTENT: 'Text content of the comment'
    CLASS: 'Spam indicator (1 = spam, 0 = not spam)'

# Dataset 1431: Beijing multi-site air quality (pollutants and weather).
1431_Beijing_Multi_Site_Air_Quality:
  dataset_description: "Beijing Multi-Site Air Quality: Pollutant levels recorded at multiple sites in Beijing."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1431_Beijing-Multi-Site-Air-Quality.csv"
  feature_descriptions:
    # Quoted on purpose: a bare No resolves to boolean false under YAML 1.1
    # loaders (PyYAML included), which would replace the column name "No"
    # with a False key.
    "No": "Measurement record number"
    year: "Year of observation"
    month: "Month"
    day: "Day"
    hour: "Hour"
    PM2.5: "Particulate Matter <2.5 µm"
    PM10: "Particulate Matter <10 µm"
    SO2: "Sulfur dioxide concentration"
    NO2: "Nitrogen dioxide concentration"
    CO: "Carbon monoxide concentration"
    O3: "Ozone concentration"
    TEMP: "Temperature in °C"
    PRES: "Atmospheric pressure in hPa"
    DEWP: "Dew point temperature"
    RAIN: "Rainfall in mm"
    wd: "Wind direction"
    WSPM: "Wind speed in m/s"
    station: "Monitoring station identifier"

# Capacity and occupancy snapshots for Birmingham parking locations.
1432_parking_birmingham:
  dataset_description: 'Parking Birmingham: Availability and capacity data from Birmingham parking lots.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1432_parking-birmingham.csv'
  feature_descriptions:
    SystemCodeNumber: 'Unique ID for parking location'
    Capacity: 'Total number of spaces'
    Occupancy: 'Number of occupied spaces'
    LastUpdated: 'Last update timestamp'

# Clinical attributes for heart disease diagnosis; `diag` is the target.
1439_Heart_disease_classification:
  dataset_description: 'Heart Disease Classification: Medical attributes for heart disease diagnosis.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1439_Heart_disease_classification.csv'
  feature_descriptions:
    age: 'Age of patient'
    sex: 'Gender (1 = male, 0 = female)'
    cp: 'Chest pain type'
    trestpbs: 'Resting blood pressure'
    chol: 'Serum cholesterol in mg/dl'
    fbs: 'Fasting blood sugar > 120 mg/dl'
    restecg: 'Resting electrocardiographic results'
    thalach: 'Maximum heart rate achieved'
    exang: 'Exercise-induced angina'
    oldpeak: 'ST depression induced by exercise'
    slope: 'Slope of peak exercise ST segment'
    ca: 'Number of major vessels colored by fluoroscopy'
    thal: 'Thalassemia'
    diag: 'Presence of heart disease (target)'

# Daily productivity metrics per garment-factory team and department.
1449_garments_worker_productivity:
  dataset_description: 'Garment Worker Productivity: Daily work metrics across teams and departments.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1449_garments-worker-productivity.csv'
  feature_descriptions:
    date: 'Date of observation'
    quarter: 'Quarter of the year'
    department: 'Department name (sewing/finishing)'
    day: 'Day of the week'
    team: 'Team identifier'
    targeted_productivity: 'Management-assigned productivity target'
    smv: 'Standard Minute Value'
    wip: 'Work in progress'
    over_time: 'Overtime in minutes'
    incentive: 'Incentive value'
    idle_time: 'Idle time in minutes'
    idle_men: 'Number of idle workers'
    no_of_style_change: 'Count of style changes'
    no_of_workers: 'Number of workers'
    actual_productivity: 'Observed productivity'

# Per-session browsing metrics; `Revenue` is marked as the target.
1450_online_shoppers_intention:
  dataset_description: 'Online Shoppers Purchase Intention: Session-based metrics to predict purchase likelihood.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1450_online-shoppers-intention.csv'
  feature_descriptions:
    Administrative: 'Number of administrative pages visited'
    Administrative_Duration: 'Time spent on admin pages'
    Informational: 'Number of informational pages visited'
    Informational_Duration: 'Time spent on informational pages'
    ProductRelated: 'Number of product-related pages'
    ProductRelated_Duration: 'Time spent on product-related pages'
    BounceRates: 'Bounce rate of the session'
    ExitRates: 'Exit rate of the session'
    PageValues: 'Page value metric'
    SpecialDay: 'Proximity to special day (0–1 scale)'
    Month: 'Month of visit'
    OperatingSystems: 'Operating system used'
    Browser: 'Browser used'
    Region: 'Geographic region'
    TrafficType: 'Source traffic type'
    VisitorType: 'Returning or new visitor'
    Weekend: 'Visit occurred on weekend?'
    Revenue: 'Purchase made (target)'

# Symptom checklist for early-stage diabetes risk; `class` holds the label.
# Several column names contain spaces and are kept exactly as written.
1451_early_stage_diabetes:
  dataset_description: 'Early Stage Diabetes Risk Prediction: Symptom-based binary classification dataset.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1451_early-stage-diabetes.csv'
  feature_descriptions:
    # Double quotes kept: the text contains an apostrophe.
    Age: "Patient's age"
    Gender: 'Gender'
    Polyuria: 'Excessive urination'
    Polydipsia: 'Excessive thirst'
    sudden weight loss: 'Recent weight loss'
    weakness: 'Physical weakness'
    Polyphagia: 'Increased hunger'
    Genital thrush: 'Fungal infection'
    visual blurring: 'Blurry vision'
    Itching: 'Skin irritation'
    Irritability: 'Mood irritability'
    delayed healing: 'Delayed wound healing'
    partial paresis: 'Muscle weakness'
    muscle stiffness: 'Stiff muscles'
    Alopecia: 'Hair loss'
    Obesity: 'Obesity status'
    class: 'Diabetes (Positive/Negative)'

# Building design parameters; `heating_load` is marked as the target.
1452_energy_efficiency:
  dataset_description: 'Energy Efficiency Dataset: Building design parameters to predict heating load.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1452_energy_efficiency.csv'
  feature_descriptions:
    relative_compactness: 'Compactness ratio of building'
    surface_area: 'Total exterior surface area'
    wall_area: 'Area of walls'
    roof_area: 'Area of the roof'
    overall_height: 'Building height'
    orientation: 'Cardinal orientation (categorical)'
    glazing_area: 'Window-to-wall ratio'
    glazing_area_distribution: 'Window distribution (categorical)'
    heating_load: 'Heating energy consumption (target)'

# First names with their associated gender, count and probability.
1452_gender_by_name:
  dataset_description: 'Gender by Name Dataset: Frequency and probability of gender per name.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1452_gender-by-name.csv'
  feature_descriptions:
    Name: 'First name'
    Gender: 'Associated gender'
    Count: 'Number of occurrences'
    Probability: 'Probability of gender association'

# Weather and timestamp features; `traffic_volume` is marked as the target.
1453_metro_interstate_traffic:
  dataset_description: 'Metro Interstate Traffic: Weather and time-based traffic volume predictions.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1453_metro-interstate-traffic.csv'
  feature_descriptions:
    holiday: 'Is it a holiday?'
    temp: 'Temperature in Kelvin'
    rain_1h: 'Rainfall in last hour'
    snow_1h: 'Snowfall in last hour'
    clouds_all: 'Cloud coverage percentage'
    weather_main: 'Main weather condition'
    weather_description: 'Detailed weather description'
    date_time: 'Timestamp'
    traffic_volume: 'Traffic volume (target)'

# Household and individual census-style attributes with a binary class label.
1458_kdd_ipums_la_97_small:
  dataset_description: 'KDD IPUMS LA 97: Household and individual data for income prediction.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1458_kdd_ipums_la_97-small.csv'
  feature_descriptions:
    value: 'Census value code'
    rent: 'Monthly rent'
    ftotinc: 'Family total income'
    momloc: 'Mother’s location code'
    famsize: 'Family size'
    nchild: 'Number of children'
    eldch: 'Elderly children'
    yngch: 'Young children'
    nsibs: 'Number of siblings'
    age: 'Age in years'
    occscore: 'Occupation score'
    sei: 'Socioeconomic index'
    inctot: 'Total individual income'
    incwage: 'Wage income'
    incbus: 'Business income'
    incfarm: 'Farming income'
    incss: 'Social security income'
    incwelfr: 'Welfare income'
    incother: 'Other income sources'
    poverty: 'Poverty level'
    binaryClass: 'Binary income classification'

# Gamma-vs-hadron event classification features.
# The feature keys end in a literal ':' (written as `name::` in plain style);
# they are quoted here so the trailing colon is unmistakably part of the key.
# NOTE(review): presumably the CSV headers really carry that colon, and
# `class` really lacks one — confirm against the file.
1459_MagicTelescope:
  dataset_description: 'MAGIC Gamma Telescope: Particle property data to classify gamma vs hadron events.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1459_MagicTelescope.csv'
  feature_descriptions:
    'fLength:': 'Major axis length of ellipse'
    'fWidth:': 'Minor axis width'
    'fSize:': 'Size (total counts)'
    'fConc:': 'Ratio of brightest pixels'
    'fConc1:': 'Concentration of single pixel'
    'fAsym:': 'Asymmetry'
    'fM3Long:': '3rd moment along major axis'
    'fM3Trans:': '3rd moment along minor axis'
    'fAlpha:': 'Angle between major axis and line to origin'
    'fDist:': 'Distance from origin'
    class: 'Gamma or Hadron event'

# Clinical records of heart-failure patients; DEATH_EVENT is the target.
1461_heart_failure:
  dataset_description: 'Heart Failure Clinical Records: Predicting death events in heart failure patients.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1461_heart-failure.csv'
  feature_descriptions:
    age: 'Patient age'
    anaemia: 'Anemia presence (1 = yes)'
    creatinine_phosphokinase: 'CPK enzyme level'
    diabetes: 'Diabetes presence'
    ejection_fraction: 'Percentage of blood leaving heart'
    high_blood_pressure: 'Hypertension status'
    platelets: 'Platelet count'
    serum_creatinine: 'Creatinine level in blood'
    serum_sodium: 'Sodium level in blood'
    sex: 'Biological sex'
    smoking: 'Smoking history'
    time: 'Follow-up period in days'
    DEATH_EVENT: 'Target: patient died (1 = yes)'
# Blood-test measurements used to support Hepatitis C diagnosis.
1462_hcv_data:
  dataset_description: 'HCV (Hepatitis C Virus) Dataset: Clinical measurements for diagnosis support.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1462_hcv-data.csv'
  feature_descriptions:
    Category: 'Health status category (e.g., Blood Donor, Suspect)'
    Age: 'Age of the individual'
    Sex: 'Biological sex'
    ALB: 'Albumin level in g/dL'
    ALP: 'Alkaline Phosphatase enzyme level'
    ALT: 'Alanine Transaminase enzyme level'
    AST: 'Aspartate Transaminase enzyme level'
    BIL: 'Bilirubin level'
    CHE: 'Cholinesterase enzyme level'
    CHOL: 'Cholesterol level'
    CREA: 'Creatinine level'
    GGT: 'Gamma-Glutamyl Transferase enzyme level'
    PROT: 'Total protein level'

# Weekly Dow Jones records; `next_weeks_close` is marked as the target.
1464_dow_jones_index:
  dataset_description: 'Dow Jones Index Weekly Forecast: Historical index and feature series for prediction.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1464_dow-jones-index.csv'
  feature_descriptions:
    quarter: 'Fiscal quarter of data'
    stock: 'Market index (e.g., DJI)'
    date: 'Date of the record'
    open: 'Opening index value'
    high: 'Highest index value of the day'
    low: 'Lowest index value'
    close: 'Closing index value'
    volume: 'Trading volume'
    percent_change_price: 'Percent change in price'
    percent_change_volume_over_last_wk: '7‑day volume percent change'
    previous_weeks_volume: 'Volume of previous week'
    # Double quotes kept on the next two: the text contains an apostrophe.
    next_weeks_open: "Next week's opening price"
    next_weeks_close: "Next week's closing price (target)"
    percent_change_next_weeks_price: 'Next-week price percent change'
    days_to_next_dividend: 'Days until next dividend'
    percent_return_next_dividend: 'Dividend return percentage'

# Client repayment history; `SeriousDlqin2yrs` is marked as the target.
1465_credit:
  dataset_description: 'Credit Default Prediction: Client financial and repayment history.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1465_credit.csv'
  feature_descriptions:
    RevolvingUtilizationOfUnsecuredLines: 'Utilization rate of unsecured credit'
    age: 'Age in years'
    NumberOfTime30-59DaysPastDueNotWorse: '30–59 days past due count'
    DebtRatio: 'Debt-to-income ratio'
    MonthlyIncome: 'Monthly income'
    NumberOfOpenCreditLinesAndLoans: 'Number of open loans'
    NumberOfTimes90DaysLate: '90+ days late payment count'
    NumberRealEstateLoansOrLines: 'Home loan count'
    NumberOfTime60-89DaysPastDueNotWorse: '60–89 days past due count'
    NumberOfDependents: 'Number of dependents'
    SeriousDlqin2yrs: 'Serious delinquency within 2 years (target)'

# Firewall session log fields (ports, byte/packet counts, action taken).
# Several column names contain spaces and are kept exactly as written.
1465_internet_firewall:
  dataset_description: 'Internet Firewall Logs: Packet and session data for network security usage.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1465_internet-firewall.csv'
  feature_descriptions:
    Source Port: 'Client port number'
    Destination Port: 'Server port number'
    NAT Source Port: 'NAT client port'
    NAT Destination Port: 'NAT server port'
    Action: 'Action taken (allowed/blocked)'
    Bytes: 'Total bytes transferred'
    Bytes Sent: 'Bytes sent from source'
    Bytes Received: 'Bytes received by source'
    Packets: 'Total number of packets'
    Elapsed Time (sec): 'Session duration'
    pkts_sent: 'Packets sent'
    pkts_received: 'Packets received'

# Post-operative patient dataset.
# The original value of `feature_descriptions` was the free-text placeholder
#   *same as internet_firewall*
# which YAML reads as an alias to an undefined anchor named `same`, so the
# document failed to load. It is replaced by an empty mapping so the entry
# keeps the same shape as its siblings.
# NOTE(review): the real column descriptions still need to be filled in; a
# post-operative patient table is unlikely to share the firewall-log columns
# the placeholder pointed at — confirm against the CSV header.
1466_post_operative:
  dataset_description: "Post-operative Patient Data: Similar schema, likely outcome evaluation."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1466_post-operative.csv"
  feature_descriptions: {}

# Building structural attributes and post-earthquake damage category.
# NOTE(review): some keys below ('geo_level_1_id to geo_level_3_id',
# 'has_superstructure_...', 'has_secondary_use…') look like shorthand for
# groups of columns rather than literal CSV headers — confirm against the file.
1478_BMC_TrainingData:
  dataset_description: 'Nepal Building Damage Data (BMC): Structural attributes and damage categories post-earthquake.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1478_BMC_TrainingData.csv'
  feature_descriptions:
    geo_level_1_id to geo_level_3_id: 'Administrative region levels'
    count_floors_pre_eq: 'Number of floors before earthquake'
    age: 'Building age'
    area_percentage: 'Built-up area ratio'
    height_percentage: 'Height coverage percentage'
    land_surface_condition: 'Land surface classification'
    foundation_type: 'Building foundation type'
    roof_type: 'Roof construction type'
    ground_floor_type: 'Ground floor material'
    other_floor_type: 'Other floors material'
    position: 'Building position'
    plan_configuration: 'Plan layout configuration'
    has_superstructure_...: 'Presence of various superstructure types'
    legal_ownership_status: 'Ownership status'
    count_families: 'Number of households'
    has_secondary_use…: 'Secondary building usage flags'
    category: 'Damage grade category (target)'

# UK road accident records (accident, vehicle and casualty metadata).
# NOTE(review): many keys below combine several columns with '/', ',' or '…'
# and are presumably shorthand rather than literal CSV headers — confirm.
1480_road_safety:
  dataset_description: "Road Accident Data (UK): Detailed casualty, vehicle, and accident metadata."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1480_road_safety.csv"
  feature_descriptions:
    Location_Easting_OSGR/Northing: "Grid coordinates"
    Longitude/Latitude: "Geographic coordinates"
    Police_Force: "Police authority code"
    Accident_Severity: "Severity classification"
    Number_of_Vehicles/Casualties: "Counts of vehicles and casualties"
    Date, Day_of_Week, Time: "Temporal details"
    Local_Authority: "Local authority codes"
    Road_Class, Road_Number, Road_Type: "Road specification"
    # UK speed limits are posted in miles per hour, not km/h.
    Speed_limit: "Speed limit in mph"
    Junction_Detail/Control: "Junction type/control"
    Pedestrian_Crossing…: "Crossing facility details"
    Light/Weather/Road_Surface_Conditions: "Environmental conditions"
    Special_Conditions_at_Site: "Special conditions at accident site"
    Carriageway_Hazards: "Hazards on carriageway"
    Urban_or_Rural_Area: "Area classification"
    Did_Police_Officer_Attend…: "Presence of police at scene"
    # LSOA is a geographic statistical area code, not a socioeconomic score.
    LSOA_of_Accident_Location: "Lower Layer Super Output Area (LSOA) code of the accident location"
    Vehicle_Reference…Driver variables: "Details per vehicle/casualty/driver"

# Meta-features from MIP solver runs.
# NOTE(review): the single key below is a range-style summary of many
# columns, presumably not a literal CSV header — confirm.
1482_MIP_2016_regression:
  dataset_description: 'Meta features from MIP solver runs for regression modeling.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1482_MIP-2016-regression.csv'
  feature_descriptions:
    probtype, n_vars…runstatus: 'Solver configuration, instance size, solver outcomes, time and cut metrics'

# Spanish-language social media posts with topic and sentiment fields.
# NOTE(review): several keys group multiple columns and are presumably
# shorthand rather than literal CSV headers — confirm.
1487_tuiter:
  dataset_description: "Twitter-like Social Data (Spanish): Post metadata, topic and sentiment mapping."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1487_tuiter.csv"
  feature_descriptions:
    CUENTA, MES, topico, Sexo, Tier, Multimedia, Tipo, V9: "User and post metadata"
    numero: "Post number"
    funcion: "User function"
    sentimiento: "Sentiment classification"
    URLscitadas/X entries: "Count of cited URLs"
    tipo_web: "Web type interacted"
    # Quoted: a plain scalar starting with '*' is parsed as a YAML alias
    # (here to an undefined anchor `_n`), which aborts loading.
    "*_n variants": "Encoded numeric variables"
    # Quoted: '@' is a reserved indicator and cannot start a plain scalar.
    "@#_n": "Hashtag count"
  
# Clinical variables for chronic kidney disease; `Survival` is the target.
# NOTE(review): several keys group multiple columns (comma lists, 'X to Y',
# 'A / B') and are presumably shorthand, not literal CSV headers — confirm.
1494_ChronicKidneyDisease:
  dataset_description: 'Chronic Kidney Disease Dataset: Clinical variables for predicting CKD presence.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1494_ChonicKidneyDisease.csv'
  feature_descriptions:
    Age, Sex, Blood_Pressure: 'Basic patient info'
    Specific_Gravity to Bacteria: 'Urinalysis markers'
    Blood_Gluc_rand: 'Random blood glucose'
    Blood_Urea, Creatinine, Sodium, Potassium: 'Blood chemistry'
    Hemoglobin: 'Hemoglobin level'
    Packed_Cell_Volume: 'PCV %'
    Wbc_cnt / Rbc_cnt: 'White/red cell counts'
    ACR: 'Albumin-to-creatinine ratio'
    Hypertension, Diabetes, CAD: 'Comorbidity flags'
    Apetite (Appetite), Pedal_edema, Anemia: 'Physical condition markers'
    Survival: 'Patient survival flag (target)'

# Movie metadata: titles, genres, cast/crew and box-office revenue.
1495_movies:
  dataset_description: 'Movie Metadata: Genres, cast, crew, and box office information.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1495_movies.csv'
  feature_descriptions:
    genres: 'Genre list'
    id: 'Movie identifier'
    keywords: 'Associated keywords'
    original_title: 'Original title'
    release_date: 'Release date'
    revenue: 'Box-office revenue'
    status: 'Film status'
    title: 'Localized title'
    cast: 'Cast list'
    director: 'Director name'

# Red wine chemistry features; `type` is marked as the target label.
1496_red_wine1:
  dataset_description: 'Red Wine Analytical Dataset: Chemical features with wine type classification.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1496_Red_wine1.csv'
  feature_descriptions:
    citric acid: 'Citric acid content'
    sulphates: 'Sulphate concentration'
    alcohol: 'Alcohol percentage'
    type: 'Wine type label (target)'

# Red wine chemistry dataset.
# The original value of `feature_descriptions` was the free-text placeholder
#   *same as Red_wine1*
# which YAML reads as an alias to an undefined anchor named `same`, so the
# document failed to load. The descriptions from the `1496_red_wine1` entry
# are written out inline to honour the stated intent.
# NOTE(review): confirm this CSV really has the same columns as Red_wine1.
1499_red_wine:
  dataset_description: "Red Wine Quality Dataset: Chemistry measurements and type."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1499_red_wine.csv"
  feature_descriptions:
    citric acid: "Citric acid content"
    sulphates: "Sulphate concentration"
    alcohol: "Alcohol percentage"
    type: "Wine type label (target)"

# Porto Seguro driver risk features; `target` is the prediction target.
# NOTE(review): 'ps_ind_01 etc.' and 'Cat_0…Cat_183' summarise many columns
# and are presumably not literal CSV headers — confirm.
1500_Porto_Seguro_Safe_Driver:
  dataset_description: 'Porto Seguro Driver Data (lightGBM solution): Selected features for risk modeling.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1500_Porto_Seguro_Safe_Driver_Prediction_2nd_Place_Lightgbm_Solution.csv'
  feature_descriptions:
    ps_ind_01 etc.: 'Selected feature indices and counts used in LightGBM solution'
    Cat_0…Cat_183: 'Encoded categorical feature aggregates'
    target: 'Risk prediction target variable'

# Self-reported phobia indicators plus basic demographics.
# NOTE(review): 'ID, ID_test' and 'AgoraPhobia…Exercice' group several
# columns and are presumably not literal CSV headers — confirm.
1503_mytestdataset:
  dataset_description: 'Custom Phobia Dataset: User self-report data on phobias and demographics.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1503_mytestdataset.csv'
  feature_descriptions:
    age: 'Age in years'
    weight: 'Weight in kg'
    Height: 'Height in cm'
    Sex: 'Gender'
    ID, ID_test: 'User identifiers'
    AgoraPhobia…Exercice: 'Self-reported phobia indicators'
    Exercice: 'Exercise habit'

# Car purchase-evaluation attributes; `class` is the acceptability label.
1504_car_info:
  dataset_description: 'Car Evaluation Dataset: Purchase evaluation attributes.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1504_car_info.csv'
  feature_descriptions:
    # NOTE(review): this key names two columns at once — confirm the headers.
    buying, maintenance: 'Buying and maintenance cost'
    doors: 'Number of doors'
    person: 'Passenger capacity'
    lug_boot: 'Luggage capacity'
    safety: 'Safety rating'
    class: 'Car acceptability classification'

# Molecular descriptors for fish toxicity; `LC50` is marked as the target.
1507_QSAR_fish_toxicity:
  dataset_description: 'QSAR Toxicity Prediction: Molecular descriptors to estimate fish LC50.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1507_QSAR_fish_toxycity.csv'
  feature_descriptions:
    # NOTE(review): one key covering six descriptor columns — confirm headers.
    CIC0, SM1_Dz, GATS1i, NdsCH, NdssC, MLOGP: 'Molecular descriptors'
    LC50: 'Median lethal concentration (target)'

# Portuguese student performance; `G3` is marked as the target.
# NOTE(review): 'school…absences' is a range-style key pointing at another
# entry's descriptions, presumably not a literal CSV header — confirm.
1508_student_performance_por:
  dataset_description: 'Portuguese Student Performance: Academic features to predict final grade.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1508_student_performance_por.csv'
  feature_descriptions:
    school…absences: 'Identical to earlier student performance dataset'
    G3: 'Final grade (target)'
# California block-group housing attributes; `price` is the target.
1509_california:
  dataset_description: 'California Housing Prices: Census-derived housing attributes and target price.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1509_california.csv'
  feature_descriptions:
    MedInc: 'Median income in block group'
    HouseAge: 'Median house age in block group'
    AveRooms: 'Average number of rooms per household'
    AveBedrms: 'Average number of bedrooms per household'
    Population: 'Block group population'
    AveOccup: 'Average household size'
    Latitude: 'Block group latitude'
    Longitude: 'Block group longitude'
    price: 'Median house price (target)'

# Clinical and work-related coronary risk factors.
1509_coronary_dataset:
  dataset_description: 'Coronary Health Data: Clinical and work-related risk factors.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1509_coronary_dataset.csv'
  feature_descriptions:
    Smoking: 'Smoking status (0/1)'
    M. Work: 'Mental workload level'
    P. Work: 'Physical workload level'
    Pressure: 'Blood pressure'
    Proteins: 'Blood protein marker'
    Family: 'Family history of coronary disease'

# FIFA player physical attributes and contract figures.
1510_fifa:
  dataset_description: 'FIFA Player Details: Personal and financial attributes.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1510_fifa.csv'
  feature_descriptions:
    age: 'Player age'
    height_cm: 'Height in cm'
    weight_kg: 'Weight in kg'
    release_clause_eur: 'Release clause value in euros'
    club_joined: 'Year joined current club'
    wage_eur: 'Current weekly wage in euros'

# Australian electricity price/demand series; `class` is the target.
1511_electricity:
  dataset_description: 'Australian Electricity Market: Price and demand time series data.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1511_electricity.csv'
  feature_descriptions:
    date: 'Date of record'
    day: 'Day of week'
    period: 'Half-hour period of day'
    nswprice: 'Price in NSW region'
    nswdemand: 'Demand in NSW region'
    vicprice: 'Price in Victoria region'
    vicdemand: 'Demand in Victoria region'
    transfer: 'Net electricity transfer'
    class: 'Categorical demand bin (target)'

# Per-word eye-tracking metrics from reading tasks; `label` is the target.
1512_eye_movements:
  dataset_description: 'Eye-Tracking Reading Data: Saccade and fixation metrics per word.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1512_eye_movements.csv'
  feature_descriptions:
    assgNo: 'Reading assignment number'
    P1stFixation: 'Previous word first-fixation status'
    P2stFixation: 'Two-words-back first-fixation status'
    prevFixDur: 'Previous fixation duration (ms)'
    firstfixDur: 'Duration of first fixation on target (ms)'
    firstPassFixDur: 'First-pass sum of fixations (ms)'
    nextFixDur: 'Fixation duration on next word (ms)'
    firstSaccLen: 'Length of first saccade'
    lastSaccLen: 'Length of previous saccade'
    prevFixPos: 'Position of previous fixation'
    landingPos: 'Landing position of saccade'
    leavingPos: 'Position from which saccade left'
    totalFixDur: 'Total fixation duration on word'
    meanFixDur: 'Mean fixation per word'
    regressLen: 'Regression saccade length'
    nextWordRegress: 'Flag if saccade regressed to next word'
    regressDur: 'Duration of regression fixation'
    pupilDiamMax: 'Maximum pupil diameter'
    pupilDiamLag: 'Lagged pupil diameter'
    timePrtctg: 'Time to pre-target fixation'
    titleNo: 'Text title identifier'
    wordNo: 'Word position number'
    label: 'Reading difficulty label (target)'

# Sales activity metrics over time.
1515_sells:
  dataset_description: 'Sales Trend Data (‘sells’): Activity metrics across time.'
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1515_sells.csv'
  feature_descriptions:
    activity: 'Sales activity type'
    month_progress: 'Progression in the month'
    no_holiday: 'Number of holidays'
    day_of_week: 'Day of week'
    # NOTE(review): this key looks like 'average_amount' spliced into
    # 'attribute_4'; confirm the real column name against the CSV header.
    attribaverage_amountute_4: 'Average attribute metric (ambiguous)'

# Student survey about online classes.
# The entry name is quoted on purpose: YAML 1.1 loaders accept underscores
# inside integers, so a plain 1517_33 is resolved to the integer 151733
# instead of the string "1517_33".
"1517_33":
  dataset_description: "Online Class Experience Survey (‘33’): Student responses about e-learning."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1517_33.csv"
  feature_descriptions:
    # Keys are the survey questions verbatim, including the trailing '?'.
    Level of study?: "Highest level of education"
    Age?: "Student's age bracket"
    Used smartphone/computer/laptop previously before online class?: "Prior device familiarity"
    Result increased after online education (comparatively)?: "Perceived improvement in results"
    Knowledge increased after online education (comparatively)?: "Perceived improvement in knowledge"
    Happy with online education?: "Satisfaction level"
    Education Institute Area?: "Area of educational institute"
    Have Internet availability?: "Internet access availability"
    Broadband / Mobile Internet?: "Type of internet access"
    Total hours of study before online education?: "Study hours before"
    Total hours of study after online education?: "Study hours after"
    Class performance increased in online education?: "Self-reported performance improvement"
    Institute Type: "Type of institute attended"
    Current location (During Study) ?: "Study location"
    Gender: "Student gender"
    Faced any issue with online class?: "Reported issues"
    Preferred device for an online course: "Device preference"
# Student demographics and standardized test scores.
# The entry name is quoted on purpose: YAML 1.1 loaders accept underscores
# inside integers, so a plain 1527_1 is resolved to the integer 15271
# instead of the string "1527_1".
"1527_1":
  dataset_description: "Student Academic Scores (‘1’): Demographics and standardized test scores."
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1527_1.csv"
  feature_descriptions:
    gender: "Student gender"
    race/ethnicity: "Student race/ethnicity"
    parental level of education: "Parents' education level"
    lunch: "Lunch type"
    test preparing course: "Test preparation course participation"
    math score: "Math exam score"
    reading score: "Reading exam score"
    writing score: "Writing exam score"
# Cryptocurrency price, volume and market-cap history.
1552_CompleteCryptocurrencyMarketHistory:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1552_CompleteCryptocurrencyMarketHistory.csv'
  dataset_description: 'Complete history of various cryptocurrencies including price and volume metrics.'
  feature_descriptions:
    # Quoted so the ':' inside the column name is clearly part of the key.
    'Unnamed:_0': 'Index column'
    Date: 'Date of record'
    Symbol: 'Cryptocurrency symbol'
    Open: 'Opening price'
    High: 'Highest price'
    Low: 'Lowest price'
    Close: 'Closing price'
    Volume: 'Trade volume'
    Market_Cap: 'Market capitalization'

# Health and lifestyle survey; `Diabetic` is described as the target.
# Keys such as Pregancies / UriationFreq are kept exactly as written.
1553_DiabetesDataset2019:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1553_DiabetesDataset2019.csv'
  dataset_description: 'Health and lifestyle survey data related to diabetes occurrence.'
  feature_descriptions:
    Age: 'Age of the person'
    Gender: 'Gender'
    Family_Diabetes: 'Family history of diabetes'
    highBP: 'Presence of high blood pressure'
    PhysicallyActive: 'Level of physical activity'
    BMI: 'Body Mass Index'
    Smoking: 'Smoking habit'
    Alcohol: 'Alcohol consumption'
    Sleep: 'Total sleep hours'
    SoundSleep: 'Hours of sound sleep'
    RegularMedicine: 'Usage of regular medication'
    JunkFood: 'Frequency of junk food consumption'
    Stress: 'Level of stress'
    BPLevel: 'Blood pressure level'
    Pregancies: 'Number of pregnancies'
    Pdiabetes: 'Pre-diabetic condition'
    UriationFreq: 'Urination frequency'
    Diabetic: 'Target variable indicating diabetic status'

# US brewery listing with type and location fields.
1555_USBreweries:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1555_USBreweries.csv'
  dataset_description: 'Dataset listing breweries in the United States with type and location information.'
  feature_descriptions:
    brewery_name: 'Name of the brewery'
    type: 'Type of brewery'
    address: 'Full address of the brewery'
    website: 'Brewery website'
    state: 'U.S. state'
    state_breweries: 'Number of breweries in the state'

# Second copy/version of the cryptocurrency market history table.
1561_Complete-Cryptocurrency-Market-History:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1561_Complete-Cryptocurrency-Market-History.csv'
  dataset_description: 'Duplicate or versioned copy of complete cryptocurrency market history.'
  feature_descriptions:
    # Quoted so the ':' inside the column name is clearly part of the key.
    'Unnamed:_0': 'Index column'
    Date: 'Date of record'
    Symbol: 'Cryptocurrency symbol'
    Open: 'Opening price'
    High: 'Highest price'
    Low: 'Lowest price'
    Close: 'Closing price'
    Volume: 'Trade volume'
    Market_Cap: 'Market capitalization'

# Another version of the diabetes lifestyle survey; `Diabetic` is the target.
# Keys such as Pregancies / UriationFreq are kept exactly as written.
1563_Diabetes-Dataset-2019:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1563_Diabetes-Dataset-2019.csv'
  dataset_description: 'Another version of diabetes lifestyle and health dataset.'
  feature_descriptions:
    Age: 'Age of the person'
    Gender: 'Gender'
    Family_Diabetes: 'Family history of diabetes'
    highBP: 'Presence of high blood pressure'
    PhysicallyActive: 'Level of physical activity'
    BMI: 'Body Mass Index'
    Smoking: 'Smoking habit'
    Alcohol: 'Alcohol consumption'
    Sleep: 'Total sleep hours'
    SoundSleep: 'Hours of sound sleep'
    RegularMedicine: 'Usage of regular medication'
    JunkFood: 'Frequency of junk food consumption'
    Stress: 'Level of stress'
    BPLevel: 'Blood pressure level'
    Pregancies: 'Number of pregnancies'
    Pdiabetes: 'Pre-diabetic condition'
    UriationFreq: 'Urination frequency'
    Diabetic: 'Target variable indicating diabetic status'

# Physicochemical measurements of white wine with a quality score.
1563_white_wine_quality:
  path: '/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1563_white_wine_quality.csv'
  dataset_description: 'Physicochemical tests and quality rating of white wine samples.'
  feature_descriptions:
    fixed_acidity: 'Fixed acidity level'
    volatile_acidity: 'Volatile acidity level'
    citric_acid: 'Citric acid content'
    residual_sugar: 'Amount of residual sugar'
    chlorides: 'Chloride concentration'
    free_sulfur_dioxide: 'Free SO2 level'
    total_sulfur_dioxide: 'Total SO2 content'
    density: 'Wine density'
    pH: 'pH level'
    sulphates: 'Sulphate concentration'
    alcohol: 'Alcohol percentage'
    quality: 'Sensory quality score'
# Concrete mix proportions and resulting compressive strength.
# This stanza originally had no dataset-name key, so `path`,
# `dataset_description` and `feature_descriptions` landed at the top level of
# the document as duplicate keys. It is nested under a name taken from the
# CSV file stem, matching how the named entries in this file are laid out.
# NOTE(review): confirm no other entry already uses this key.
1564_Concrete:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1564_Concrete.csv"
  dataset_description: "Concrete composition dataset used to predict compressive strength based on ingredient proportions."
  feature_descriptions:
    cement: "Amount of cement in the mix (kg/m³)"
    blast_furnace_slag: "Amount of blast furnace slag (kg/m³)"
    fly_ash: "Amount of fly ash (kg/m³)"
    water: "Amount of water (kg/m³)"
    superplasticizer: "Amount of superplasticizer (kg/m³)"
    coarse_aggregate: "Amount of coarse aggregate (kg/m³)"
    fine_aggregate: "Amount of fine aggregate (kg/m³)"
    age: "Age of concrete in days"
    concrete_compressive_strength: "Concrete compressive strength (MPa)"
# Mammographic mass attributes with a benign/malignant outcome.
# This stanza originally had no dataset-name key, so `path`,
# `dataset_description` and `feature_descriptions` landed at the top level of
# the document as duplicate keys. It is nested under a name taken from the
# CSV file stem, matching how the named entries in this file are laid out.
# NOTE(review): confirm no other entry already uses this key.
1564_Mammographic-Mass-Data-Set:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1564_Mammographic-Mass-Data-Set.csv"
  dataset_description: "Diagnostic dataset for mammographic masses including shape, density, and severity."
  feature_descriptions:
    BI-RADS: "BI-RADS assessment rating (1-5)"
    Age: "Patient's age"
    Shape: "Mass shape (round, oval, lobular, irregular)"
    Margin: "Mass margin (circumscribed, microlobulated, obscured, ill-defined, spiculated)"
    Density: "Mass density (high, iso, low, fat-containing)"
    Severity: "Diagnosis outcome (benign=0, malignant=1)"
# Penguin species and morphological measurements.
# This stanza originally had no dataset-name key, so `path`,
# `dataset_description` and `feature_descriptions` landed at the top level of
# the document as duplicate keys. It is nested under a name taken from the
# CSV file stem, matching how the named entries in this file are laid out.
# NOTE(review): confirm no other entry already uses this key.
1567_Palmer-Penguins-Dataset-Alternative-Iris-Dataset:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1567_Palmer-Penguins-Dataset-Alternative-Iris-Dataset.csv"
  dataset_description: "Alternative to the Iris dataset for classification, using penguin species and morphological traits."
  feature_descriptions:
    species: "Species of penguin (Adelie, Gentoo, Chinstrap)"
    island: "Island where penguin was found (Biscoe, Dream, Torgersen)"
    bill_length_mm: "Bill length in millimeters"
    bill_depth_mm: "Bill depth in millimeters"
    flipper_length_mm: "Flipper length in millimeters"
    body_mass_g: "Body mass in grams"
    sex: "Sex of the penguin (male/female)"
# Per-date, per-symbol cryptocurrency OHLC prices with volume and market cap.
# Lists the same columns as the 1561_Complete-Cryptocurrency-Market-History.csv entry.
# NOTE(review): key derived from the CSV basename (same scheme as the 1569+ entries) — confirm
# it is the name consumers look up.
1552_CompleteCryptocurrencyMarketHistory:
  path: /playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1552_CompleteCryptocurrencyMarketHistory.csv
  dataset_description: "Historical cryptocurrency market data with open, high, low, close prices, volume, and market cap."
  feature_descriptions:
    Unnamed:_0: "Index or row number"
    Date: "Date of the record"
    Symbol: "Cryptocurrency symbol (e.g., BTC, ETH)"
    Open: "Opening price on the date"
    High: "Highest price during the day"
    Low: "Lowest price during the day"
    Close: "Closing price on the date"
    Volume: "Trading volume on the date"
    Market_Cap: "Total market capitalization on the date"
# Lifestyle/medical survey attributes with a Diabetic (Yes/No) status column.
# NOTE(review): key derived from the CSV basename (same scheme as the 1569+ entries) — confirm
# it is the name consumers look up.
1553_DiabetesDataset2019:
  path: /playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1553_DiabetesDataset2019.csv
  dataset_description: "Lifestyle and medical attributes used to assess diabetes risk in individuals."
  feature_descriptions:
    Age: "Age of the individual"
    Gender: "Gender of the individual"
    Family_Diabetes: "Family history of diabetes"
    highBP: "High blood pressure presence"
    PhysicallyActive: "Engagement in physical activity"
    BMI: "Body Mass Index"
    Smoking: "Smoking habits"
    Alcohol: "Alcohol consumption"
    Sleep: "Hours of sleep"
    SoundSleep: "Hours of sound sleep"
    RegularMedicine: "Usage of regular medication"
    JunkFood: "Frequency of junk food consumption"
    Stress: "Stress level"
    BPLevel: "Blood pressure level"
    # 'Pregancies' and 'UriationFreq' are kept as spelled; presumably they mirror the CSV
    # header — verify before renaming.
    Pregancies: "Number of pregnancies"
    Pdiabetes: "Pre-diabetes condition"
    UriationFreq: "Frequency of urination"
    Diabetic: "Diabetes status (Yes/No)"
# US brewery listing; lists the same columns as the 1570_US-Breweries entry.
# NOTE(review): key derived from the CSV basename (same scheme as the 1569+ entries) — confirm
# it is the name consumers look up.
1555_USBreweries:
  path: /playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1555_USBreweries.csv
  dataset_description: "Information on breweries across US states including location and type."
  feature_descriptions:
    brewery_name: "Name of the brewery"
    type: "Type of brewery (e.g., micro, brewpub)"
    address: "Street address of the brewery"
    website: "Website of the brewery"
    state: "US state where the brewery is located"
    state_breweries: "Total breweries in the state"
# Per-date, per-symbol cryptocurrency OHLC prices with volume and market cap.
# Lists the same columns as the 1552_CompleteCryptocurrencyMarketHistory.csv entry.
# NOTE(review): key derived from the CSV basename (same scheme as the 1569+ entries) — confirm
# it is the name consumers look up.
1561_Complete-Cryptocurrency-Market-History:
  path: /playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1561_Complete-Cryptocurrency-Market-History.csv
  dataset_description: "Comprehensive cryptocurrency price data including open, close, volume, and market cap."
  feature_descriptions:
    Unnamed:_0: "Index or row number"
    Date: "Date of entry"
    Symbol: "Cryptocurrency ticker symbol"
    Open: "Opening price"
    High: "Highest price"
    Low: "Lowest price"
    Close: "Closing price"
    Volume: "Daily trading volume"
    Market_Cap: "Total market capitalization"
# Second copy of the 2019 diabetes survey; lists the same columns as the
# 1553_DiabetesDataset2019.csv entry.
# NOTE(review): key derived from the CSV basename (same scheme as the 1569+ entries) — confirm
# it is the name consumers look up.
1563_Diabetes-Dataset-2019:
  path: /playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1563_Diabetes-Dataset-2019.csv
  dataset_description: "Duplicate or alternate version of the 2019 diabetes dataset with same features."
  feature_descriptions:
    Age: "Age of the individual"
    Gender: "Gender"
    Family_Diabetes: "Presence of family diabetes history"
    highBP: "High blood pressure indicator"
    PhysicallyActive: "Physical activity level"
    BMI: "Body Mass Index"
    Smoking: "Smoking status"
    Alcohol: "Alcohol consumption status"
    Sleep: "Average sleep duration"
    SoundSleep: "Average sound sleep"
    RegularMedicine: "Whether medication is taken regularly"
    JunkFood: "Frequency of eating junk food"
    Stress: "Stress levels"
    BPLevel: "Categorical blood pressure indicator"
    # 'Pregancies' and 'UriationFreq' are kept as spelled; presumably they mirror the CSV
    # header — verify before renaming.
    Pregancies: "Number of pregnancies"
    Pdiabetes: "Pre-diabetes condition"
    UriationFreq: "Urination frequency"
    Diabetic: "Diabetes status"
# NOTE(review): the key and file name say "COVID-19 World Vaccination Progress", but the
# description and all five columns below describe IoT temperature readings (the same column
# set as the 1571_Temperature-Readings--IOT-Devices entry). Confirm against the CSV header
# whether this metadata or the file name is wrong.
1569_COVID-19-World-Vaccination-Progress:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1569_COVID-19-World-Vaccination-Progress.csv"
  dataset_description: "Temperature readings from IoT devices used in the context of COVID-19 monitoring."
  feature_descriptions:
    id: "Unique identifier for the measurement"
    room_id/id: "Room or device ID"
    noted_date: "Timestamp of the temperature recording"
    temp: "Temperature value in Celsius"
    out/in: "Indicator whether reading was from outdoor or indoor"

# US brewery listing: name, type, address, website, state, and a per-state brewery total.
# Lists the same six columns as the 1555_USBreweries.csv entry earlier in this file.
1570_US-Breweries:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1570_US-Breweries.csv"
  dataset_description: "Database of breweries across US states with metadata including location and type."
  feature_descriptions:
    brewery_name: "Name of the brewery"
    type: "Brewery category (e.g., micro, brewpub)"
    address: "Address of the brewery"
    website: "Website URL"
    state: "State in which the brewery is located"
    state_breweries: "Total number of breweries in the state"

# Timestamped temperature readings tagged with a room/device ID and an indoor/outdoor flag.
# NOTE(review): the 1569_COVID-19-World-Vaccination-Progress entry lists these same five
# columns — confirm the two CSVs are really distinct.
1571_Temperature-Readings--IOT-Devices:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1571_Temperature-Readings--IOT-Devices.csv"
  dataset_description: "IoT-based indoor/outdoor temperature readings over time for smart monitoring."
  feature_descriptions:
    id: "Sensor reading ID"
    room_id/id: "Room ID or unique device identifier"
    noted_date: "Date and time when the reading was logged"
    temp: "Recorded temperature (°C)"
    out/in: "Location flag for indoor or outdoor"

# PS4 game records: title, score/rating, player counts, completion percentage and
# min/max completion-time estimates.
1572_PS4-Games:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1572_PS4-Games.csv"
  dataset_description: "PS4 game metadata including completion statistics and difficulty ratings."
  feature_descriptions:
    game: "Title of the game"
    score: "Review score or rating"
    # NOTE(review): 'leaderbord' looks like a misspelling of 'leaderboard'; presumably it
    # mirrors the CSV header — confirm before renaming.
    leaderbord: "Leaderboard presence or ranking"
    gamers: "Number of players"
    comp_perc: "Percentage of players who completed the game"
    rating: "ESRB or community rating"
    url: "Game URL"
    min_comp_time: "Minimum estimated time to complete"
    max_comp_time: "Maximum estimated time to complete"

# Diamond listings: price plus shape, carat, cut, color, clarity, report and type attributes,
# with the product-page URL and the date each listing was fetched.
1573_Brilliant-Diamonds:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1573_Brilliant-Diamonds.csv"
  dataset_description: "Diamond listing dataset including cut, clarity, and certification attributes."
  feature_descriptions:
    url: "URL to the product page"
    shape: "Diamond shape (e.g., round, princess)"
    price: "Price of the diamond"
    carat: "Weight of the diamond in carats"
    cut: "Cut quality"
    color: "Color grade"
    clarity: "Clarity grade"
    report: "Type of lab report or certification"
    type: "Type of diamond (natural/synthetic)"
    date_fetched: "Date when the listing was scraped"
# Mineral records: physical/optical properties, per-element columns, and derived
# density/molar quantities.
1574_Comprehensive-database-of-Minerals:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1574_Comprehensive-database-of-Minerals.csv"
  dataset_description: "A comprehensive dataset of minerals including physical and chemical properties, elemental composition, and crystallography."
  feature_descriptions:
    Name: "Name of the mineral"
    Crystal_Structure: "Crystal system (e.g., cubic, monoclinic)"
    Mohs_Hardness: "Hardness on the Mohs scale"
    Diaphaneity: "Transparency (e.g., transparent, opaque)"
    Specific_Gravity: "Density relative to water"
    Optical: "Optical properties of the mineral"
    Refractive_Index: "Measure of light bending"
    Dispersion: "Separation of light into colors"
    Hydrogen: "Presence of Hydrogen"
    Helium: "Presence of Helium"
    Lithium: "Presence of Lithium"
    # NOTE(review): the original author left a "..." placeholder here; presumably the remaining
    # per-element columns (after Lithium) are elided and have no descriptions — confirm against
    # the CSV header and add them.
    Calculated_Density: "Estimated mineral density"
    Molar_Mass: "Molecular mass in grams/mole"
    Molar_Volume: "Molar volume in cm³/mol"
    Hydrated_Water: "Presence of water molecules in structure"
    count: "Number of observations"

# Ansible defect-prediction features: churn/size metrics per file, with failure_prone
# described as the binary label and repository/filepath identifying the file.
1575_Within-project-Defect-Prediction-for-Ansible:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1575_Within-project-Defect-Prediction-for-Ansible.csv"
  dataset_description: "Features from Ansible commits and playbooks for predicting software defect proneness within the project."
  feature_descriptions:
    additions: "Number of lines added"
    deletions: "Number of lines removed"
    failure_prone: "Binary label for defect prediction"
    avg_play_size: "Average size of a play"
    code_churn_avg: "Average code churn"
    num_tasks: "Number of Ansible tasks"
    text_entropy: "Entropy of text content in code"
    num_roles: "Number of roles used"
    delta_lines_code: "Change in number of code lines"
    repository: "Repository name"
    filepath: "Path to the file being analyzed"

# New Zealand earthquake events: origin time, epicenter coordinates, depth and magnitude.
1576_Earthquakes-Data-NZ:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1576_Earthquakes-Data-NZ.csv"
  dataset_description: "Dataset of earthquake events recorded in New Zealand, including location and magnitude."
  feature_descriptions:
    origintime: "Timestamp of the event"
    longitude: "Longitude of the epicenter"
    # NOTE(review): the next three keys carry a leading underscore while 'longitude' does not;
    # presumably this mirrors the cleaned CSV header — confirm the exact column names.
    _latitude: "Latitude of the epicenter"
    _depth: "Depth of the earthquake in kilometers"
    _magnitude: "Magnitude of the earthquake"

# Amazon costume reviews: review text, title, date and star rating, keyed by product name.
1577_Popular-Halloween-2020--Costumes-Amazon-Reviews:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1577_Popular-Halloween-2020--Costumes-Amazon-Reviews.csv"
  dataset_description: "Amazon reviews for Halloween costumes in 2020, including text content and ratings."
  feature_descriptions:
    text: "Full review text"
    date: "Review date"
    title: "Title of the review"
    rating: "Star rating"
    product_name: "Name of the costume or product"

# NYC condominium records: building identifiers and attributes plus estimated income,
# expense, net operating income and market value for a given report year.
1578_Condominium-Comparable-Rental-Income-in-NYC:
  path: "/playpen-nvme/scribble/shbhat/OpenTabs/OpenTabs-Latest/clean_labeled_dataset/1578_Condominium-Comparable-Rental-Income-in-NYC.csv"
  dataset_description: "Rental income and valuation metrics for condos across NYC boroughs and neighborhoods."
  feature_descriptions:
    Boro-Block-Lot: "Unique building identifier"
    Address: "Property address"
    Neighborhood: "Name of the neighborhood"
    Building_Classification: "NYC building classification code"
    Total_Units: "Total number of housing units"
    Year_Built: "Year the building was constructed"
    Gross_SqFt: "Total gross square footage"
    Estimated_Gross_Income: "Estimated total rental income"
    Estimated_Expense: "Estimated annual expenses"
    Net_Operating_Income: "Estimated net income after expenses"
    Full_Market_Value: "Appraised market value"
    Report_Year: "Year of the financial report"

