{
    "dataset_name": "yeast",
    "source": "https://archive.ics.uci.edu/dataset/110/yeast",
    "description": "This dataset contains biochemical features extracted from yeast proteins. The task is to detect anomalous localizations using a binary label based on subcellular locations.",
    "columns": [
      {
        "name": "mcg",
        "dtype": "float",
        "logical_type": "numerical",
        "description": "Score from McGeoch's method for signal sequence recognition.",
        "missing_value_token": null
      },
      {
        "name": "gvh",
        "dtype": "float",
        "logical_type": "numerical",
        "description": "Score from von Heijne's method for signal sequence recognition.",
        "missing_value_token": null
      },
      {
        "name": "alm",
        "dtype": "float",
        "logical_type": "numerical",
        "description": "Score from ALOM program for transmembrane domain prediction.",
        "missing_value_token": null
      },
      {
        "name": "mit",
        "dtype": "float",
        "logical_type": "numerical",
        "description": "Discriminant score on the first 20 amino acids for mitochondrial prediction.",
        "missing_value_token": null
      },
      {
        "name": "erl",
        "dtype": "float",
        "logical_type": "binary",
        "description": "Presence (1.0) or absence (0.0) of the HDEL substring signal for ER retention.",
        "missing_value_token": null
      },
      {
        "name": "pox",
        "dtype": "float",
        "logical_type": "numerical",
        "description": "Score for peroxisomal targeting signal at the C-terminus.",
        "missing_value_token": null
      },
      {
        "name": "vac",
        "dtype": "float",
        "logical_type": "numerical",
        "description": "Discriminant score based on amino acid content for vacuolar and extracellular proteins.",
        "missing_value_token": null
      },
      {
        "name": "nuc",
        "dtype": "float",
        "logical_type": "numerical",
        "description": "Discriminant score for nuclear localization signals.",
        "missing_value_token": null
      },
      {
        "name": "label",
        "dtype": "int",
        "logical_type": "binary",
        "description": "Binary label: 1 if the protein is localized to the endoplasmic reticulum membrane (ME1 or ME2), 0 otherwise.",
        "missing_value_token": null
      }
    ],
    "label_column": "label",
    "label_description": "Label 0 represents normal cases: proteins with common subcellular localizations. Label 1 represents anomalies: proteins localized to the endoplasmic reticulum membrane (ME1 or ME2).",
    "normal_description": "Proteins with common subcellular localizations (non-ME1/ME2)."
  }