{
  "fahamu/ioi": {
    "tldr": "Indirect-Object Identification synthetic sentences \u2014 small, clean, analyzable.",
    "folder_name": "fahamu__ioi"
  },
  "mib-bench/ioi": {
    "tldr": "Mechanistic Interpretability Benchmark version of IOI with counterfactuals.",
    "folder_name": "mib-bench__ioi"
  },
  "mib-bench/arithmetic_addition": {
    "tldr": "Synthetic addition problems structured for circuit analysis.",
    "folder_name": "mib-bench__arithmetic_addition"
  },
  "mib-bench/arithmetic_subtraction": {
    "tldr": "Synthetic subtraction problems designed similarly to the addition set.",
    "folder_name": "mib-bench__arithmetic_subtraction"
  },
  "mib-bench/copycolors_mcqa": {
    "tldr": "Multiple-choice task where the model must copy symbolic features/colors\u2014good for attention & binding probes.",
    "folder_name": "mib-bench__copycolors_mcqa"
  },
  "mib-bench/ravel": {
    "tldr": "Structured reasoning/composition benchmark with counterfactuals aimed at mechanistic interpretability.",
    "folder_name": "mib-bench__ravel"
  },
  "roneneldan/TinyStories": {
    "tldr": "Large corpus of very short stories \u2014 useful for narrative modeling and token-level circuit probing.",
    "folder_name": "roneneldan__TinyStories"
  },
  "allenai/ZebraLogicBench": {
    "tldr": "Logic-grid (zebra) puzzles with structured solutions\u2014ideal for analyzing reasoning circuits.",
    "folder_name": "allenai__ZebraLogicBench"
  },
  "WildEval/ZebraLogic": {
    "tldr": "Alternative zebra-logic puzzle set\u2014the text version permits transformer-based probing.",
    "folder_name": "WildEval__ZebraLogic"
  },
  "dtruong46me/mathqa-python": {
    "tldr": "Math word problems from MathQA translated into Python code \u2014 enabling alignment of activations with code structure.",
    "folder_name": "dtruong46me__mathqa-python"
  },
  "allenai/math_qa": {
    "tldr": "Large-scale math word-problem dataset with annotated operation programs, good for probing numeric reasoning circuits.",
    "folder_name": "allenai__math_qa"
  },
  "Punchwe/COGS": {
    "tldr": "Semantic parsing dataset focusing on compositional generalization\u2014useful for syntactic/semantic role circuit analysis.",
    "folder_name": "Punchwe__COGS"
  },
  "scan": {
    "tldr": "Compositional navigation commands \u2192 action sequences; classic dataset for testing generalization and mechanistic structure.",
    "folder_name": "scan"
  },
  "openai/openai_humaneval": {
    "tldr": "Python code-generation tasks with unit tests\u2014allows studying how GPT-2 learns code-token circuits.",
    "folder_name": "openai__openai_humaneval"
  },
  "google-research-datasets/mbpp": {
    "tldr": "Mostly Basic Python Problems (~1k tasks) with instructions and code\u2014good for small-scale mechanistic probing.",
    "folder_name": "google-research-datasets__mbpp"
  },
  "domenicrosati/TruthfulQA": {
    "tldr": "Benchmark measuring whether models produce truthful answers\u2014good for probing misunderstanding/false-belief circuits.",
    "folder_name": "domenicrosati__TruthfulQA"
  },
  "tau/commonsense_qa": {
    "tldr": "Commonsense multiple-choice QA dataset requiring world knowledge and relational reasoning.",
    "folder_name": "tau__commonsense_qa"
  },
  "wic": {
    "tldr": "Word-in-Context dataset (word-sense disambiguation) \u2014 useful for analyzing how GPT-2 represents lexical semantics.",
    "folder_name": "wic"
  },
  "drop": {
    "tldr": "Reading comprehension dataset requiring numeric reasoning and span extraction\u2014a mix of text and arithmetic circuits.",
    "folder_name": "drop"
  },
  "wikitext": {
    "tldr": "Clean language-modeling corpus (WikiText-2/WikiText-103) \u2014 good baseline for LM behavior and lower-level circuit probing.",
    "folder_name": "wikitext"
  }
}