# Metric definitions and display names

# Pass@k metrics: display labels keyed by metric identifier.
# The puzzle_* and math_* keys each cover k = 1, 8 and 32; the two
# *_pass32_gain entries are labelled as the pass@32 gain versus SFT.
pass_at_k:
  puzzle_pass_at_1: 'Puzzle pass@1'
  puzzle_pass_at_8: 'Puzzle pass@8'
  puzzle_pass_at_32: 'Puzzle pass@32'
  math_pass_at_1: 'Math pass@1'
  math_pass_at_8: 'Math pass@8'
  math_pass_at_32: 'Math pass@32'
  puzzle_pass32_gain: 'Puzzle pass@32 gain vs SFT'
  math_pass32_gain: 'Math pass@32 gain vs SFT'

# Primitive metrics: short descriptions keyed by metric identifier.
# The first three entries summarise the primitive labels as a whole (entropy,
# bigram entropy, distinct count); the *_per_1k entries are episode rates per
# 1k tokens for one primitive each.
# NOTE(review): the upper-case key prefixes presumably mirror primitive label
# names defined elsewhere -- confirm before renaming any key.
primitive:
  primitive_entropy: 'Shannon entropy over primitive labels'
  primitive_bigram_entropy: 'Entropy over primitive transition bigrams'
  n_unique_primitives: 'Number of distinct primitive types'
  VERIFY_per_1k: 'VERIFY episodes per 1k tokens'
  BACKTRACK_per_1k: 'BACKTRACK episodes per 1k tokens'
  ENUMERATE_per_1k: 'ENUMERATE episodes per 1k tokens'
  ERROR_DETECT_per_1k: 'ERROR_DETECT episodes per 1k tokens'

# Diversity metrics: short descriptions keyed by metric identifier.
# All entries describe how traces are spread across reasoning path clusters.
# NOTE(review): in the effective_num_paths formula, p is presumably each
# cluster's share of traces -- confirm against the computing code.
diversity:
  num_clusters: 'Number of reasoning path clusters'
  cluster_entropy: 'Shannon entropy of cluster sizes'
  effective_num_paths: 'Inverse Simpson index (1/Σp²)'
  top_cluster_mass: 'Fraction in largest cluster'
  successful_effective_num_paths: 'Effective paths (successful traces only)'

# Novelty metrics: short descriptions keyed by metric identifier.
# Both entries are fractions of RL traces that land in novel clusters; the
# second is restricted to successful traces. What makes a cluster "novel" is
# not defined in this file.
novelty:
  novel_cluster_mass_rl: 'Fraction of RL traces in novel clusters'
  successful_novel_cluster_mass_rl: 'Fraction of successful RL traces in novel clusters'
