# Identity of this analysis config.
# output_dir is presumably where the consumer writes its artifacts — confirm
# against the loader.
name: exploration_analysis
output_dir: results/exploration_analysis

# SFT baselines — each GSPO run pairs with its correct SFT base for gain
# computation. Entries are keyed by a short id (v1, v2); the `sft_base` field
# of each gspo_checkpoints entry refers to one of these ids.
sft_baselines:
  v1:
    label: dsr_sft_v1
    checkpoint_id: olmo3_7b_multi_puzzle_dsr__merged_best
    puzzle_results: results/olmo3_puzzle_pass32_v2/dsr_sft
    math_results: results/pass_at_k_s3/olmo3_sft  # OlymMATH hard pass64
  v2:
    label: dsr_sft_v2
    checkpoint_id: olmo3_7b_multi_puzzle_dsr_v2__merged_ep5_fp32
    puzzle_results: null   # TODO: needs eval — puzzle pass@32 (5 tasks)
    # NOTE(review): these results are avg8 only, so pass@k for k > 8 (k_values
    # includes 32) may not be computable for this baseline — confirm.
    math_results: results/olym_math_all/full_v2_puzzle  # OlymMATH hard avg8 only

# GSPO checkpoints — `sft_base` names the sft_baselines entry (v1 or v2) that
# the checkpoint derives from. Fields are listed in a fixed order per entry:
# sft_base, puzzle_results, math_results. A null puzzle_results marks a puzzle
# eval that has not been run yet.
gspo_checkpoints:
  gspo_v3_step10:
    sft_base: v1
    puzzle_results: null  # TODO: needs eval
    math_results: results/pass_at_k_s3/olmo3_gspo
  gspo_exploration_fix_step20:
    sft_base: v1
    puzzle_results: results/olmo3_puzzle_pass32_v2/gspo_step20
    math_results: results/olmo3_exploration_fix/step20
  gspo_v2_sft_step20:
    sft_base: v2
    puzzle_results: null  # TODO: needs eval
    math_results: results/gspo_v2_sft_s20_math_eval_v2
  novelty_production_step15:
    sft_base: v2
    puzzle_results: null  # TODO: needs eval
    math_results: results/novelty_production_step15
  novelty_production_step25:
    sft_base: v2
    puzzle_results: null  # TODO: needs eval
    math_results: results/novelty_production_step25
  novelty_mini_step20:
    sft_base: v2
    puzzle_results: null  # TODO: needs eval
    math_results: results/novelty_mini_gspo_step20

# SFT dynamics — control trajectory: SFT-only checkpoints (no GSPO), included
# to show a flat math ceiling. One entry per epoch, each pointing at its math
# results directory.
sft_dynamics:
  epoch2:
    math_results: results/olmo3_sft_dynamics/epoch2
  epoch4:
    math_results: results/olmo3_sft_dynamics/epoch4
  epoch6:
    math_results: results/olmo3_sft_dynamics/epoch6
  epoch8:
    math_results: results/olmo3_sft_dynamics/epoch8

# Pre-SFT base model (for context). Only math results are listed for it; there
# is no puzzle_results key here.
pre_sft_base:
  math_results: results/pass_at_k_s3/olmo3_base

# Primary math outcome variable.
# NOTE(review): the task id is spelled `olymp_math_hard`, while a result path
# in this file uses `olym_math_all` — confirm this matches the task key found
# in the result files.
math_outcome_task: olymp_math_hard
# Presumably the k values used for pass@k reporting — confirm with the consumer.
k_values: [1, 8, 32]

# Segmentation settings.
# min_tokens / max_tokens are presumably the lower and upper bounds on segment
# length, counted with the tokenizer below — TODO confirm (including whether
# the bounds are inclusive).
segmentation:
  min_tokens: 80
  max_tokens: 250
  # Looks like a Hugging Face Hub model id; verify the exact repo name resolves.
  tokenizer: allenai/OLMo-3-7B-Instruct-SFT

# Clustering settings.
# NOTE(review): distance_threshold + linkage suggest agglomerative clustering,
# and semantic_weight looks like a mixing weight between a semantic distance
# and some other component — confirm both against the clustering code.
clustering:
  distance_threshold: 0.3
  semantic_weight: 0.7
  linkage: average
  # Presumably the threshold used to flag a cluster/segment as novel — confirm.
  novel_tau: 0.1

# Primitive classification.
# `heuristic` selects the classifier implementation by name; the set of valid
# names is defined by the consumer, not by this file.
primitive_classifier: heuristic
# Presumably the minimum classifier confidence for a label to be kept — confirm
# the comparison direction and that a value this low (0.01) is intended.
primitive_confidence_threshold: 0.01
