# Minimal config for smoke testing — just puzzle data, no math scoring
# Run identifier. The same string is the last path component of output_dir.
name: smoke_test
# Output location for this run.
# NOTE(review): relative path; presumably resolved against the repo root or
# the working directory. Confirm against the config loader.
output_dir: results/exploration_analysis/smoke_test

# SFT baseline entries, keyed by a short id. Only one baseline (`v1`) is
# defined here; the `sft_base` value under gspo_checkpoints uses the same id.
sft_baselines:
  v1:
    # NOTE(review): looks like a human-readable name for reports; confirm.
    label: dsr_sft_v1
    checkpoint_id: olmo3_7b_multi_puzzle_dsr__merged_best
    # Puzzle results path for this baseline. No math-results key is set,
    # consistent with the "no math scoring" note in the file header.
    puzzle_results: results/olmo3_puzzle_pass32_v2/dsr_sft

# GSPO checkpoint entries, keyed by checkpoint name. Only one is listed.
gspo_checkpoints:
  gspo_exploration_fix_step20:
    # Same id as the `v1` key under sft_baselines; presumably selects the
    # baseline this checkpoint is compared against. TODO confirm in loader.
    sft_base: v1
    # Puzzle results path for this checkpoint (same parent directory as the
    # baseline's puzzle_results).
    puzzle_results: results/olmo3_puzzle_pass32_v2/gspo_step20

# Segmentation settings.
# NOTE(review): min_tokens/max_tokens are presumably lower/upper bounds on
# segment length, counted with the tokenizer below. Confirm, including
# whether the bounds are inclusive.
segmentation:
  min_tokens: 80
  max_tokens: 250
  # NOTE(review): looks like a Hugging Face Hub model id (org/name); confirm
  # how the consumer loads it.
  tokenizer: allenai/OLMo-3-7B-Instruct-SFT

# Clustering settings.
# NOTE(review): `linkage` together with `distance_threshold` resembles the
# parameters of agglomerative (hierarchical) clustering. Confirm against the
# consuming code before relying on that reading.
clustering:
  distance_threshold: 0.3
  # NOTE(review): presumably the weight of a semantic term in a blended
  # distance; what receives the remaining weight is not visible here.
  semantic_weight: 0.7
  linkage: average
  # NOTE(review): presumably a threshold for flagging something as "novel";
  # units and comparison direction are not visible here. TODO confirm.
  novel_tau: 0.1

# Primitive classification settings (top-level keys, not nested in a stanza).
# NOTE(review): `heuristic` presumably selects one of several classifier
# backends; the other accepted values are not visible here.
primitive_classifier: heuristic
# NOTE(review): presumably a minimum confidence for accepting a primitive
# label; 0.01 would make that filter very permissive. Confirm this is
# intended for the smoke test.
primitive_confidence_threshold: 0.01
