---
# Run identity. The last path component of `output_dir` is the same string
# as `name`; keep the two in sync when copying this file for a new run.
name: v90_sft_aime25
output_dir: results/exploration_analysis/v90_sft_aime25

# Settings sharing the `primitive_` prefix.
# NOTE(review): presumably all read by one primitive-classification step;
# the meaning of each value (e.g. what the 0.01 threshold is compared
# against) is not visible from this file — confirm against the loader.
primitive_batch_size: 256
primitive_classifier: learned
primitive_confidence_threshold: 0.01
primitive_fp16: true
primitive_multi_gpu: false
primitive_n_models: 1
# Segmentation settings: a tokenizer identifier plus min/max token bounds.
# NOTE(review): presumably segment lengths are kept between `min_tokens`
# and `max_tokens` as counted by `tokenizer` — confirm against the consumer.
segmentation:
  max_tokens: 250
  min_tokens: 80
  # NOTE(review): looks like an org/model repo id — confirm how it is loaded.
  tokenizer: allenai/OLMo-3-7B-Instruct-SFT
# SFT baselines, keyed by a short id; `v2` is the only entry defined here.
sft_baselines:
  v2:
    # NOTE(review): presumably the name shown for this baseline in
    # outputs — confirm.
    label: dsr_sft_v2
    # Relative path. NOTE(review): presumably resolved against the working
    # directory and expected to hold math-eval results for the checkpoint
    # named in the final path component — confirm.
    math_results: results/sft_baseline_math_eval_diverse/checkpoints__olmo3_7b_multi_puzzle_dsr_v2__merged_ep5_fp32
# Task filter: a one-element list naming `aime25`.
# NOTE(review): presumably restricts the run to the listed tasks — confirm.
task_filter:
  - aime25
