# OMEGA compositional reasoning benchmark — greedy single sample (pilot)
#
# Task configuration: every record of one local JSONL file is rendered into a
# zero-shot prompt, a single deterministic completion is generated, and the
# result is scored by a project-local function and reported as `exact_match`.
# NOTE(review): the key names follow the lm-evaluation-harness task schema;
# confirm against the harness version actually in use.
task: omega_r1

# Data source: the generic "json" loader pointed at one local file.
# NOTE(review): the data_files path is relative — confirm which working
# directory the evaluation is launched from.
dataset_path: json
dataset_kwargs:
  data_files: "data/omega/omega_all.jsonl"
output_type: generate_until
# The split named "train" is the one evaluated.
# NOTE(review): presumably because a single data file with no explicit split
# mapping is exposed under that name — confirm.
test_split: train

# Prompt template; {{question}} and {{answer}} are filled from each record.
# In a double-quoted YAML scalar "\n" is a newline and "\\" is one backslash,
# so the rendered prompt has a blank line after the problem statement and ends
# with the literal text \boxed{}.
doc_to_text: "Problem: {{question}}\n\nPlease reason step by step, and put your final answer within \\boxed{}."
doc_to_target: "{{answer}}"
# Scoring is delegated to `process_results` in the sibling `utils` module,
# which is not visible from this file.
# NOTE(review): presumably it extracts the \boxed{} answer and returns a value
# under the `exact_match` key used in metric_list — verify.
process_results: !function utils.process_results

generation_kwargs:
  # Strings that end a generation.
  until:
    # NOTE(review): this also fires if the model writes "Problem:" inside its
    # own reasoning — confirm that this cannot truncate long solutions.
    - "Problem:"
    # The remaining entries look like end-of-sequence markers from several
    # tokenizer families.
    - "</s>"
    - "<|im_end|>"
    - "<|endoftext|>"
    # ASCII-pipe spelling, kept so existing behaviour is unchanged.
    - "<|end▁of▁sentence|>"
    # DeepSeek tokenizers spell this marker with fullwidth vertical bars
    # (U+FF5C), which the ASCII spelling above cannot match.
    - "<｜end▁of▁sentence｜>"
  # Greedy decoding: sampling off and temperature zero, giving the single
  # greedy sample named in the header.
  do_sample: false
  temperature: 0.0
  # Upper bound on generation length.
  # NOTE(review): presumably counted in tokens and sized for long
  # step-by-step solutions — confirm it fits the model's context window.
  max_gen_toks: 16000

# One reported metric, averaged over all documents; larger is better.
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
# Zero-shot: no worked examples are prepended to the prompt.
num_fewshot: 0
metadata:
  version: 1.0
  description: "OMEGA compositional reasoning — greedy pilot (all categories, all difficulties)"
