# musique_support_only.yaml
# Task definition for the `musique_support_only` evaluation.
# Unique task name used to reference this config.
task: musique_support_only
# Dataset identifier to load (presumably a Hugging Face Hub repo id — confirm).
dataset_path: dgslibisey/MuSiQue
# Free-form generation; decoding stops on the `until` strings listed under
# generation_kwargs.
output_type: generate_until
# Evaluation documents are taken from the dataset's `validation` split.
test_split: validation

# Preprocess the dataset with the Python hook `musique_process_docs` from the
# utils module before prompts are built. Intended to keep only the supporting
# paragraph(s); presumably it also produces the `support_text` field that
# doc_to_text renders — confirm against utils.
process_docs: !function utils.musique_process_docs

# Prompt template (Jinja): shows the context held in `support_text`, then the
# question, and asks the model to reason step by step and finish with a line
# of the form "The answer is: <answer>".
# NOTE: every line inside the `|` block (including the blank one) is prompt
# text verbatim, so comments must stay outside the block.
doc_to_text: |
  Context:
  {{support_text}}

  Question: {{question}}
  Think step by step. Finally output the final answer on its own line like "The answer is: <answer>".
# Gold target: the `answer` field rendered as a string; falls back to the
# empty string when `answer` is undefined.
doc_to_target: "{{ answer | default('') | string }}"

# Alternative scoring (disabled): run the four-stage verifier on the extracted answer only
# process_results: !function utils.process_results_freeform

# metric_list:
#   - metric: acc_fs4
#     aggregation: mean
#     higher_is_better: true

# Scoring: a single custom metric implemented by `fs4_per_gen_list` in the
# utils module. Per-document scores are averaged and larger values are better.
# NOTE(review): the name suggests it scores a list of generations per
# document — confirm the expected input shape in utils.
metric_list:
  - metric: !function utils.fs4_per_gen_list
    aggregation: mean
    higher_is_better: true

# Decoding settings passed to the model for each generation request.
generation_kwargs:
  # Stop strings: the output is cut at the first occurrence of any of these.
  # "\nThe answer is:" is deliberately NOT a stop string: the prompt tells the
  # model to finish with a line 'The answer is: <answer>', so stopping on that
  # prefix would truncate the output before the answer itself is emitted.
  until:
    - "\n\nQ:"
    - "</s>"
    - "<|im_end|>"
    - "Question:"
    - "\nAnswer:"
  # Sampling (rather than greedy decoding) so that repeated generations for
  # the same document can differ.
  do_sample: true
  temperature: 0.5
  top_p: 0.95

# Pass@5 wiring: request 5 generations per document, with no few-shot
# examples prepended to the prompt.
repeats: 5
num_fewshot: 0

# Post-generation filter pipeline named `take_first_five`: a single
# `take_first_k` step with k=5, the same count as `repeats: 5`.
# NOTE(review): presumably this forwards all five sampled generations to the
# metric — confirm `take_first_k` semantics in the harness.
filter_list:
  - name: take_first_five
    filter:
      - function: take_first_k
        k: 5
