# Task identity and the kind of request issued for each document.
task: strategyqa_cot
output_type: generate_until

# Dataset source: repository path, configuration name, and the split that
# is evaluated.
dataset_path: ChilleD/StrategyQA
dataset_name: default
test_split: test

# Prompt template: one labelled line per document field (term, description,
# facts, question), ending with an open "Answer:" cue for the model.
# The `|` block scalar keeps a single trailing newline, so the template text
# ends with "Answer:\n" unless the template engine strips it.
# NOTE(review): if the prompt should end directly after "Answer:", `|-` would
# strip that newline — confirm which form the consumer expects.
doc_to_text: |
  Term: {{term}}
  Description: {{description}}
  Facts: {{facts}}
  Question: {{question}}
  Answer:
# Reference label: "Yes" when the `answer` field is truthy, otherwise "No".
doc_to_target: "{{ 'Yes' if answer else 'No' }}"

# Decoding settings applied to every generated sample.
generation_kwargs:
  # Stop strings: headers that would begin a new example ("Q:", "Question: ",
  # "Term: ", "Description: "), end-of-sequence markers, and a blank line.
  # No separate "\n\nQ:" entry is needed: it starts with "\n\n" and contains
  # "Q:", both listed here, so it could never be the earliest stop to match.
  until:
    - "Q:"
    - "Question: "
    - "Term: "
    - "Description: "
    - "</s>"
    - "<|im_end|>"
    - "\n\n"
  # Sampling is enabled, so repeated generations for one prompt can differ.
  do_sample: true
  temperature: 0.5
  top_p: 0.95

# Five repeats per document, with no few-shot examples in the prompt.
# NOTE(review): `repeats: 5` presumably pairs with the sampled decoding
# (do_sample: true) and the k: 5 filter elsewhere in this file — confirm.
repeats: 5
num_fewshot: 0

# Scoring: a custom metric resolved through the `!function` tag
# (fs4_per_gen_list in utils); scores are averaged and larger is better.
# NOTE(review): the metric is defined outside this file — presumably it
# scores the list of generations kept per document; verify in utils.
metric_list:
  - metric: !function utils.fs4_per_gen_list
    aggregation: mean
    higher_is_better: true

# Post-processing pipeline "take_first_five": a single take_first_k step
# with k = 5 (the same count as `repeats` in this file).
filter_list:
  - name: take_first_five
    filter:
      - {function: take_first_k, k: 5}


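# Disabled alternative: exact-match scoring. Enabling it requires removing
# the active metric_list above, since a YAML mapping cannot repeat a key.
# metric_list: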
#   - metric: exact_match
#     aggregation: mean
#     higher_is_better: true
#     ignore_case: true
#     ignore_punctuation: true
#     regexes_to_ignore:
#       - "^Answer:\\s*"
#       - "^Final answer:\\s*"
#       - "\\.$"

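# Disabled alternative: regex filters that extract a Yes/No answer. Enabling
# them requires removing the active filter_list above (duplicate key).
# filter_list: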
#   - name: "yesno-strict"
#     filter:
#       - function: "regex"
#         # capture Yes/No after common prefixes
#         regex_pattern: "(?i)(?:^|\\n)\\s*(?:Answer:|Final answer:)?\\s*(Yes|No)\\b"
#       - function: "take_first"
#   - name: "yesno-fallback"
#     filter:
#       - function: "regex"
#         # last Yes/No anywhere
#         group_select: -1
#         regex_pattern: "(?i)(Yes|No)"
#       - function: "take_first"