# Task definition for `hmmt_feb_2025`, built on the
# MathArena/hmmt_feb_2025 dataset.
task: hmmt_feb_2025
tag:
  - math_word_problems

# Data source. The `train` split is the one used as the test split.
dataset_path: MathArena/hmmt_feb_2025
dataset_name: default
test_split: train

# Document handling. The `!function` entries name callables in `utils`
# (presumably a utils.py next to this file -- confirm); the target is
# taken from `answer`.
process_docs: !function utils.process_docs
doc_to_text: !function utils.doc_to_text
doc_to_target: answer

# Generation: no stop sequences, sampling disabled with temperature 0,
# and a max_gen_toks limit of 32768.
output_type: generate_until
generation_kwargs:
  max_gen_toks: 32768
  do_sample: false
  temperature: 0
  until: []

# Scoring: custom result processing plus the metrics to report.
# `exact_match` is averaged; `extracted_answers` uses `bypass` aggregation.
process_results: !function utils.process_results
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
  - metric: extracted_answers
    aggregation: bypass
    higher_is_better: true

metadata:
  version: 1.0


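# Disabled alternative to the configuration above: sampled decoding
# (do_sample: true, temperature 1, top_p 0.95) with 40 repeats per problem,
# reporting pass@k, maj@k and avg@k for k in {2, 4, 8, 16, 32}.
# tag: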
#   - math_word_problems
# task: hmmt_feb_2025
# dataset_path: MathArena/hmmt_feb_2025
# dataset_name: default
# process_docs: !function utils.process_docs
# output_type: generate_until
# test_split: train
# doc_to_text: !function utils.doc_to_text
# doc_to_target: answer
# process_results: !function utils.process_results
# generation_kwargs:
#   until: []
#   do_sample: true
#   temperature: 1
#   max_gen_toks: 32768
#   top_p: 0.95
# repeats: 40
# filter_list:
#   - name: "all" # Will do coverage, majority, and take_first_k
#     filter:
#       - function: "take_first_k"
#         k: 40
# metric_list:
#   - metric: exact_match
#     aggregation: mean
#     higher_is_better: true
#   - metric: pass@32
#     aggregation: mean
#     higher_is_better: true
#   - metric: pass@16
#     aggregation: mean
#     higher_is_better: true
#   - metric: pass@8
#     aggregation: mean
#     higher_is_better: true
#   - metric: pass@4
#     aggregation: mean
#     higher_is_better: true
#   - metric: pass@2
#     aggregation: mean
#     higher_is_better: true
#   - metric: maj@32
#     aggregation: mean
#     higher_is_better: true
#   - metric: maj@16
#     aggregation: mean
#     higher_is_better: true
#   - metric: maj@8
#     aggregation: mean
#     higher_is_better: true
#   - metric: maj@4
#     aggregation: mean
#     higher_is_better: true
#   - metric: maj@2
#     aggregation: mean
#     higher_is_better: true
#   - metric: avg@32
#     aggregation: mean
#     higher_is_better: true
#   - metric: avg@16
#     aggregation: mean
#     higher_is_better: true
#   - metric: avg@8
#     aggregation: mean
#     higher_is_better: true
#   - metric: avg@4
#     aggregation: mean
#     higher_is_better: true
#   - metric: avg@2
#     aggregation: mean
#     higher_is_better: true
#   - metric: extracted_answers
#     aggregation: bypass
#     higher_is_better: true
#   - metric: exact_matches
#     aggregation: bypass
#     higher_is_better: true
# metadata:
#   version: 1.0