# Dataset source and the split this task evaluates on.
dataset_path: lmms-lab/MMMU
test_split: test
task: mmmu_test
output_type: generate_until
# Per-document hooks, each bound to a name in utils via the custom function tag.
doc_to_visual: !function utils.mmmu_doc_to_visual
doc_to_text: !function utils.mmmu_doc_to_text
doc_to_target: answer
# Whatever process_results returns is what the metrics consume.
process_results: !function utils.mmmu_process_results

generation_kwargs:
  # A 512-token budget gives headroom to models such as the Qwen2.5-VL series,
  # which tend to produce longer, more detailed responses.
  max_new_tokens: 512

metric_list:
  # NOTE(review): the aggregation helper's name suggests it prepares results
  # for submission rather than computing a local score - confirm in utils.
  - metric: submission
    higher_is_better: true
    aggregation: !function utils.mmmu_test_aggregate_results_for_submission

# NOTE(review): presumably pulls shared defaults from the sibling
# _default_template_yaml file; confirm how the loader merges it with the
# keys defined in this file.
include: _default_template_yaml