# Task configuration for the "dev" split of the mmbench_ru task.
# Settings not listed here are expected to come from the template named
# under `include` (presumably merged by the harness's config loader; confirm).
include: _default_template_mmbench_ru_yaml
task: 'mmbench_ru_dev'
test_split: dev

# Metrics for this task. Each entry pairs a metric name with an aggregation
# callable referenced from ru_utils through the custom function tag.
metric_list:
  # Evaluation score metric; larger values are flagged as better.
  - metric: gpt_eval_score
    higher_is_better: true
    aggregation: !function ru_utils.mmbench_aggregate_dev_results_eval
  # Submission metric.
  # NOTE(review): the aggregator name suggests it produces a submission
  # artifact rather than a numeric score; confirm in ru_utils.
  - metric: submission
    higher_is_better: true
    aggregation: !function ru_utils.mmbench_aggregate_dev_results_submission