# Task configuration for the "tempcompass_yes_no" evaluation task.
# Values tagged as functions point at callables in a sibling `utils` module;
# that module is not part of this file, so their exact behavior must be
# checked there.

# NOTE(review): presumably selects the "yes_no" subset/config of the dataset
# declared in the included template -- confirm against the loader.
dataset_name: "yes_no"
# Name under which this task is registered.
task: "tempcompass_yes_no"
# Dataset split used for evaluation.
test_split: test
# NOTE(review): presumably free-form generation until a stop condition;
# the stop/generation settings are not defined in this file.
output_type: generate_until
# Per-document hooks: visual input, prompt text, and reference answer.
doc_to_visual: !function utils.tempcompass_doc_to_visual
doc_to_text: !function utils.tempcompass_doc_to_text_yes_no
doc_to_target: !function utils.tempcompass_doc_to_answer
# Hook that turns a model response into per-document results.
# NOTE(review): presumably it emits one entry per metric name listed below --
# verify in utils.
process_results: !function utils.tempcompass_process_results_yes_no
# Reported metrics. Every entry uses the same aggregation callable and is
# marked higher-is-better; only the metric name differs.
metric_list:
  # Overall score.
  # NOTE(review): assumed to span all categories below -- TODO confirm.
  - metric: avg_accuracy
    aggregation: !function utils.tempcompass_aggregate_rating
    higher_is_better: true
  # Per-category scores: speed, direction, action, order, attribute change.
  - metric: speed_accuracy
    aggregation: !function utils.tempcompass_aggregate_rating
    higher_is_better: true
  - metric: direction_accuracy
    aggregation: !function utils.tempcompass_aggregate_rating
    higher_is_better: true
  - metric: action_accuracy
    aggregation: !function utils.tempcompass_aggregate_rating
    higher_is_better: true
  - metric: order_accuracy
    aggregation: !function utils.tempcompass_aggregate_rating
    higher_is_better: true
  - metric: attribute_change_accuracy
    aggregation: !function utils.tempcompass_aggregate_rating
    higher_is_better: true
# Pulls in shared settings from the `_default_template_yaml` file.
# NOTE(review): precedence between the included keys and the keys above is
# decided by the loader -- confirm before adding overlapping keys.
include: _default_template_yaml
