# Group definition: bundles the 27 task names listed under `task` into a
# single group named `bbh_cot_zeroshot`.
# NOTE(review): the names suggest zero-shot chain-of-thought variants of the
# BIG-Bench Hard (BBH) tasks -- confirm against the per-task configs.
group: bbh_cot_zeroshot
# Members of the group, one task name per entry. None of these tasks is
# defined in this file; presumably each name resolves to a task config
# defined elsewhere -- verify that every name below exists.
task:
  - bbh_cot_zeroshot_boolean_expressions
  - bbh_cot_zeroshot_causal_judgement
  - bbh_cot_zeroshot_date_understanding
  - bbh_cot_zeroshot_disambiguation_qa
  - bbh_cot_zeroshot_dyck_languages
  - bbh_cot_zeroshot_formal_fallacies
  - bbh_cot_zeroshot_geometric_shapes
  - bbh_cot_zeroshot_hyperbaton
  - bbh_cot_zeroshot_logical_deduction_five_objects
  - bbh_cot_zeroshot_logical_deduction_seven_objects
  - bbh_cot_zeroshot_logical_deduction_three_objects
  - bbh_cot_zeroshot_movie_recommendation
  - bbh_cot_zeroshot_multistep_arithmetic_two
  - bbh_cot_zeroshot_navigate
  - bbh_cot_zeroshot_object_counting
  - bbh_cot_zeroshot_penguins_in_a_table
  - bbh_cot_zeroshot_reasoning_about_colored_objects
  - bbh_cot_zeroshot_ruin_names
  - bbh_cot_zeroshot_salient_translation_error_detection
  - bbh_cot_zeroshot_snarks
  - bbh_cot_zeroshot_sports_understanding
  - bbh_cot_zeroshot_temporal_sequences
  - bbh_cot_zeroshot_tracking_shuffled_objects_five_objects
  - bbh_cot_zeroshot_tracking_shuffled_objects_seven_objects
  - bbh_cot_zeroshot_tracking_shuffled_objects_three_objects
  - bbh_cot_zeroshot_web_of_lies
  - bbh_cot_zeroshot_word_sorting
# Aggregation settings for the group. Contains a single entry: the `mean`
# of the `exact_match` metric.
aggregate_metric_list:
  - metric: exact_match
    aggregation: mean
    # NOTE(review): presumably weights each member task by its number of
    # examples instead of averaging tasks equally -- confirm with the
    # consuming harness.
    weight_by_size: true
    # Given as a single plain string, not a YAML list.
    # NOTE(review): presumably restricts aggregation to scores produced under
    # the filter with this name -- confirm that the member tasks define it
    # and that the consumer accepts a bare string here.
    filter_list: flexible-extract
# Version metadata for this group config.
metadata:
  # Unquoted, so YAML loads this as the float 3.0, not the string "3.0".
  version: 3.0
