boolean_expressions: 20
causal_judgement: 20
date_understanding: 20
disambiguation_qa: 20
dyck_languages: 20
formal_fallacies: 20
geometric_shapes: 10
hyperbaton: 20
logical_deduction_five_objects: 20
logical_deduction_seven_objects: 20
logical_deduction_three_objects: 20
movie_recommendation: 20
multistep_arithmetic_two: 20
navigate: 20
object_counting: 20
penguins_in_a_table: 20
reasoning_about_colored_objects: 20
ruin_names: 20
salient_translation_error_detection: 20
snarks: 20
sports_understanding: 20
temporal_sequences: 20
tracking_shuffled_objects_five_objects: 20
tracking_shuffled_objects_seven_objects: 20
tracking_shuffled_objects_three_objects: 20
web_of_lies: 20
word_sorting: 20