boolean_expressions: 50
causal_judgement: 50
date_understanding: 50
disambiguation_qa: 50
dyck_languages: 50
formal_fallacies: 50
geometric_shapes: 10
hyperbaton: 50
logical_deduction_five_objects: 50
logical_deduction_seven_objects: 50
logical_deduction_three_objects: 50
movie_recommendation: 50
multistep_arithmetic_two: 50
navigate: 50
object_counting: 50
penguins_in_a_table: 50
reasoning_about_colored_objects: 50
ruin_names: 50
salient_translation_error_detection: 20
snarks: 50
sports_understanding: 50
temporal_sequences: 50
tracking_shuffled_objects_five_objects: 50
tracking_shuffled_objects_seven_objects: 50
tracking_shuffled_objects_three_objects: 50
web_of_lies: 50
word_sorting: 50