{"splits": ["train", "image_test_chartqa", "image_test_okvqa", "image_test_scienceqa", "image_test_seed_bench", "image_test_text_recognition", "image_test_textvqa", "image_test_vizwiz_vqa", "image_test_vqa_rad", "image_test_caltech101", "image_test_eurosat", "image_test_flowers102", "image_test_pets", "image_test_svhn", "image_test_camelyon", "text_test_arc_challenge", "text_test_arc_easy", "text_test_boolq", "text_test_hellaswag", "text_test_openbookqa", "text_test_piqa", "text_test_social_i_qa", "text_test_winogrande", "glue_test_sst2", "glue_test_qnli", "glue_test_qqp", "glue_test_cola", "glue_test_mrpc", "glue_test_stsb", "video_test_action_sequence", "video_test_action_prediction", "video_test_action_antonym", "video_test_fine_grained_action", "video_test_unexpected_action", "video_test_object_existence", "video_test_object_interaction", "video_test_object_shuffle", "video_test_moving_direction", "video_test_action_localization", "video_test_scene_transition", "video_test_action_count", "video_test_moving_count", "video_test_moving_attribute", "video_test_state_change", "video_test_character_order", "video_test_egocentric_navigation", "video_test_episodic_reasoning", "video_test_counterfactual_inference"]}