category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.33,0.155,0.6,0.625,0.6,0.585
attribute_recognition,200,0.61,0.105,0.795,0.75,0.775,0.775
dynamic_temporal,200,0.085,0.3,0.35,0.36,0.33,0.36
hallucination_detection,200,0.25,0.515,0.38,0.355,0.355,0.405
object_counting,200,0.165,0.365,0.365,0.35,0.37,0.39
object_localization,200,0.13,0.27,0.4,0.415,0.405,0.42
object_presence,200,0.335,0.25,0.55,0.51,0.575,0.55
scene_understanding,200,0.87,0.025,0.945,0.925,0.93,0.925
spatial_relationship,200,0.085,0.16,0.41,0.48,0.46,0.5
visual_grounding,200,0.03,0.595,0.14,0.17,0.175,0.16
Average,2000,0.289,0.274,0.493,0.494,0.497,0.507
