category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.37,0.27,0.52,0.56,0.575,0.53
attribute_recognition,200,0.5,0.13,0.715,0.655,0.71,0.715
dynamic_temporal,200,0.12,0.4,0.385,0.355,0.35,0.35
hallucination_detection,200,0.405,0.32,0.535,0.52,0.54,0.535
object_counting,200,0.3,0.465,0.415,0.45,0.44,0.405
object_localization,200,0.11,0.165,0.43,0.455,0.48,0.47
object_presence,200,0.45,0.26,0.575,0.595,0.595,0.635
scene_understanding,200,0.755,0.065,0.9,0.835,0.855,0.885
spatial_relationship,200,0.01,0.19,0.32,0.355,0.35,0.325
visual_grounding,200,0.0,1.0,0.0,0.0,0.0,0.0
Average,2000,0.302,0.326,0.48,0.478,0.489,0.485
