category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.32,0.16,0.545,0.56,0.585,0.62
attribute_recognition,200,0.57,0.105,0.74,0.765,0.735,0.805
dynamic_temporal,200,0.115,0.31,0.395,0.4,0.395,0.405
hallucination_detection,200,0.24,0.38,0.445,0.455,0.41,0.435
object_counting,200,0.26,0.255,0.51,0.5,0.5,0.53
object_localization,200,0.67,0.03,0.82,0.88,0.885,0.825
object_presence,200,0.715,0.06,0.855,0.835,0.81,0.835
scene_understanding,200,0.7,0.05,0.88,0.79,0.83,0.855
spatial_relationship,200,0.435,0.05,0.74,0.715,0.755,0.69
visual_grounding,200,0.0,1.0,0.0,0.0,0.0,0.0
Average,2000,0.402,0.24,0.593,0.59,0.59,0.6
