category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.44,0.18,0.64,0.61,0.62,0.67
attribute_recognition,200,0.57,0.115,0.73,0.735,0.77,0.755
dynamic_temporal,200,0.145,0.315,0.43,0.455,0.405,0.39
hallucination_detection,200,0.2,0.38,0.415,0.475,0.385,0.42
object_counting,200,0.325,0.235,0.49,0.545,0.58,0.54
object_localization,200,0.65,0.015,0.845,0.89,0.835,0.84
object_presence,200,0.78,0.025,0.885,0.9,0.89,0.88
scene_understanding,200,0.73,0.045,0.875,0.84,0.845,0.88
spatial_relationship,200,0.52,0.045,0.78,0.775,0.81,0.79
visual_grounding,200,0.0,1.0,0.0,0.0,0.0,0.0
Average,2000,0.436,0.236,0.609,0.622,0.614,0.616
