category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.365,0.16,0.62,0.615,0.615,0.535
attribute_recognition,200,0.525,0.17,0.695,0.645,0.705,0.725
dynamic_temporal,200,0.06,0.345,0.355,0.32,0.355,0.36
hallucination_detection,200,0.245,0.385,0.425,0.425,0.425,0.41
object_counting,200,0.185,0.28,0.425,0.44,0.415,0.435
object_localization,200,0.21,0.275,0.405,0.49,0.49,0.45
object_presence,200,0.63,0.045,0.845,0.775,0.84,0.81
scene_understanding,200,0.705,0.075,0.85,0.85,0.81,0.825
spatial_relationship,200,0.055,0.18,0.415,0.41,0.4,0.445
visual_grounding,200,0.0,1.0,0.0,0.0,0.0,0.0
Average,2000,0.298,0.292,0.503,0.497,0.505,0.5
