category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.42,0.145,0.65,0.635,0.69,0.615
attribute_recognition,200,0.61,0.065,0.8,0.795,0.805,0.785
dynamic_temporal,200,0.225,0.15,0.525,0.575,0.505,0.51
hallucination_detection,200,0.575,0.09,0.735,0.79,0.755,0.765
object_counting,200,0.2,0.215,0.385,0.51,0.48,0.485
object_localization,200,0.435,0.055,0.7,0.75,0.73,0.73
object_presence,200,0.645,0.04,0.805,0.81,0.83,0.815
scene_understanding,200,0.93,0.005,0.97,0.955,0.96,0.96
spatial_relationship,200,0.415,0.05,0.665,0.72,0.76,0.735
visual_grounding,200,0.015,0.645,0.155,0.125,0.115,0.135
Average,2000,0.447,0.146,0.639,0.666,0.663,0.654
