category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.6,0.21,0.74,0.675,0.72,0.675
attribute_recognition,200,0.7,0.075,0.84,0.85,0.845,0.82
dynamic_temporal,200,0.31,0.07,0.685,0.6,0.62,0.605
hallucination_detection,200,0.485,0.1,0.68,0.715,0.7,0.705
object_counting,200,0.185,0.245,0.46,0.455,0.395,0.5
object_localization,200,0.57,0.05,0.81,0.8,0.815,0.82
object_presence,200,0.735,0.055,0.845,0.855,0.84,0.87
scene_understanding,200,0.875,0.025,0.94,0.925,0.93,0.935
spatial_relationship,200,0.665,0.02,0.87,0.835,0.885,0.85
visual_grounding,200,0.175,0.265,0.41,0.345,0.45,0.445
Average,2000,0.53,0.112,0.728,0.706,0.72,0.722
