category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.41,0.17,0.655,0.62,0.615,0.625
attribute_recognition,200,0.555,0.07,0.765,0.785,0.74,0.755
dynamic_temporal,200,0.215,0.28,0.49,0.435,0.455,0.46
hallucination_detection,200,0.435,0.185,0.66,0.66,0.6,0.595
object_counting,200,0.325,0.23,0.57,0.525,0.585,0.5
object_localization,200,0.79,0.03,0.92,0.91,0.89,0.885
object_presence,200,0.695,0.055,0.865,0.83,0.855,0.86
scene_understanding,200,0.88,0.035,0.95,0.9,0.945,0.925
spatial_relationship,200,0.64,0.025,0.83,0.87,0.855,0.845
visual_grounding,200,0.0,1.0,0.0,0.0,0.0,0.0
Average,2000,0.494,0.208,0.67,0.654,0.654,0.645
