category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.375,0.235,0.59,0.585,0.595,0.585
attribute_recognition,200,0.63,0.065,0.81,0.825,0.79,0.83
dynamic_temporal,200,0.205,0.295,0.46,0.435,0.475,0.465
hallucination_detection,200,0.655,0.11,0.79,0.805,0.76,0.8
object_counting,200,0.115,0.405,0.335,0.34,0.315,0.295
object_localization,200,0.6,0.02,0.81,0.845,0.795,0.845
object_presence,200,0.435,0.21,0.605,0.62,0.615,0.605
scene_understanding,200,0.67,0.055,0.875,0.835,0.82,0.82
spatial_relationship,200,0.65,0.025,0.83,0.86,0.865,0.85
visual_grounding,200,0.405,0.23,0.565,0.585,0.59,0.615
Average,2000,0.474,0.165,0.667,0.674,0.662,0.671
