category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.455,0.1,0.665,0.73,0.685,0.685
attribute_recognition,200,0.755,0.05,0.835,0.865,0.845,0.88
dynamic_temporal,200,0.225,0.18,0.55,0.515,0.51,0.525
hallucination_detection,200,0.59,0.05,0.765,0.815,0.775,0.78
object_counting,200,0.145,0.25,0.425,0.435,0.415,0.455
object_localization,200,0.585,0.03,0.805,0.78,0.78,0.815
object_presence,200,0.77,0.065,0.87,0.86,0.875,0.86
scene_understanding,200,0.92,0.015,0.965,0.96,0.955,0.945
spatial_relationship,200,0.82,0.025,0.92,0.915,0.935,0.915
visual_grounding,200,0.33,0.07,0.63,0.645,0.645,0.68
Average,2000,0.56,0.083,0.743,0.752,0.742,0.754
