category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.37,0.28,0.56,0.56,0.525,0.535
attribute_recognition,200,0.71,0.09,0.835,0.82,0.815,0.83
dynamic_temporal,200,0.12,0.31,0.425,0.355,0.38,0.355
hallucination_detection,200,0.625,0.125,0.76,0.755,0.73,0.8
object_counting,200,0.26,0.36,0.465,0.395,0.435,0.47
object_localization,200,0.555,0.07,0.78,0.775,0.79,0.76
object_presence,200,0.63,0.15,0.77,0.75,0.745,0.72
scene_understanding,200,0.85,0.015,0.94,0.92,0.935,0.91
spatial_relationship,200,0.425,0.115,0.67,0.69,0.705,0.665
visual_grounding,200,0.005,0.875,0.03,0.035,0.055,0.06
Average,2000,0.455,0.239,0.624,0.605,0.612,0.61
