category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.45,0.18,0.64,0.665,0.62,0.61
attribute_recognition,200,0.29,0.11,0.76,0.405,0.75,0.75
dynamic_temporal,200,0.165,0.35,0.44,0.405,0.435,0.38
hallucination_detection,200,0.255,0.295,0.485,0.45,0.48,0.495
object_counting,200,0.13,0.28,0.395,0.44,0.37,0.45
object_localization,200,0.28,0.095,0.635,0.545,0.58,0.61
object_presence,200,0.805,0.04,0.9,0.9,0.895,0.88
scene_understanding,200,0.78,0.03,0.935,0.885,0.86,0.905
spatial_relationship,200,0.135,0.105,0.61,0.57,0.57,0.45
visual_grounding,200,0.015,0.77,0.105,0.085,0.115,0.075
Average,2000,0.33,0.225,0.591,0.535,0.568,0.56
