category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.51,0.195,0.645,0.675,0.645,0.71
attribute_recognition,200,0.525,0.125,0.695,0.71,0.72,0.73
dynamic_temporal,200,0.295,0.09,0.6,0.635,0.585,0.62
hallucination_detection,200,0.635,0.05,0.815,0.855,0.835,0.805
object_counting,200,0.135,0.315,0.365,0.445,0.41,0.355
object_localization,200,0.335,0.095,0.62,0.645,0.61,0.635
object_presence,200,0.43,0.15,0.65,0.68,0.615,0.655
scene_understanding,200,0.775,0.02,0.925,0.895,0.895,0.88
spatial_relationship,200,0.41,0.035,0.685,0.695,0.735,0.745
visual_grounding,200,0.03,0.57,0.165,0.21,0.165,0.225
Average,2000,0.408,0.164,0.616,0.644,0.622,0.636
