category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.61,0.235,0.68,0.695,0.71,0.685
attribute_recognition,200,0.48,0.13,0.665,0.67,0.65,0.69
dynamic_temporal,200,0.245,0.06,0.64,0.625,0.61,0.6
hallucination_detection,200,0.745,0.04,0.865,0.885,0.895,0.88
object_counting,200,0.115,0.385,0.365,0.355,0.36,0.335
object_localization,200,0.34,0.145,0.575,0.59,0.605,0.56
object_presence,200,0.325,0.25,0.525,0.505,0.555,0.565
scene_understanding,200,0.78,0.02,0.9,0.905,0.875,0.89
spatial_relationship,200,0.475,0.075,0.72,0.715,0.755,0.7
visual_grounding,200,0.035,0.725,0.175,0.105,0.14,0.125
Average,2000,0.415,0.206,0.611,0.605,0.615,0.603
