category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.56,0.25,0.66,0.67,0.65,0.685
attribute_recognition,200,0.725,0.085,0.82,0.85,0.81,0.845
dynamic_temporal,200,0.15,0.195,0.475,0.46,0.51,0.44
hallucination_detection,200,0.555,0.155,0.685,0.745,0.72,0.745
object_counting,200,0.235,0.4,0.415,0.395,0.36,0.41
object_localization,200,0.375,0.05,0.725,0.68,0.69,0.725
object_presence,200,0.645,0.14,0.77,0.75,0.75,0.775
scene_understanding,200,0.885,0.02,0.95,0.92,0.925,0.935
spatial_relationship,200,0.325,0.11,0.665,0.625,0.66,0.65
visual_grounding,200,0.065,0.365,0.255,0.335,0.265,0.29
Average,2000,0.452,0.177,0.642,0.643,0.634,0.65
