category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.655,0.295,0.675,0.68,0.66,0.685
attribute_recognition,200,0.575,0.105,0.73,0.79,0.735,0.745
dynamic_temporal,200,0.145,0.4,0.375,0.385,0.365,0.365
hallucination_detection,200,0.34,0.395,0.52,0.465,0.48,0.46
object_counting,200,0.105,0.42,0.325,0.31,0.285,0.335
object_localization,200,0.435,0.06,0.675,0.69,0.71,0.74
object_presence,200,0.52,0.15,0.69,0.69,0.68,0.745
scene_understanding,200,0.72,0.075,0.84,0.85,0.815,0.81
spatial_relationship,200,0.08,0.085,0.555,0.465,0.455,0.455
visual_grounding,200,0.16,0.27,0.43,0.4,0.4,0.365
Average,2000,0.374,0.225,0.581,0.572,0.559,0.57
