category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.49,0.155,0.665,0.695,0.665,0.68
attribute_recognition,200,0.59,0.08,0.76,0.76,0.77,0.79
dynamic_temporal,200,0.145,0.365,0.39,0.385,0.38,0.39
hallucination_detection,200,0.23,0.39,0.45,0.375,0.405,0.435
object_counting,200,0.125,0.355,0.365,0.35,0.385,0.36
object_localization,200,0.44,0.09,0.675,0.675,0.72,0.685
object_presence,200,0.89,0.015,0.96,0.945,0.955,0.935
scene_understanding,200,0.86,0.02,0.945,0.92,0.91,0.92
spatial_relationship,200,0.495,0.05,0.755,0.765,0.755,0.7
visual_grounding,200,0.02,0.68,0.13,0.11,0.12,0.12
Average,2000,0.428,0.22,0.609,0.598,0.606,0.602
