category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.395,0.27,0.555,0.55,0.565,0.545
attribute_recognition,200,0.54,0.11,0.7,0.75,0.735,0.745
dynamic_temporal,200,0.135,0.33,0.415,0.385,0.405,0.385
hallucination_detection,200,0.13,0.59,0.275,0.28,0.265,0.28
object_counting,200,0.32,0.29,0.53,0.5,0.48,0.505
object_localization,200,0.73,0.03,0.86,0.87,0.9,0.87
object_presence,200,0.72,0.045,0.83,0.84,0.845,0.84
scene_understanding,200,0.7,0.055,0.865,0.8,0.82,0.85
spatial_relationship,200,0.495,0.05,0.765,0.73,0.795,0.72
visual_grounding,200,0.0,1.0,0.0,0.0,0.0,0.0
Average,2000,0.416,0.277,0.579,0.57,0.581,0.574
