category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.635,0.195,0.735,0.715,0.745,0.715
attribute_recognition,200,0.8,0.055,0.86,0.85,0.88,0.905
dynamic_temporal,200,0.19,0.27,0.49,0.445,0.425,0.45
hallucination_detection,200,0.52,0.19,0.645,0.645,0.65,0.675
object_counting,200,0.34,0.365,0.475,0.48,0.47,0.475
object_localization,200,0.795,0.015,0.915,0.925,0.895,0.91
object_presence,200,0.645,0.135,0.765,0.775,0.765,0.775
scene_understanding,200,0.95,0.01,0.985,0.965,0.97,0.985
spatial_relationship,200,0.72,0.015,0.84,0.885,0.905,0.87
visual_grounding,200,0.0,1.0,0.0,0.0,0.0,0.0
Average,2000,0.56,0.225,0.671,0.669,0.67,0.676
