category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.49,0.18,0.63,0.645,0.67,0.645
attribute_recognition,200,0.66,0.08,0.83,0.84,0.755,0.805
dynamic_temporal,200,0.155,0.37,0.405,0.38,0.395,0.395
hallucination_detection,200,0.42,0.27,0.615,0.58,0.565,0.615
object_counting,200,0.175,0.425,0.355,0.345,0.34,0.365
object_localization,200,0.17,0.14,0.535,0.515,0.5,0.505
object_presence,200,0.57,0.185,0.705,0.675,0.7,0.705
scene_understanding,200,0.895,0.01,0.955,0.95,0.94,0.94
spatial_relationship,200,0.02,0.18,0.385,0.38,0.35,0.325
visual_grounding,200,0.005,0.75,0.095,0.075,0.06,0.07
Average,2000,0.356,0.259,0.551,0.538,0.528,0.537
