category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.53,0.195,0.655,0.705,0.665,0.69
attribute_recognition,200,0.505,0.14,0.69,0.715,0.68,0.725
dynamic_temporal,200,0.21,0.145,0.505,0.545,0.515,0.555
hallucination_detection,200,0.515,0.08,0.74,0.75,0.73,0.75
object_counting,200,0.165,0.365,0.41,0.42,0.405,0.355
object_localization,200,0.365,0.115,0.64,0.65,0.635,0.64
object_presence,200,0.34,0.23,0.53,0.555,0.53,0.585
scene_understanding,200,0.8,0.03,0.905,0.905,0.885,0.9
spatial_relationship,200,0.485,0.055,0.75,0.805,0.8,0.75
visual_grounding,200,0.01,0.58,0.205,0.115,0.17,0.135
Average,2000,0.392,0.194,0.603,0.616,0.601,0.608
