category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.66,0.305,0.69,0.665,0.68,0.68
attribute_recognition,200,0.66,0.115,0.775,0.795,0.79,0.795
dynamic_temporal,200,0.145,0.36,0.385,0.4,0.375,0.4
hallucination_detection,200,0.39,0.335,0.565,0.53,0.515,0.51
object_counting,200,0.085,0.435,0.27,0.29,0.29,0.26
object_localization,200,0.47,0.05,0.76,0.74,0.715,0.75
object_presence,200,0.605,0.21,0.695,0.7,0.705,0.725
scene_understanding,200,0.725,0.075,0.85,0.84,0.82,0.805
spatial_relationship,200,0.16,0.12,0.565,0.525,0.55,0.535
visual_grounding,200,0.285,0.13,0.615,0.55,0.645,0.55
Average,2000,0.419,0.213,0.617,0.604,0.608,0.601
