category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.495,0.245,0.62,0.625,0.615,0.62
attribute_recognition,200,0.535,0.115,0.715,0.745,0.745,0.725
dynamic_temporal,200,0.12,0.33,0.395,0.425,0.365,0.385
hallucination_detection,200,0.45,0.265,0.625,0.64,0.615,0.6
object_counting,200,0.205,0.48,0.35,0.335,0.335,0.36
object_localization,200,0.385,0.055,0.755,0.7,0.71,0.705
object_presence,200,0.62,0.095,0.775,0.76,0.78,0.79
scene_understanding,200,0.76,0.045,0.895,0.86,0.88,0.855
spatial_relationship,200,0.09,0.16,0.485,0.455,0.485,0.405
visual_grounding,200,0.61,0.03,0.85,0.77,0.85,0.845
Average,2000,0.427,0.182,0.646,0.632,0.638,0.629
