category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.255,0.115,0.575,0.59,0.575,0.605
attribute_recognition,200,0.435,0.135,0.66,0.65,0.7,0.705
dynamic_temporal,200,0.065,0.34,0.375,0.365,0.325,0.315
hallucination_detection,200,0.23,0.26,0.505,0.5,0.475,0.53
object_counting,200,0.185,0.285,0.41,0.45,0.46,0.455
object_localization,200,0.245,0.145,0.545,0.555,0.51,0.52
object_presence,200,0.545,0.085,0.765,0.765,0.765,0.775
scene_understanding,200,0.72,0.085,0.84,0.82,0.8,0.82
spatial_relationship,200,0.225,0.08,0.58,0.55,0.625,0.61
visual_grounding,200,0.0,1.0,0.0,0.0,0.0,0.0
Average,2000,0.291,0.253,0.526,0.524,0.524,0.534
