category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.42,0.205,0.665,0.585,0.64,0.63
attribute_recognition,200,0.48,0.11,0.69,0.66,0.71,0.71
dynamic_temporal,200,0.145,0.235,0.425,0.455,0.41,0.44
hallucination_detection,200,0.745,0.025,0.88,0.89,0.84,0.89
object_counting,200,0.175,0.37,0.345,0.34,0.39,0.39
object_localization,200,0.075,0.115,0.395,0.435,0.5,0.54
object_presence,200,0.465,0.215,0.625,0.64,0.625,0.65
scene_understanding,200,0.87,0.025,0.95,0.935,0.925,0.925
spatial_relationship,200,0.025,0.145,0.37,0.375,0.43,0.395
visual_grounding,200,0.015,0.82,0.075,0.07,0.08,0.08
Average,2000,0.3415,0.2265,0.542,0.5385,0.555,0.565
