category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.44,0.315,0.555,0.55,0.55,0.57
attribute_recognition,200,0.555,0.135,0.75,0.685,0.71,0.735
dynamic_temporal,200,0.165,0.3,0.46,0.44,0.425,0.445
hallucination_detection,200,0.245,0.52,0.36,0.365,0.37,0.36
object_counting,200,0.28,0.295,0.485,0.47,0.47,0.525
object_localization,200,0.625,0.045,0.805,0.805,0.835,0.815
object_presence,200,0.765,0.05,0.875,0.885,0.88,0.87
scene_understanding,200,0.69,0.065,0.87,0.805,0.82,0.81
spatial_relationship,200,0.435,0.085,0.69,0.705,0.755,0.705
visual_grounding,200,0.0,1.0,0.0,0.0,0.0,0.0
Average,2000,0.42,0.281,0.585,0.571,0.581,0.584
