category,total_questions,hit_all,miss_all,rot0,rot90,rot180,rot270
attribute_comparison,200,0.545,0.235,0.675,0.695,0.67,0.675
attribute_recognition,200,0.49,0.14,0.69,0.7,0.69,0.735
dynamic_temporal,200,0.2,0.155,0.585,0.5,0.485,0.505
hallucination_detection,200,0.6,0.05,0.8,0.82,0.755,0.81
object_counting,200,0.1,0.265,0.395,0.345,0.44,0.415
object_localization,200,0.42,0.08,0.735,0.69,0.665,0.68
object_presence,200,0.33,0.26,0.55,0.53,0.525,0.53
scene_understanding,200,0.745,0.06,0.9,0.845,0.855,0.86
spatial_relationship,200,0.48,0.05,0.775,0.76,0.775,0.725
visual_grounding,200,0.02,0.615,0.12,0.18,0.145,0.15
Average,2000,0.393,0.191,0.622,0.606,0.6,0.608
